nkululeko 0.88.3__tar.gz → 0.88.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nkululeko-0.88.3 → nkululeko-0.88.5}/CHANGELOG.md +10 -0
- {nkululeko-0.88.3/nkululeko.egg-info → nkululeko-0.88.5}/PKG-INFO +17 -3
- {nkululeko-0.88.3 → nkululeko-0.88.5}/README.md +6 -2
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/constants.py +1 -1
- nkululeko-0.88.5/nkululeko/ensemble.py +343 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_agender.py +5 -3
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_ast.py +2 -4
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_spkrec.py +1 -1
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/modelrunner.py +3 -3
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/models/model_svm.py +2 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/reporting/reporter.py +31 -20
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/runmanager.py +1 -1
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/utils/util.py +51 -6
- {nkululeko-0.88.3 → nkululeko-0.88.5/nkululeko.egg-info}/PKG-INFO +17 -3
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko.egg-info/SOURCES.txt +0 -2
- nkululeko-0.88.3/data/androids_orig/process_database.py +0 -93
- nkululeko-0.88.3/data/androids_test/process_database.py +0 -93
- nkululeko-0.88.3/nkululeko/ensemble.py +0 -158
- {nkululeko-0.88.3 → nkululeko-0.88.5}/LICENSE +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/aesdd/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/androids/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/ased/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/asvp-esd/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/baved/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/cafe/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/clac/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/cmu-mosei/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/demos/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/ekorpus/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/emns/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/emofilm/convert_to_16k.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/emofilm/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/emorynlp/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/emov-db/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/emovo/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/emozionalmente/create.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/enterface/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/esd/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/gerparas/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/iemocap/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/jl/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/jtes/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/meld/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/mesd/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/mess/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/mlendsnd/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/msp-improv/process_database2.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/msp-podcast/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/oreau2/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/portuguese/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/ravdess/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/ravdess/process_database_speaker.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/savee/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/shemo/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/subesco/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/tess/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/thorsten-emotional/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/urdu/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/data/vivae/process_database.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/docs/source/conf.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/meta/demos/demo_best_model.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/meta/demos/my_experiment.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/meta/demos/my_experiment_local.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/meta/demos/plot_faster_anim.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/__init__.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/aug_train.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/augment.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/augmenting/__init__.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/augmenting/augmenter.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/augmenting/randomsplicer.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/augmenting/randomsplicing.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/augmenting/resampler.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/autopredict/__init__.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/autopredict/ap_age.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/autopredict/ap_arousal.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/autopredict/ap_dominance.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/autopredict/ap_gender.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/autopredict/ap_mos.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/autopredict/ap_pesq.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/autopredict/ap_sdr.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/autopredict/ap_snr.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/autopredict/ap_stoi.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/autopredict/ap_valence.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/autopredict/estimate_snr.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/cacheddataset.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/data/__init__.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/data/dataset.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/data/dataset_csv.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/demo.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/demo_feats.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/demo_predictor.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/experiment.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/explore.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/export.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/__init__.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_agender_agender.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_analyser.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_auddim.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_audmodel.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_clap.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_hubert.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_import.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_mld.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_mos.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_opensmile.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_oxbow.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_praat.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_snr.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_spectra.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_squim.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_trill.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_wav2vec2.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_wavlm.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feats_whisper.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/featureset.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feat_extract/feinberg_praat.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/feature_extractor.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/file_checker.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/filter_data.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/glob_conf.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/losses/__init__.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/losses/loss_ccc.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/losses/loss_softf1loss.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/models/__init__.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/models/model.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/models/model_bayes.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/models/model_cnn.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/models/model_gmm.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/models/model_knn.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/models/model_knn_reg.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/models/model_lin_reg.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/models/model_mlp.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/models/model_mlp_regression.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/models/model_svr.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/models/model_tree.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/models/model_tree_reg.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/models/model_tuned.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/models/model_xgb.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/models/model_xgr.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/multidb.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/nkuluflag.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/nkululeko.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/plots.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/predict.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/reporting/__init__.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/reporting/defines.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/reporting/latex_writer.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/reporting/report.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/reporting/report_item.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/reporting/result.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/resample.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/scaler.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/segment.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/segmenting/__init__.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/segmenting/seg_inaspeechsegmenter.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/segmenting/seg_silero.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/syllable_nuclei.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/test.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/test_predictor.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/test_pretrain.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/utils/__init__.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/utils/files.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko/utils/stats.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko.egg-info/dependency_links.txt +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko.egg-info/requires.txt +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/nkululeko.egg-info/top_level.txt +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/pyproject.toml +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/setup.cfg +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/setup.py +0 -0
- {nkululeko-0.88.3 → nkululeko-0.88.5}/venv/bin/activate_this.py +0 -0
@@ -1,6 +1,16 @@
|
|
1
1
|
Changelog
|
2
2
|
=========
|
3
3
|
|
4
|
+
Version 0.88.5
|
5
|
+
--------------
|
6
|
+
* add a unique name to the uncertainty plot
|
7
|
+
* fix error in speaker embedding (still need speechbrain < 1.0)
|
8
|
+
* add get_target_name function in util
|
9
|
+
|
10
|
+
Version 0.88.4
|
11
|
+
--------------
|
12
|
+
* added more ensemble methods, e.g. based on uncertainty
|
13
|
+
|
4
14
|
Version 0.88.3
|
5
15
|
--------------
|
6
16
|
* fixed bug in false uncertainty estimation
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.88.3
|
3
|
+
Version: 0.88.5
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -68,7 +68,8 @@ A project to detect speaker characteristics by machine learning experiments with
|
|
68
68
|
|
69
69
|
The idea is to have a framework (based on e.g. sklearn and torch) that can be used to rapidly and automatically analyse audio data and explore machine learning models based on that data.
|
70
70
|
|
71
|
-
* NEW with
|
71
|
+
* NEW with nkululeko: [Ensemble learning](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/)
|
72
|
+
* NEW: [Finetune transformer-models](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
|
72
73
|
* The latest features can be seen in [the ini-file](./ini_file.md) options that are used to control Nkululeko
|
73
74
|
* Below is a [Hello World example](#helloworld) that should set you up quickly, also on [Google Colab](https://colab.research.google.com/drive/1GYNBd5cdZQ1QC3Jm58qoeMaJg3UuPhjw?usp=sharing#scrollTo=4G_SjuF9xeQf), and [with Kaggle](https://www.kaggle.com/felixburk/nkululeko-hello-world-example)
|
74
75
|
* [Here's a blog post on how to set up nkululeko on your computer.](http://blog.syntheticspeech.de/2021/08/30/how-to-set-up-your-first-nkululeko-project/)
|
@@ -203,7 +204,8 @@ All of them take *--config <my_config.ini>* as an argument.
|
|
203
204
|
* **nkululeko.nkululeko**: do machine learning experiments combining features and learners
|
204
205
|
* **nkululeko.ensemble**: [combine several nkululeko experiments](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/) and report on late fusion results
|
205
206
|
* *configurations*: which experiments to combine
|
206
|
-
* *--method* (optional): majority_voting, mean, max, sum
|
207
|
+
* *--method* (optional): majority_voting, mean, max, sum, max_class, uncertainty_threshold, uncertainty_weighted, confidence_weighted
|
208
|
+
* *--threshold*: uncertainty threshold (1.0 means no threshold)
|
207
209
|
* *--outfile* (optional): name of CSV file for output
|
208
210
|
* *--no_labels* (optional): indicate that no ground truth is given
|
209
211
|
* **nkululeko.multidb**: do [multiple experiments](http://blog.syntheticspeech.de/2024/01/02/nkululeko-compare-several-databases/), comparing several databases cross and in itself
|
@@ -273,6 +275,8 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
|
|
273
275
|
* [Tweak the target variable for database comparison](http://blog.syntheticspeech.de/2024/03/13/nkululeko-how-to-tweak-the-target-variable-for-database-comparison/)
|
274
276
|
* [How to run multiple experiments in one go](http://blog.syntheticspeech.de/2022/03/28/how-to-run-multiple-experiments-in-one-go-with-nkululeko/)
|
275
277
|
* [How to finetune a transformer-model](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
|
278
|
+
* [Ensemble (combine) classifiers with late-fusion](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/)
|
279
|
+
|
276
280
|
|
277
281
|
### <a name="helloworld">Hello World example</a>
|
278
282
|
* NEW: [Here's a Google colab that runs this example out-of-the-box](https://colab.research.google.com/drive/1GYNBd5cdZQ1QC3Jm58qoeMaJg3UuPhjw?usp=sharing#scrollTo=4G_SjuF9xeQf), and here is the same [with Kaggle](https://www.kaggle.com/felixburk/nkululeko-hello-world-example)
|
@@ -356,6 +360,16 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
|
|
356
360
|
Changelog
|
357
361
|
=========
|
358
362
|
|
363
|
+
Version 0.88.5
|
364
|
+
--------------
|
365
|
+
* add a unique name to the uncertainty plot
|
366
|
+
* fix error in speaker embedding (still need speechbrain < 1.0)
|
367
|
+
* add get_target_name function in util
|
368
|
+
|
369
|
+
Version 0.88.4
|
370
|
+
--------------
|
371
|
+
* added more ensemble methods, e.g. based on uncertainty
|
372
|
+
|
359
373
|
Version 0.88.3
|
360
374
|
--------------
|
361
375
|
* fixed bug in false uncertainty estimation
|
@@ -24,7 +24,8 @@ A project to detect speaker characteristics by machine learning experiments with
|
|
24
24
|
|
25
25
|
The idea is to have a framework (based on e.g. sklearn and torch) that can be used to rapidly and automatically analyse audio data and explore machine learning models based on that data.
|
26
26
|
|
27
|
-
* NEW with
|
27
|
+
* NEW with nkululeko: [Ensemble learning](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/)
|
28
|
+
* NEW: [Finetune transformer-models](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
|
28
29
|
* The latest features can be seen in [the ini-file](./ini_file.md) options that are used to control Nkululeko
|
29
30
|
* Below is a [Hello World example](#helloworld) that should set you up quickly, also on [Google Colab](https://colab.research.google.com/drive/1GYNBd5cdZQ1QC3Jm58qoeMaJg3UuPhjw?usp=sharing#scrollTo=4G_SjuF9xeQf), and [with Kaggle](https://www.kaggle.com/felixburk/nkululeko-hello-world-example)
|
30
31
|
* [Here's a blog post on how to set up nkululeko on your computer.](http://blog.syntheticspeech.de/2021/08/30/how-to-set-up-your-first-nkululeko-project/)
|
@@ -159,7 +160,8 @@ All of them take *--config <my_config.ini>* as an argument.
|
|
159
160
|
* **nkululeko.nkululeko**: do machine learning experiments combining features and learners
|
160
161
|
* **nkululeko.ensemble**: [combine several nkululeko experiments](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/) and report on late fusion results
|
161
162
|
* *configurations*: which experiments to combine
|
162
|
-
* *--method* (optional): majority_voting, mean, max, sum
|
163
|
+
* *--method* (optional): majority_voting, mean, max, sum, max_class, uncertainty_threshold, uncertainty_weighted, confidence_weighted
|
164
|
+
* *--threshold*: uncertainty threshold (1.0 means no threshold)
|
163
165
|
* *--outfile* (optional): name of CSV file for output
|
164
166
|
* *--no_labels* (optional): indicate that no ground truth is given
|
165
167
|
* **nkululeko.multidb**: do [multiple experiments](http://blog.syntheticspeech.de/2024/01/02/nkululeko-compare-several-databases/), comparing several databases cross and in itself
|
@@ -229,6 +231,8 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
|
|
229
231
|
* [Tweak the target variable for database comparison](http://blog.syntheticspeech.de/2024/03/13/nkululeko-how-to-tweak-the-target-variable-for-database-comparison/)
|
230
232
|
* [How to run multiple experiments in one go](http://blog.syntheticspeech.de/2022/03/28/how-to-run-multiple-experiments-in-one-go-with-nkululeko/)
|
231
233
|
* [How to finetune a transformer-model](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
|
234
|
+
* [Ensemble (combine) classifiers with late-fusion](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/)
|
235
|
+
|
232
236
|
|
233
237
|
### <a name="helloworld">Hello World example</a>
|
234
238
|
* NEW: [Here's a Google colab that runs this example out-of-the-box](https://colab.research.google.com/drive/1GYNBd5cdZQ1QC3Jm58qoeMaJg3UuPhjw?usp=sharing#scrollTo=4G_SjuF9xeQf), and here is the same [with Kaggle](https://www.kaggle.com/felixburk/nkululeko-hello-world-example)
|
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.88.3"
|
1
|
+
VERSION="0.88.5"
|
2
2
|
SAMPLING_RATE = 16000
|
@@ -0,0 +1,343 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
from typing import List
|
5
|
+
import configparser
|
6
|
+
import time
|
7
|
+
from argparse import ArgumentParser
|
8
|
+
from pathlib import Path
|
9
|
+
|
10
|
+
import numpy as np
|
11
|
+
import pandas as pd
|
12
|
+
from sklearn.metrics import balanced_accuracy_score
|
13
|
+
|
14
|
+
from nkululeko.constants import VERSION
|
15
|
+
from nkululeko.experiment import Experiment
|
16
|
+
from nkululeko.utils.util import Util
|
17
|
+
|
18
|
+
# import torch
|
19
|
+
|
20
|
+
# Constants
|
21
|
+
DEFAULT_METHOD = "mean"
|
22
|
+
DEFAULT_OUTFILE = "ensemble_result.csv"
|
23
|
+
|
24
|
+
|
25
|
+
def majority_voting(ensemble_preds_ls):
|
26
|
+
all_predictions = pd.concat([df["predicted"] for df in ensemble_preds_ls], axis=1)
|
27
|
+
return all_predictions.mode(axis=1).iloc[:, 0]
|
28
|
+
|
29
|
+
|
30
|
+
def mean_ensemble(ensemble_preds, labels):
|
31
|
+
for label in labels:
|
32
|
+
ensemble_preds[label] = ensemble_preds[label].mean(axis=1)
|
33
|
+
return ensemble_preds[labels].idxmax(axis=1)
|
34
|
+
|
35
|
+
|
36
|
+
def max_ensemble(ensemble_preds, labels):
|
37
|
+
for label in labels:
|
38
|
+
ensemble_preds[label] = ensemble_preds[label].max(axis=1)
|
39
|
+
return ensemble_preds[labels].idxmax(axis=1)
|
40
|
+
|
41
|
+
|
42
|
+
def sum_ensemble(ensemble_preds, labels):
|
43
|
+
for label in labels:
|
44
|
+
ensemble_preds[label] = ensemble_preds[label].sum(axis=1)
|
45
|
+
return ensemble_preds[labels].idxmax(axis=1)
|
46
|
+
|
47
|
+
|
48
|
+
def uncertainty_ensemble(ensemble_preds):
|
49
|
+
"""Same as uncertainty_threshold with a threshold of 0.1"""
|
50
|
+
final_predictions = []
|
51
|
+
best_uncertainty = []
|
52
|
+
for _, row in ensemble_preds.iterrows():
|
53
|
+
uncertainties = row[["uncertainty"]].values
|
54
|
+
min_uncertainty_idx = np.argmin(uncertainties)
|
55
|
+
final_predictions.append(row["predicted"].iloc[min_uncertainty_idx])
|
56
|
+
best_uncertainty.append(uncertainties[min_uncertainty_idx])
|
57
|
+
|
58
|
+
return final_predictions, best_uncertainty
|
59
|
+
|
60
|
+
|
61
|
+
def max_class_ensemble(ensemble_preds_ls, labels):
|
62
|
+
"""Compare the highest probabilities of all models across classes (instead of the same class as in max_ensemble) and return the highest probability and the class"""
|
63
|
+
final_preds = []
|
64
|
+
final_probs = []
|
65
|
+
|
66
|
+
for _, row in pd.concat(ensemble_preds_ls, axis=1).iterrows():
|
67
|
+
max_probs = []
|
68
|
+
max_classes = []
|
69
|
+
|
70
|
+
for model_df in ensemble_preds_ls:
|
71
|
+
model_probs = row[labels].astype(float)
|
72
|
+
max_prob = model_probs.max()
|
73
|
+
max_class = model_probs.idxmax()
|
74
|
+
|
75
|
+
max_probs.append(max_prob)
|
76
|
+
max_classes.append(max_class)
|
77
|
+
|
78
|
+
best_model_index = np.argmax(max_probs)
|
79
|
+
|
80
|
+
final_preds.append(max_classes[best_model_index])
|
81
|
+
final_probs.append(max_probs[best_model_index])
|
82
|
+
|
83
|
+
return pd.Series(final_preds), pd.Series(final_probs)
|
84
|
+
|
85
|
+
|
86
|
+
def uncertainty_threshold_ensemble(ensemble_preds_ls, labels, threshold):
|
87
|
+
final_predictions = []
|
88
|
+
final_uncertainties = []
|
89
|
+
|
90
|
+
for idx in ensemble_preds_ls[0].index:
|
91
|
+
uncertainties = [df.loc[idx, "uncertainty"] for df in ensemble_preds_ls]
|
92
|
+
min_uncertainty_idx = np.argmin(uncertainties)
|
93
|
+
min_uncertainty = uncertainties[min_uncertainty_idx]
|
94
|
+
|
95
|
+
if min_uncertainty <= threshold:
|
96
|
+
# Use the prediction with low uncertainty
|
97
|
+
final_predictions.append(
|
98
|
+
ensemble_preds_ls[min_uncertainty_idx].loc[idx, "predicted"]
|
99
|
+
)
|
100
|
+
final_uncertainties.append(min_uncertainty)
|
101
|
+
else: # for uncertainty above threshold
|
102
|
+
# Calculate mean of probabilities same class different model
|
103
|
+
mean_probs = np.mean(
|
104
|
+
[df.loc[idx, labels].values for df in ensemble_preds_ls], axis=0
|
105
|
+
)
|
106
|
+
final_predictions.append(labels[np.argmax(mean_probs)])
|
107
|
+
final_uncertainties.append(np.mean(uncertainties))
|
108
|
+
|
109
|
+
return final_predictions
|
110
|
+
|
111
|
+
|
112
|
+
def uncertainty_weighted_ensemble(ensemble_preds_ls, labels):
|
113
|
+
"""Weighted ensemble based on uncertainty, normalized for each class"""
|
114
|
+
final_predictions = []
|
115
|
+
final_uncertainties = []
|
116
|
+
|
117
|
+
for idx in ensemble_preds_ls[0].index:
|
118
|
+
uncertainties = [df.loc[idx, "uncertainty"] for df in ensemble_preds_ls]
|
119
|
+
# Convert uncertainties to accuracies/confidence
|
120
|
+
accuracies = [1 - uncertainty for uncertainty in uncertainties]
|
121
|
+
|
122
|
+
# Calculate weights (inverse of uncertainties)
|
123
|
+
weights = [
|
124
|
+
1 / uncertainty if uncertainty != 0 else 1e10
|
125
|
+
for uncertainty in uncertainties
|
126
|
+
]
|
127
|
+
|
128
|
+
# Normalize weights for each class
|
129
|
+
total_weight = sum(weights)
|
130
|
+
normalized_weights = [w / total_weight for w in weights]
|
131
|
+
|
132
|
+
# Calculate weighted probabilities for each class
|
133
|
+
weighted_probs = {label: 0 for label in labels}
|
134
|
+
for df, weight in zip(ensemble_preds_ls, normalized_weights):
|
135
|
+
for label in labels:
|
136
|
+
weighted_probs[label] += df.loc[idx, label] * weight
|
137
|
+
|
138
|
+
# Select the class with the highest weighted probability
|
139
|
+
predicted_class = max(weighted_probs, key=weighted_probs.get)
|
140
|
+
final_predictions.append(predicted_class)
|
141
|
+
|
142
|
+
# Use the lowest accuracy as the final uncertainty
|
143
|
+
final_uncertainties.append(1 - min(accuracies))
|
144
|
+
|
145
|
+
return final_predictions, final_uncertainties
|
146
|
+
|
147
|
+
|
148
|
+
def confidence_weighted_ensemble(ensemble_preds_ls, labels):
|
149
|
+
"""Weighted ensemble based on confidence, normalized for all samples per model"""
|
150
|
+
final_predictions = []
|
151
|
+
final_confidences = []
|
152
|
+
|
153
|
+
for idx in ensemble_preds_ls[0].index:
|
154
|
+
class_probabilities = {label: 0 for label in labels}
|
155
|
+
total_confidence = 0
|
156
|
+
|
157
|
+
for df in ensemble_preds_ls:
|
158
|
+
row = df.loc[idx]
|
159
|
+
confidence = 1 - row["uncertainty"] # confidence score
|
160
|
+
total_confidence += confidence
|
161
|
+
|
162
|
+
for label in labels:
|
163
|
+
class_probabilities[label] += row[label] * confidence
|
164
|
+
|
165
|
+
# Normalize probabilities
|
166
|
+
for label in labels:
|
167
|
+
class_probabilities[label] /= total_confidence
|
168
|
+
|
169
|
+
predicted_class = max(class_probabilities, key=class_probabilities.get)
|
170
|
+
final_predictions.append(predicted_class)
|
171
|
+
final_confidences.append(max(class_probabilities.values()))
|
172
|
+
|
173
|
+
return final_predictions, final_confidences
|
174
|
+
|
175
|
+
|
176
|
+
def ensemble_predictions(
|
177
|
+
config_files: List[str], method: str, threshold: float, no_labels: bool
|
178
|
+
) -> pd.DataFrame:
|
179
|
+
"""
|
180
|
+
Ensemble predictions from multiple experiments.
|
181
|
+
|
182
|
+
Args:
|
183
|
+
config_files (list): List of configuration file paths.
|
184
|
+
method (str): Ensemble method to use. Options are 'majority_voting', 'mean', 'max', or 'sum'.
|
185
|
+
no_labels (bool): Flag indicating whether the predictions have labels or not.
|
186
|
+
|
187
|
+
Returns:
|
188
|
+
pandas.DataFrame: The ensemble predictions.
|
189
|
+
|
190
|
+
Raises:
|
191
|
+
ValueError: If an unknown ensemble method is provided.
|
192
|
+
AssertionError: If the number of config files is less than 2 for majority voting.
|
193
|
+
|
194
|
+
"""
|
195
|
+
ensemble_preds_ls = []
|
196
|
+
for config_file in config_files:
|
197
|
+
if no_labels:
|
198
|
+
# for ensembling results from Nkululeko.demo
|
199
|
+
preds = pd.read_csv(config_file)
|
200
|
+
labels = preds.columns[1:-2]
|
201
|
+
else:
|
202
|
+
# for ensembling results from Nkululeko.nkululeko
|
203
|
+
config = configparser.ConfigParser()
|
204
|
+
config.read(config_file)
|
205
|
+
expr = Experiment(config)
|
206
|
+
module = "ensemble"
|
207
|
+
expr.set_module(module)
|
208
|
+
util = Util(module, has_config=True)
|
209
|
+
util.debug(
|
210
|
+
f"running {expr.name} from config {config_file}, nkululeko version"
|
211
|
+
f" {VERSION}"
|
212
|
+
)
|
213
|
+
|
214
|
+
# get labels
|
215
|
+
labels = expr.util.get_labels()
|
216
|
+
# load the experiment
|
217
|
+
# get CSV files of predictions
|
218
|
+
pred_name = expr.util.get_pred_name()
|
219
|
+
util.debug(f"Loading predictions from {pred_name}")
|
220
|
+
preds = pd.read_csv(pred_name)
|
221
|
+
|
222
|
+
ensemble_preds_ls.append(preds)
|
223
|
+
|
224
|
+
# pd concate
|
225
|
+
ensemble_preds = pd.concat(ensemble_preds_ls, axis=1)
|
226
|
+
|
227
|
+
if method == "majority_voting":
|
228
|
+
assert (
|
229
|
+
len(ensemble_preds_ls) > 2
|
230
|
+
), "Majority voting only works for more than two models"
|
231
|
+
ensemble_preds["predicted"] = majority_voting(ensemble_preds_ls)
|
232
|
+
elif method == "mean":
|
233
|
+
ensemble_preds["predicted"] = mean_ensemble(ensemble_preds, labels)
|
234
|
+
elif method == "max":
|
235
|
+
ensemble_preds["predicted"] = max_ensemble(ensemble_preds, labels)
|
236
|
+
elif method == "sum":
|
237
|
+
ensemble_preds["predicted"] = sum_ensemble(ensemble_preds, labels)
|
238
|
+
elif method == "max_class":
|
239
|
+
ensemble_preds["predicted"], ensemble_preds["max_probability"] = (
|
240
|
+
max_class_ensemble(ensemble_preds_ls, labels)
|
241
|
+
)
|
242
|
+
elif method == "uncertainty_threshold":
|
243
|
+
ensemble_preds["predicted"] = uncertainty_threshold_ensemble(
|
244
|
+
ensemble_preds_ls, labels, threshold
|
245
|
+
)
|
246
|
+
elif method == "uncertainty_weighted":
|
247
|
+
ensemble_preds["predicted"], ensemble_preds["uncertainty"] = (
|
248
|
+
uncertainty_weighted_ensemble(ensemble_preds_ls, labels)
|
249
|
+
)
|
250
|
+
elif method == "confidence_weighted":
|
251
|
+
ensemble_preds["predicted"], ensemble_preds["confidence"] = (
|
252
|
+
confidence_weighted_ensemble(ensemble_preds_ls, labels)
|
253
|
+
)
|
254
|
+
else:
|
255
|
+
raise ValueError(f"Unknown ensemble method: {method}")
|
256
|
+
|
257
|
+
# get the highest value from all labels to infer the label
|
258
|
+
# replace the old first predicted column
|
259
|
+
if method in ["mean", "max", "sum"]:
|
260
|
+
ensemble_preds["predicted"] = ensemble_preds[labels].idxmax(axis=1)
|
261
|
+
|
262
|
+
if no_labels:
|
263
|
+
return ensemble_preds
|
264
|
+
|
265
|
+
# Drop start, end columns
|
266
|
+
ensemble_preds = ensemble_preds.drop(columns=["start", "end"])
|
267
|
+
|
268
|
+
# Drop other column except until truth
|
269
|
+
ensemble_preds = ensemble_preds.iloc[:, : len(labels) + 3]
|
270
|
+
|
271
|
+
# calculate UAR from predicted and truth columns
|
272
|
+
|
273
|
+
truth = ensemble_preds["truth"]
|
274
|
+
predicted = ensemble_preds["predicted"]
|
275
|
+
uar = balanced_accuracy_score(truth, predicted)
|
276
|
+
acc = (truth == predicted).mean()
|
277
|
+
Util("ensemble").debug(f"{method}: UAR: {uar:.3f}, ACC: {acc:.3f}")
|
278
|
+
|
279
|
+
return ensemble_preds
|
280
|
+
|
281
|
+
|
282
|
+
def main(src_dir: Path) -> None:
    """Command-line entry point for ensembling Nkululeko experiment predictions.

    Parses CLI arguments, runs ``ensemble_predictions`` over the given
    experiment configs, and writes the combined predictions to a CSV file.

    Args:
        src_dir: Directory of this script. NOTE(review): currently unused by
            the body — kept for interface compatibility; confirm whether it
            was intended for resolving relative config paths.

    Returns:
        None. Side effects: writes ``--outfile`` CSV and emits debug logs.
    """
    parser = ArgumentParser()
    parser.add_argument(
        "configs",
        nargs="+",
        help="Paths to the configuration files of the experiments to ensemble. \
        Can be INI files for Nkululeko.nkululeko or CSV files from Nkululeko.demo.",
    )
    parser.add_argument(
        "--method",
        default=DEFAULT_METHOD,
        choices=[
            "majority_voting",
            "mean",
            "max",
            "sum",
            "max_class",
            # "uncertainty_lowest",
            # "entropy",
            "uncertainty_threshold",
            "uncertainty_weighted",
            "confidence_weighted",
        ],
        help=f"Ensemble method to use (default: {DEFAULT_METHOD})",
    )
    # threshold is only consulted by the uncertainty_threshold method;
    # the default of 1.0 means "no thresholding".
    parser.add_argument(
        "--threshold",
        default=1.0,
        type=float,
        help="Threshold for uncertainty_threshold method (default: 1.0, i.e. no threshold)",
    )
    parser.add_argument(
        "--outfile",
        default=DEFAULT_OUTFILE,
        help=f"Output file path for the ensemble predictions (default: {DEFAULT_OUTFILE})",
    )
    parser.add_argument(
        "--no_labels",
        action="store_true",
        help="True if true labels are not available. For Nkululeko.demo results.",
    )

    args = parser.parse_args()

    start = time.time()

    ensemble_preds = ensemble_predictions(
        args.configs, args.method, args.threshold, args.no_labels
    )

    # save to csv
    ensemble_preds.to_csv(args.outfile, index=False)
    # single Util instance instead of constructing one per log line
    util = Util("ensemble")
    util.debug(f"Ensemble predictions saved to: {args.outfile}")
    util.debug(f"Ensemble done, used {time.time()-start:.2f} seconds")

    util.debug("DONE")


if __name__ == "__main__":
    cwd = Path(__file__).parent
    main(cwd)
|
@@ -1,13 +1,14 @@
|
|
1
1
|
# feats_agender.py
|
2
|
+
|
2
3
|
from nkululeko.feat_extract.featureset import Featureset
|
3
4
|
import os
|
4
|
-
import pandas as pd
|
5
|
+
# import pandas as pd
|
5
6
|
import audeer
|
6
7
|
import nkululeko.glob_conf as glob_conf
|
7
8
|
import audonnx
|
8
9
|
import numpy as np
|
9
10
|
import audinterface
|
10
|
-
|
11
|
+
import torch
|
11
12
|
|
12
13
|
class AgenderSet(Featureset):
|
13
14
|
"""
|
@@ -32,7 +33,8 @@ class AgenderSet(Featureset):
|
|
32
33
|
archive_path = audeer.download_url(
|
33
34
|
model_url, cache_root, verbose=True)
|
34
35
|
audeer.extract_archive(archive_path, model_root)
|
35
|
-
|
36
|
+
cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
37
|
+
device = self.util.config_val("MODEL", "device", cuda)
|
36
38
|
self.model = audonnx.load(model_root, device=device)
|
37
39
|
self.util.debug(f"initialized agender model")
|
38
40
|
self.model_loaded = True
|
@@ -100,10 +100,8 @@ class Ast(Featureset):
|
|
100
100
|
embeddings = torch.mean(last_hidden_state, dim=1)
|
101
101
|
embeddings = embeddings.cpu().numpy()
|
102
102
|
|
103
|
-
#
|
104
|
-
#
|
105
|
-
print(f"hs shape: {embeddings.shape}")
|
106
|
-
|
103
|
+
# print(f"hs shape: {embeddings.shape}")
|
104
|
+
# hs shape: (1, 768)
|
107
105
|
|
108
106
|
except Exception as e:
|
109
107
|
self.util.error(f"Error extracting embeddings for file {file}: {str(e)}, fill with")
|
@@ -24,7 +24,7 @@ class Spkrec(Featureset):
|
|
24
24
|
def __init__(self, name, data_df, feat_type):
|
25
25
|
"""Constructor. is_train is needed to distinguish from test/dev sets,
|
26
26
|
because they use the codebook from the training"""
|
27
|
-
super().__init__(name, data_df)
|
27
|
+
super().__init__(name, data_df, feat_type)
|
28
28
|
# check if device is not set, use cuda if available
|
29
29
|
cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
30
30
|
self.device = self.util.config_val("MODEL", "device", cuda)
|
@@ -238,21 +238,21 @@ class Modelrunner:
|
|
238
238
|
if balancing == "ros":
|
239
239
|
from imblearn.over_sampling import RandomOverSampler
|
240
240
|
|
241
|
-
sampler = RandomOverSampler()
|
241
|
+
sampler = RandomOverSampler(random_state=42)
|
242
242
|
X_res, y_res = sampler.fit_resample(
|
243
243
|
self.feats_train, self.df_train[self.target]
|
244
244
|
)
|
245
245
|
elif balancing == "smote":
|
246
246
|
from imblearn.over_sampling import SMOTE
|
247
247
|
|
248
|
-
sampler = SMOTE()
|
248
|
+
sampler = SMOTE(random_state=42)
|
249
249
|
X_res, y_res = sampler.fit_resample(
|
250
250
|
self.feats_train, self.df_train[self.target]
|
251
251
|
)
|
252
252
|
elif balancing == "adasyn":
|
253
253
|
from imblearn.over_sampling import ADASYN
|
254
254
|
|
255
|
-
sampler = ADASYN()
|
255
|
+
sampler = ADASYN(random_state=42)
|
256
256
|
X_res, y_res = sampler.fit_resample(
|
257
257
|
self.feats_train, self.df_train[self.target]
|
258
258
|
)
|
@@ -1,5 +1,6 @@
|
|
1
1
|
# model_svm.py
|
2
2
|
|
3
|
+
import random
|
3
4
|
from sklearn import svm
|
4
5
|
from nkululeko.models.model import Model
|
5
6
|
|
@@ -24,6 +25,7 @@ class SVM_model(Model):
|
|
24
25
|
gamma="scale",
|
25
26
|
probability=True,
|
26
27
|
class_weight=class_weight,
|
28
|
+
random_state=42, # for consistent result
|
27
29
|
) # set up the classifier
|
28
30
|
|
29
31
|
def set_c(self, c):
|
@@ -2,28 +2,32 @@ import ast
|
|
2
2
|
import glob
|
3
3
|
import json
|
4
4
|
import math
|
5
|
-
import os
|
6
5
|
|
7
|
-
from confidence_intervals import evaluate_with_conf_int
|
8
6
|
import matplotlib.pyplot as plt
|
9
7
|
import numpy as np
|
8
|
+
|
9
|
+
# from torch import is_tensor
|
10
|
+
from audmetric import (
|
11
|
+
accuracy,
|
12
|
+
concordance_cc,
|
13
|
+
mean_absolute_error,
|
14
|
+
mean_squared_error,
|
15
|
+
unweighted_average_recall,
|
16
|
+
)
|
17
|
+
|
18
|
+
# import os
|
19
|
+
from confidence_intervals import evaluate_with_conf_int
|
10
20
|
from scipy.special import softmax
|
11
|
-
from scipy.stats import entropy
|
12
|
-
from
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
from audmetric import accuracy
|
23
|
-
from audmetric import concordance_cc
|
24
|
-
from audmetric import mean_absolute_error
|
25
|
-
from audmetric import mean_squared_error
|
26
|
-
from audmetric import unweighted_average_recall
|
21
|
+
from scipy.stats import entropy, pearsonr
|
22
|
+
from sklearn.metrics import (
|
23
|
+
ConfusionMatrixDisplay,
|
24
|
+
auc,
|
25
|
+
classification_report,
|
26
|
+
confusion_matrix,
|
27
|
+
r2_score,
|
28
|
+
roc_auc_score,
|
29
|
+
roc_curve,
|
30
|
+
)
|
27
31
|
|
28
32
|
import nkululeko.glob_conf as glob_conf
|
29
33
|
from nkululeko.plots import Plots
|
@@ -166,7 +170,8 @@ class Reporter:
|
|
166
170
|
)
|
167
171
|
probas["uncertainty"] = uncertainty
|
168
172
|
probas["correct"] = probas.predicted == probas.truth
|
169
|
-
sp =
|
173
|
+
sp = self.util.get_pred_name()
|
174
|
+
|
170
175
|
self.probas = probas
|
171
176
|
probas.to_csv(sp)
|
172
177
|
self.util.debug(f"Saved probabilities to {sp}")
|
@@ -174,7 +179,13 @@ class Reporter:
|
|
174
179
|
ax, caption = plots.plotcatcont(
|
175
180
|
probas, "correct", "uncertainty", "uncertainty", "correct"
|
176
181
|
)
|
177
|
-
plots.save_plot(
|
182
|
+
plots.save_plot(
|
183
|
+
ax,
|
184
|
+
caption,
|
185
|
+
"Uncertainty",
|
186
|
+
"uncertainty_samples",
|
187
|
+
self.util.get_exp_name(),
|
188
|
+
)
|
178
189
|
|
179
190
|
def set_id(self, run, epoch):
|
180
191
|
"""Make the report identifiable with run and epoch index."""
|
@@ -50,7 +50,7 @@ class Runmanager:
|
|
50
50
|
self.last_epochs = [] # keep the epoch of best result per run
|
51
51
|
# for all runs
|
52
52
|
for run in range(int(self.util.config_val("EXP", "runs", 1))):
|
53
|
-
self.util.debug(f"run {run}")
|
53
|
+
self.util.debug(f"run {run} using model {glob_conf.config['MODEL']['type']}")
|
54
54
|
# set the run index as global variable for reporting
|
55
55
|
self.util.set_config_val("EXP", "run", run)
|
56
56
|
self.modelrunner = Modelrunner(
|