nkululeko 0.86.8__tar.gz → 0.87.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nkululeko-0.86.8 → nkululeko-0.87.0}/CHANGELOG.md +4 -0
- {nkululeko-0.86.8/nkululeko.egg-info → nkululeko-0.87.0}/PKG-INFO +13 -1
- {nkululeko-0.86.8 → nkululeko-0.87.0}/README.md +8 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/constants.py +1 -1
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/data/dataset_csv.py +12 -14
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/demo.py +4 -8
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/modelrunner.py +5 -5
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model.py +23 -3
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_cnn.py +41 -22
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_mlp.py +37 -17
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_mlp_regression.py +3 -1
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/plots.py +25 -37
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/reporting/reporter.py +69 -6
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/runmanager.py +8 -11
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/test_predictor.py +1 -6
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/utils/stats.py +11 -7
- {nkululeko-0.86.8 → nkululeko-0.87.0/nkululeko.egg-info}/PKG-INFO +13 -1
- {nkululeko-0.86.8 → nkululeko-0.87.0}/LICENSE +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/aesdd/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/androids/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/androids_orig/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/androids_test/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/ased/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/asvp-esd/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/baved/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/cafe/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/clac/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/cmu-mosei/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/demos/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/ekorpus/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/emns/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/emofilm/convert_to_16k.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/emofilm/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/emorynlp/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/emov-db/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/emovo/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/emozionalmente/create.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/enterface/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/esd/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/gerparas/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/iemocap/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/jl/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/jtes/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/meld/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/mesd/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/mess/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/mlendsnd/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/msp-improv/process_database2.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/msp-podcast/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/oreau2/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/portuguese/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/ravdess/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/ravdess/process_database_speaker.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/savee/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/shemo/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/subesco/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/tess/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/thorsten-emotional/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/urdu/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/data/vivae/process_database.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/docs/source/conf.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/meta/demos/demo_best_model.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/meta/demos/my_experiment.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/meta/demos/my_experiment_local.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/meta/demos/plot_faster_anim.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/__init__.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/aug_train.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/augment.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/augmenting/__init__.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/augmenting/augmenter.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/augmenting/randomsplicer.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/augmenting/randomsplicing.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/augmenting/resampler.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/__init__.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_age.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_arousal.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_dominance.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_gender.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_mos.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_pesq.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_sdr.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_snr.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_stoi.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/ap_valence.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/autopredict/estimate_snr.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/cacheddataset.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/data/__init__.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/data/dataset.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/demo_feats.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/demo_predictor.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/experiment.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/explore.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/export.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/__init__.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_agender.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_agender_agender.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_analyser.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_auddim.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_audmodel.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_clap.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_hubert.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_import.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_mld.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_mos.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_opensmile.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_oxbow.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_praat.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_snr.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_spectra.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_spkrec.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_squim.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_trill.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_wav2vec2.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_wavlm.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feats_whisper.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/featureset.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feat_extract/feinberg_praat.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/feature_extractor.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/file_checker.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/filter_data.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/glob_conf.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/losses/__init__.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/losses/loss_ccc.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/losses/loss_softf1loss.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/__init__.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_bayes.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_gmm.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_knn.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_knn_reg.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_lin_reg.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_svm.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_svr.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_tree.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_tree_reg.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_tuned.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_xgb.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/models/model_xgr.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/multidb.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/nkuluflag.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/nkululeko.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/predict.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/reporting/__init__.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/reporting/defines.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/reporting/latex_writer.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/reporting/report.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/reporting/report_item.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/reporting/result.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/resample.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/scaler.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/segment.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/segmenting/__init__.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/segmenting/seg_inaspeechsegmenter.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/segmenting/seg_silero.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/syllable_nuclei.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/test.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/test_pretrain.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/utils/__init__.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/utils/files.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko/utils/util.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko.egg-info/SOURCES.txt +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko.egg-info/dependency_links.txt +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko.egg-info/requires.txt +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/nkululeko.egg-info/top_level.txt +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/pyproject.toml +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/setup.cfg +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/setup.py +0 -0
- {nkululeko-0.86.8 → nkululeko-0.87.0}/venv/bin/activate_this.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.86.8
+Version: 0.87.0
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -51,6 +51,7 @@ Requires-Dist: pylatex
 - [t-SNE plots](#t-sne-plots)
 - [Data distribution](#data-distribution)
 - [Bias checking](#bias-checking)
+- [Uncertainty](#uncertainty)
 - [Documentation](#documentation)
 - [Installation](#installation)
 - [Usage](#usage)
@@ -113,6 +114,13 @@ In cases you might wonder if there's bias in your data. You can try to detect th
 
 <img src="meta/images/emotion-pesq.png" width="500px"/>
 
+### Uncertainty
+Nkululeko estimates uncertainty of model decision (only for classifiers) with entropy over the class-probabilities or logits per sample.
+
+<img src="meta/images/uncertainty.png" width="500px"/>
+
+
+
 ## Documentation
 The documentation, along with extensions of installation, usage, INI file format, and examples, can be found [nkululeko.readthedocs.io](https://nkululeko.readthedocs.io).
 
@@ -343,6 +351,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
 
+Version 0.87.0
+--------------
+* added class probability output and uncertainty analysis
+
 Version 0.86.8
 --------------
 * handle single feature sets as strings in the config
README.md
@@ -7,6 +7,7 @@
 - [t-SNE plots](#t-sne-plots)
 - [Data distribution](#data-distribution)
 - [Bias checking](#bias-checking)
+- [Uncertainty](#uncertainty)
 - [Documentation](#documentation)
 - [Installation](#installation)
 - [Usage](#usage)
@@ -69,6 +70,13 @@ In cases you might wonder if there's bias in your data. You can try to detect th
 
 <img src="meta/images/emotion-pesq.png" width="500px"/>
 
+### Uncertainty
+Nkululeko estimates uncertainty of model decision (only for classifiers) with entropy over the class-probabilities or logits per sample.
+
+<img src="meta/images/uncertainty.png" width="500px"/>
+
+
+
 ## Documentation
 The documentation, along with extensions of installation, usage, INI file format, and examples, can be found [nkululeko.readthedocs.io](https://nkululeko.readthedocs.io).
 
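The new Uncertainty section in PKG-INFO and README.md above describes per-sample uncertainty as entropy over class probabilities or logits. A minimal sketch of that idea, assuming a probas DataFrame with one column per class like the ones the updated models build further below; the use of scipy.stats.entropy and the toy numbers are illustrative, not taken from nkululeko itself:

```python
import pandas as pd
from scipy.stats import entropy

# hypothetical class probabilities for two test samples
probas = pd.DataFrame(
    {"anger": [0.80, 0.34], "happiness": [0.15, 0.33], "neutral": [0.05, 0.33]},
    index=["sample_1", "sample_2"],
)

# entropy per sample: near 0 for confident decisions, maximal for uniform ones
uncertainty = probas.apply(lambda row: entropy(row, base=2), axis=1)
print(uncertainty)  # sample_2 is far less certain than sample_1
```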
nkululeko/constants.py
@@ -1,2 +1,2 @@
-VERSION="0.86.8"
+VERSION="0.87.0"
 SAMPLING_RATE = 16000
nkululeko/data/dataset_csv.py
@@ -23,6 +23,9 @@ class Dataset_CSV(Dataset):
         root = os.path.dirname(data_file)
         audio_path = self.util.config_val_data(self.name, "audio_path", "./")
         df = pd.read_csv(data_file)
+        # trim all string values
+        df_obj = df.select_dtypes("object")
+        df[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())
         # special treatment for segmented dataframes with only one column:
         if "start" in df.columns and len(df.columns) == 4:
             index = audformat.segmented_index(
@@ -49,8 +52,7 @@ class Dataset_CSV(Dataset):
                     .map(lambda x: root + "/" + audio_path + "/" + x)
                     .values
                 )
-                df = df.set_index(df.index.set_levels(
-                    file_index, level="file"))
+                df = df.set_index(df.index.set_levels(file_index, level="file"))
             else:
                 if not isinstance(df, pd.DataFrame):
                     df = pd.DataFrame(df)
@@ -59,27 +61,24 @@ class Dataset_CSV(Dataset):
                         lambda x: root + "/" + audio_path + "/" + x
                     )
                 )
-        else:
+        else:  # absolute path is True
             if audformat.index_type(df.index) == "segmented":
                 file_index = (
-                    df.index.levels[0]
-                    .map(lambda x: audio_path + "/" + x)
-                    .values
+                    df.index.levels[0].map(lambda x: audio_path + "/" + x).values
                 )
-                df = df.set_index(df.index.set_levels(
-                    file_index, level="file"))
+                df = df.set_index(df.index.set_levels(file_index, level="file"))
             else:
                 if not isinstance(df, pd.DataFrame):
                     df = pd.DataFrame(df)
-                df = df.set_index(
-                    lambda x: audio_path + "/" + x
+                df = df.set_index(
+                    df.index.to_series().apply(lambda x: audio_path + "/" + x)
+                )
 
         self.df = df
         self.db = None
         self.got_target = True
         self.is_labeled = self.got_target
-        self.start_fresh = eval(
-            self.util.config_val("DATA", "no_reuse", "False"))
+        self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
         is_index = False
         try:
             if self.is_labeled and not "class_label" in self.df.columns:
@@ -106,8 +105,7 @@ class Dataset_CSV(Dataset):
             f" {self.got_gender}, got age: {self.got_age}"
         )
         self.util.debug(r_string)
-        glob_conf.report.add_item(ReportItem(
-            "Data", "Loaded report", r_string))
+        glob_conf.report.add_item(ReportItem("Data", "Loaded report", r_string))
 
     def prepare(self):
         super().prepare()
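The new trimming step in dataset_csv.py strips stray whitespace from every string column right after the CSV is read, so values like " anger" and "anger" no longer count as different labels. A standalone sketch of the same pandas idiom, with made-up data:

```python
import pandas as pd

df = pd.DataFrame(
    {"file": ["a.wav ", " b.wav"], "emotion": [" anger", "joy "], "duration": [1.2, 3.4]}
)

# strip leading/trailing whitespace in all object (string) columns only
df_obj = df.select_dtypes("object")
df[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())

print(df["emotion"].tolist())  # ['anger', 'joy'] - numeric columns are untouched
```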
nkululeko/demo.py
@@ -30,10 +30,8 @@ from transformers import pipeline
 
 
 def main(src_dir):
-    parser = argparse.ArgumentParser(
-
-    parser.add_argument("--config", default="exp.ini",
-                        help="The base configuration")
+    parser = argparse.ArgumentParser(description="Call the nkululeko DEMO framework.")
+    parser.add_argument("--config", default="exp.ini", help="The base configuration")
     parser.add_argument(
         "--file", help="A file that should be processed (16kHz mono wav)"
     )
@@ -84,8 +82,7 @@ def main(src_dir):
     )
 
     def print_pipe(files, outfile):
-        """
-        Prints the pipeline output for a list of files, and optionally writes the results to an output file.
+        """Prints the pipeline output for a list of files, and optionally writes the results to an output file.
 
         Args:
             files (list): A list of file paths to process through the pipeline.
@@ -108,8 +105,7 @@ def main(src_dir):
             f.write("\n".join(results))
 
     if util.get_model_type() == "finetune":
-        model_path = os.path.join(
-            util.get_exp_dir(), "models", "run_0", "torch")
+        model_path = os.path.join(util.get_exp_dir(), "models", "run_0", "torch")
         pipe = pipeline("audio-classification", model=model_path)
     if args.file is not None:
         print_pipe([args.file], args.outfile)
nkululeko/modelrunner.py
@@ -85,7 +85,7 @@ class Modelrunner:
                 f"run: {self.run} epoch: {epoch}: result: {test_score_metric}"
             )
             # print(f"performance: {performance.split(' ')[1]}")
-            performance = float(test_score_metric.split(
+            performance = float(test_score_metric.split(" ")[1])
             if performance > self.best_performance:
                 self.best_performance = performance
                 self.best_epoch = epoch
@@ -204,15 +204,15 @@ class Modelrunner:
                 self.df_train, self.df_test, self.feats_train, self.feats_test
             )
         elif model_type == "cnn":
-            from nkululeko.models.model_cnn import CNN_model
+            from nkululeko.models.model_cnn import CNNModel
 
-            self.model = CNN_model(
+            self.model = CNNModel(
                 self.df_train, self.df_test, self.feats_train, self.feats_test
             )
         elif model_type == "mlp":
-            from nkululeko.models.model_mlp import MLP_model
+            from nkululeko.models.model_mlp import MLPModel
 
-            self.model = MLP_model(
+            self.model = MLPModel(
                 self.df_train, self.df_test, self.feats_train, self.feats_test
             )
         elif model_type == "mlp_reg":
nkululeko/models/model.py
@@ -247,8 +247,25 @@ class Model:
         self.clf.fit(feats, labels)
 
     def get_predictions(self):
-        predictions = self.clf.predict(self.feats_test.to_numpy())
-
+        # predictions = self.clf.predict(self.feats_test.to_numpy())
+        if self.util.exp_is_classification():
+            # make a dataframe for the class probabilities
+            proba_d = {}
+            for c in self.clf.classes_:
+                proba_d[c] = []
+            # get the class probabilities
+            predictions = self.clf.predict_proba(self.feats_test.to_numpy())
+            # pred = self.clf.predict(features)
+            for i, c in enumerate(self.clf.classes_):
+                proba_d[c] = list(predictions.T[i])
+            probas = pd.DataFrame(proba_d)
+            probas = probas.set_index(self.feats_test.index)
+            predictions = probas.idxmax(axis=1).values
+        else:
+            predictions = self.clf.predict(self.feats_test.to_numpy())
+            probas = None
+
+        return predictions, probas
 
     def predict(self):
         if self.feats_test.isna().to_numpy().any():
@@ -263,13 +280,16 @@ class Model:
             )
             return report
         """Predict the whole eval feature set"""
-        predictions = self.get_predictions()
+        predictions, probas = self.get_predictions()
+
         report = Reporter(
             self.df_test[self.target].to_numpy().astype(float),
             predictions,
             self.run,
             self.epoch,
+            probas=probas,
         )
+        report.print_probabilities()
         return report
 
     def get_type(self):
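Model.get_predictions() now returns hard labels together with a per-sample class-probability DataFrame built from predict_proba; the hard label is simply the column with the highest probability. A condensed, self-contained sketch of that pattern (the toy data and the SVC classifier are only for illustration, and the DataFrame is built directly instead of via the column-by-column loop used above):

```python
import pandas as pd
from sklearn.svm import SVC

# toy training data and an indexed test set
X_train = [[0.0], [0.1], [1.0], [1.1]]
y_train = ["neg", "neg", "pos", "pos"]
feats_test = pd.DataFrame([[0.05], [0.90]], index=["sample_1", "sample_2"])

clf = SVC(probability=True).fit(X_train, y_train)

# one probability column per class, indexed like the test features
probas = pd.DataFrame(
    clf.predict_proba(feats_test.to_numpy()),
    columns=clf.classes_,
    index=feats_test.index,
)

# hard predictions are the most probable class per row
predictions = probas.idxmax(axis=1).values
print(probas.round(2))
print(predictions)
```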
nkululeko/models/model_cnn.py
@@ -5,33 +5,40 @@ Inspired by code from Su Lei
 
 
 """
+import ast
+from collections import OrderedDict
+
+import numpy as np
+import pandas as pd
+from PIL import Image
+from sklearn.metrics import recall_score
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-import torchvision
-import torchvision.transforms as transforms
 from torch.utils.data import Dataset
-import
-import numpy as np
-from sklearn.metrics import recall_score
-from collections import OrderedDict
-from PIL import Image
-from traitlets import default
+import torchvision.transforms as transforms
 
-from nkululeko.utils.util import Util
 import nkululeko.glob_conf as glob_conf
+from nkululeko.losses.loss_softf1loss import SoftF1Loss
 from nkululeko.models.model import Model
 from nkululeko.reporting.reporter import Reporter
-from nkululeko.
+from nkululeko.utils.util import Util
 
 
-class CNN_model(Model):
-    """CNN = convolutional neural net"""
+class CNNModel(Model):
+    """CNN = convolutional neural net."""
 
     is_classifier = True
 
     def __init__(self, df_train, df_test, feats_train, feats_test):
-        """Constructor taking
+        """Constructor, taking all dataframes.
+
+        Args:
+            df_train (pd.DataFrame): The train labels.
+            df_test (pd.DataFrame): The test labels.
+            feats_train (pd.DataFrame): The train features.
+            feats_test (pd.DataFrame): The test features.
+        """
         super().__init__(df_train, df_test, feats_train, feats_test)
         super().set_model_type("ann")
         self.name = "cnn"
@@ -147,7 +154,20 @@ class CNN_model(Model):
             self.optimizer.step()
         self.loss = (np.asarray(losses)).mean()
 
-    def
+    def get_probas(self, logits):
+        # make a dataframe for probabilites (logits)
+        proba_d = {}
+        classes = self.df_test[self.target].unique()
+        classes.sort()
+        for c in classes:
+            proba_d[c] = []
+        for i, c in enumerate(classes):
+            proba_d[c] = list(logits.numpy().T[i])
+        probas = pd.DataFrame(proba_d)
+        probas = probas.set_index(self.df_test.index)
+        return probas
+
+    def evaluate(self, model, loader, device):
         logits = torch.zeros(len(loader.dataset), self.class_num)
         targets = torch.zeros(len(loader.dataset))
         model.eval()
@@ -169,14 +189,15 @@ class CNN_model(Model):
         self.loss_eval = (np.asarray(losses)).mean()
         predictions = logits.argmax(dim=1)
         uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
-        return uar, targets, predictions
+        return uar, targets, predictions, logits
 
     def predict(self):
-        _, truths, predictions = self.
+        _, truths, predictions, logits = self.evaluate(
             self.model, self.testloader, self.device
         )
-        uar, _, _ = self.
-
+        uar, _, _, _ = self.evaluate(self.model, self.trainloader, self.device)
+        probas = self.get_probas(logits)
+        report = Reporter(truths, predictions, self.run, self.epoch, probas=probas)
         try:
             report.result.loss = self.loss
         except AttributeError:  # if the model was loaded from disk the loss is unknown
@@ -189,13 +210,11 @@ class CNN_model(Model):
         return report
 
     def get_predictions(self):
-        _,
-            self.model, self.testloader, self.device
-        )
+        _, _, predictions, _ = self.evaluate(self.model, self.testloader, self.device)
         return predictions.numpy()
 
     def predict_sample(self, features):
-        """Predict one sample"""
+        """Predict one sample."""
         with torch.no_grad():
             logits = self.model(torch.from_numpy(features).to(self.device))
             a = logits.numpy()
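In the torch models (CNNModel above and MLPModel below), get_probas() stores the raw per-class logits from evaluate() in the probas DataFrame. If normalized probabilities are wanted, for instance before computing entropy, a softmax over the class dimension is the usual conversion; this diff does not show how the reporter treats the values, so the softmax here is an assumption, and the sample logits and class names are made up:

```python
import pandas as pd
import torch
import torch.nn.functional as F

# hypothetical evaluation logits for 3 samples and 4 emotion classes
logits = torch.tensor(
    [[2.0, 0.1, -1.0, 0.3],
     [0.2, 0.1, 0.0, 0.1],
     [-0.5, 3.0, 0.0, 0.2]]
)
classes = ["anger", "happiness", "neutral", "sadness"]

# softmax turns each row of logits into probabilities that sum to 1
probs = F.softmax(logits, dim=1)
probas = pd.DataFrame(probs.numpy(), columns=classes)
print(probas.round(3))
```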
nkululeko/models/model_mlp.py
@@ -1,25 +1,33 @@
 # model_mlp.py
+import ast
+from collections import OrderedDict
+
+import numpy as np
 import pandas as pd
+from sklearn.metrics import recall_score
+import torch
 
-from nkululeko.utils.util import Util
 import nkululeko.glob_conf as glob_conf
+from nkululeko.losses.loss_softf1loss import SoftF1Loss
 from nkululeko.models.model import Model
 from nkululeko.reporting.reporter import Reporter
-import
-import ast
-import numpy as np
-from sklearn.metrics import recall_score
-from collections import OrderedDict
-from nkululeko.losses.loss_softf1loss import SoftF1Loss
+from nkululeko.utils.util import Util
 
 
-class MLP_model(Model):
+class MLPModel(Model):
     """MLP = multi layer perceptron."""
 
     is_classifier = True
 
     def __init__(self, df_train, df_test, feats_train, feats_test):
-        """Constructor taking
+        """Constructor, taking all dataframes.
+
+        Args:
+            df_train (pd.DataFrame): The train labels.
+            df_test (pd.DataFrame): The test labels.
+            feats_train (pd.DataFrame): The train features.
+            feats_test (pd.DataFrame): The test features.
+        """
         super().__init__(df_train, df_test, feats_train, feats_test)
         super().set_model_type("ann")
         self.name = "mlp"
@@ -97,7 +105,7 @@ class MLP_model(Model):
             self.optimizer.step()
         self.loss = (np.asarray(losses)).mean()
 
-    def
+    def evaluate(self, model, loader, device):
         logits = torch.zeros(len(loader.dataset), self.class_num)
         targets = torch.zeros(len(loader.dataset))
         model.eval()
@@ -119,14 +127,28 @@ class MLP_model(Model):
         self.loss_eval = (np.asarray(losses)).mean()
         predictions = logits.argmax(dim=1)
         uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
-        return uar, targets, predictions
+        return uar, targets, predictions, logits
+
+    def get_probas(self, logits):
+        # make a dataframe for probabilites (logits)
+        proba_d = {}
+        classes = self.df_test[self.target].unique()
+        classes.sort()
+        for c in classes:
+            proba_d[c] = []
+        for i, c in enumerate(classes):
+            proba_d[c] = list(logits.numpy().T[i])
+        probas = pd.DataFrame(proba_d)
+        probas = probas.set_index(self.df_test.index)
+        return probas
 
     def predict(self):
-        _, truths, predictions = self.
+        _, truths, predictions, logits = self.evaluate(
             self.model, self.testloader, self.device
         )
-        uar, _, _ = self.
-
+        uar, _, _, _ = self.evaluate(self.model, self.trainloader, self.device)
+        probas = self.get_probas(logits)
+        report = Reporter(truths, predictions, self.run, self.epoch, probas=probas)
         try:
             report.result.loss = self.loss
         except AttributeError:  # if the model was loaded from disk the loss is unknown
@@ -139,9 +161,7 @@ class MLP_model(Model):
         return report
 
     def get_predictions(self):
-        _,
-            self.model, self.testloader, self.device
-        )
+        _, _, predictions, _ = self.evaluate(self.model, self.testloader, self.device)
         return predictions.numpy()
 
     def get_loader(self, df_x, df_y, shuffle):
nkululeko/models/model_mlp_regression.py
@@ -97,7 +97,9 @@ class MLP_Reg_model(Model):
             self.model, self.testloader, self.device
         )
         result, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
-        report = Reporter(
+        report = Reporter(
+            truths.numpy(), predictions.numpy(), None, self.run, self.epoch
+        )
         try:
             report.result.loss = self.loss
         except AttributeError:  # if the model was loaded from disk the loss is unknown
nkululeko/plots.py
@@ -48,7 +48,7 @@ class Plots:
         )
         ax.set_ylabel(f"number of speakers")
         ax.set_xlabel("number of samples")
-        self.
+        self.save_plot(
             ax,
             "Samples per speaker",
             f"Samples per speaker ({df_speakers.shape[0]})",
@@ -70,9 +70,9 @@ class Plots:
                 rot=0,
             )
         )
-        ax.set_ylabel(
+        ax.set_ylabel("number of speakers")
         ax.set_xlabel("number of samples")
-        self.
+        self.save_plot(
             ax,
             "Sample value counts",
             f"Samples per speaker ({df_speakers.shape[0]})",
@@ -96,7 +96,7 @@ class Plots:
             binned_data = self.util.continuous_to_categorical(df[class_label])
             ax = binned_data.value_counts().plot(kind="bar")
             filename_binned = f"{class_label}_discreet"
-            self.
+            self.save_plot(
                 ax,
                 "Sample value counts",
                 filename_binned,
@@ -106,7 +106,7 @@ class Plots:
             dist_type = self.util.config_val("EXPL", "dist_type", "hist")
             ax = df[class_label].plot(kind=dist_type)
 
-            self.
+            self.save_plot(
                 ax,
                 "Sample value counts",
                 filename,
@@ -131,17 +131,17 @@ class Plots:
                         df, class_label, att1, self.target, type_s
                     )
                 else:
-                    ax, caption = self.
+                    ax, caption = self.plotcatcont(
                         df, class_label, att1, att1, type_s
                     )
             else:
                 if self.util.is_categorical(df[att1]):
-                    ax, caption = self.
+                    ax, caption = self.plotcatcont(
                         df, att1, class_label, att1, type_s
                     )
                 else:
                     ax, caption = self._plot2cont(df, class_label, att1, type_s)
-            self.
+            self.save_plot(
                 ax,
                 caption,
                 f"Correlation of {self.target} and {att[0]}",
@@ -171,15 +171,11 @@ class Plots:
                     ax, caption = self._plot2cat(df, att1, att2, att1, type_s)
                 else:
                     # class_label = cat, att1 = cat, att2 = cont
-                    ax, caption = self.
-                        df, att1, att2, att1, type_s
-                    )
+                    ax, caption = self.plotcatcont(df, att1, att2, att1, type_s)
             else:
                 if self.util.is_categorical(df[att2]):
                     # class_label = cat, att1 = cont, att2 = cat
-                    ax, caption = self.
-                        df, att2, att1, att2, type_s
-                    )
+                    ax, caption = self.plotcatcont(df, att2, att1, att2, type_s)
                 else:
                     # class_label = cat, att1 = cont, att2 = cont
                     ax, caption = self._plot2cont_cat(
@@ -205,7 +201,7 @@ class Plots:
                 # class_label = cont, att1 = cont, att2 = cont
                 ax, caption = self._plot2cont(df, att1, att2, type_s)
 
-            self.
+            self.save_plot(
                 ax, caption, f"Correlation of {att1} and {att2}", filename, type_s
             )
 
@@ -215,16 +211,16 @@ class Plots:
             f" {att} has more than 2 values. Perhaps you forgot to state a list of lists?"
         )
 
-    def
+    def save_plot(self, ax, caption, header, filename, type_s):
         # one up because of the runs
         fig_dir = self.util.get_path("fig_dir") + "../"
-
+        fig_plots = ax.figure
         # avoid warning
         # plt.tight_layout()
         img_path = f"{fig_dir}{filename}_{type_s}.{self.format}"
         plt.savefig(img_path)
-        plt.close(
-
+        plt.close(fig_plots)
+        self.util.debug(f"Saved plot to {img_path}")
         glob_conf.report.add_item(
             ReportItem(
                 Header.HEADER_EXPLORE,
@@ -244,35 +240,29 @@ class Plots:
         return att, df
 
     def _plot2cont_cat(self, df, cont1, cont2, cat, ylab):
-        """
-        plot relation of two continuous distributions with one categorical
-        """
+        """Plot relation of two continuous distributions with one categorical."""
         pearson = stats.pearsonr(df[cont1], df[cont2])
         # trunc to three digits
         pearson = int(pearson[0] * 1000) / 1000
         pearson_string = f"PCC: {pearson}"
         ax = sns.lmplot(data=df, x=cont1, y=cont2, hue=cat)
         caption = f"{ylab} {df.shape[0]}. {pearson_string}"
-        ax.
+        ax.figure.suptitle(caption)
         return ax, caption
 
     def _plot2cont(self, df, col1, col2, ylab):
-        """
-        plot relation of two continuous distributions
-        """
+        """Plot relation of two continuous distributions."""
         pearson = stats.pearsonr(df[col1], df[col2])
         # trunc to three digits
         pearson = int(pearson[0] * 1000) / 1000
         pearson_string = f"PCC: {pearson}"
         ax = sns.lmplot(data=df, x=col1, y=col2)
         caption = f"{ylab} {df.shape[0]}. {pearson_string}"
-        ax.
+        ax.figure.suptitle(caption)
         return ax, caption
 
-    def
-        """
-        plot relation of categorical distribution with continuous
-        """
+    def plotcatcont(self, df, cat_col, cont_col, xlab, ylab):
+        """Plot relation of categorical distribution with continuous."""
         dist_type = self.util.config_val("EXPL", "dist_type", "hist")
         cats, cat_str, es = su.get_effect_size(df, cat_col, cont_col)
         if dist_type == "hist":
@@ -287,13 +277,11 @@ class Plots:
             )
         ax.set(xlabel=f"{cont_col}")
         caption = f"{ylab} {df.shape[0]}. {cat_str} ({cats}):" f" {es}"
-        ax.
+        ax.figure.suptitle(caption)
         return ax, caption
 
     def _plot2cat(self, df, col1, col2, xlab, ylab):
-        """
-        plot relation of 2 categorical distributions
-        """
+        """Plot relation of 2 categorical distributions."""
         crosstab = pd.crosstab(index=df[col1], columns=df[col2])
         res_pval = stats.chi2_contingency(crosstab)
         res_pval = int(res_pval[1] * 1000) / 1000
@@ -320,8 +308,8 @@ class Plots:
         max = self.util.to_3_digits(df.duration.max())
         title = f"Duration distr. for {sample_selection} {df.shape[0]}. min={min}, max={max}"
        ax.set_title(title)
-        ax.set_xlabel(
-        ax.set_ylabel(
+        ax.set_xlabel("duration")
+        ax.set_ylabel("number of samples")
         fig = ax.figure
         # plt.tight_layout()
         img_path = f"{fig_dir}{filename}_{sample_selection}.{self.format}"
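Several of the plots.py changes rely on sns.lmplot returning a FacetGrid whose .figure attribute (recent seaborn versions) holds the underlying matplotlib Figure: the caption is set via suptitle and the figure is closed explicitly after saving, as the new save_plot() does. A minimal sketch of that pattern, with made-up data and output path:

```python
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

df = pd.DataFrame({"pitch": [110, 220, 180, 300], "energy": [0.2, 0.8, 0.5, 0.9]})

grid = sns.lmplot(data=df, x="pitch", y="energy")  # returns a FacetGrid, not an Axes
grid.figure.suptitle(f"example, {df.shape[0]} samples")

plt.savefig("example_plot.png")  # saves the current (lmplot) figure
plt.close(grid.figure)           # release the figure explicitly
```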