nkululeko 0.81.6__tar.gz → 0.82.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nkululeko-0.81.6 → nkululeko-0.82.0}/CHANGELOG.md +9 -0
- {nkululeko-0.81.6/nkululeko.egg-info → nkululeko-0.82.0}/PKG-INFO +20 -1
- {nkululeko-0.81.6 → nkululeko-0.82.0}/README.md +10 -0
- nkululeko-0.82.0/meta/demos/multiple_exeriments/do_experiments.py +35 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/constants.py +1 -1
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_agender.py +6 -4
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_auddim.py +5 -3
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_audmodel.py +5 -3
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_clap.py +10 -6
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_hubert.py +3 -2
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_import.py +2 -2
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_mos.py +2 -2
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_opensmile.py +10 -24
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_oxbow.py +16 -11
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_praat.py +8 -5
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_spectra.py +3 -2
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_squim.py +2 -2
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_trill.py +10 -6
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_wav2vec2.py +16 -7
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_wavlm.py +1 -4
- nkululeko-0.82.0/nkululeko/feat_extract/feats_whisper.py +110 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/featureset.py +6 -3
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feature_extractor.py +15 -4
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/multidb.py +5 -10
- nkululeko-0.82.0/nkululeko/nkuluflag.py +95 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/nkululeko.py +6 -4
- {nkululeko-0.81.6 → nkululeko-0.82.0/nkululeko.egg-info}/PKG-INFO +20 -1
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko.egg-info/SOURCES.txt +2 -1
- nkululeko-0.81.6/meta/demos/multiple_exeriments/do_experiments.py +0 -48
- nkululeko-0.81.6/meta/demos/multiple_exeriments/parse_nkulu.py +0 -112
- {nkululeko-0.81.6 → nkululeko-0.82.0}/LICENSE +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/aesdd/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/androids/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/androids_orig/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/androids_test/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/ased/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/asvp-esd/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/baved/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/cafe/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/clac/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/cmu-mosei/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/demos/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/ekorpus/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/emns/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/emofilm/convert_to_16k.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/emofilm/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/emorynlp/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/emov-db/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/emovo/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/emozionalmente/create.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/enterface/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/esd/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/gerparas/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/iemocap/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/jl/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/jtes/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/meld/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/mesd/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/mess/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/mlendsnd/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/msp-improv/process_database2.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/msp-podcast/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/oreau2/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/portuguese/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/ravdess/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/ravdess/process_database_speaker.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/savee/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/shemo/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/subesco/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/tess/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/thorsten-emotional/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/urdu/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/data/vivae/process_database.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/docs/source/conf.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/meta/demos/demo_best_model.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/meta/demos/my_experiment.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/meta/demos/my_experiment_local.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/meta/demos/plot_faster_anim.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/__init__.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/aug_train.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/augment.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/augmenting/__init__.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/augmenting/augmenter.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/augmenting/randomsplicer.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/augmenting/randomsplicing.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/augmenting/resampler.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/__init__.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_age.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_arousal.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_dominance.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_gender.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_mos.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_pesq.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_sdr.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_snr.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_stoi.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/ap_valence.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/autopredict/estimate_snr.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/cacheddataset.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/data/__init__.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/data/dataset.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/data/dataset_csv.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/demo.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/demo_feats.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/demo_predictor.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/experiment.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/explore.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/export.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/__init__.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_agender_agender.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_analyser.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_mld.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_snr.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feats_spkrec.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/feat_extract/feinberg_praat.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/file_checker.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/filter_data.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/glob_conf.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/losses/__init__.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/losses/loss_ccc.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/losses/loss_softf1loss.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/modelrunner.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/__init__.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_bayes.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_cnn.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_gmm.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_knn.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_knn_reg.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_lin_reg.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_mlp.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_mlp_regression.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_svm.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_svr.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_tree.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_tree_reg.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_xgb.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/models/model_xgr.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/plots.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/predict.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/reporter.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/reporting/__init__.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/reporting/defines.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/reporting/latex_writer.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/reporting/report.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/reporting/report_item.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/reporting/reporter.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/reporting/result.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/resample.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/runmanager.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/scaler.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/segment.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/segmenting/__init__.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/segmenting/seg_inaspeechsegmenter.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/segmenting/seg_silero.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/syllable_nuclei.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/test.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/test_predictor.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/utils/__init__.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/utils/files.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/utils/stats.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko/utils/util.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko.egg-info/dependency_links.txt +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko.egg-info/requires.txt +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/nkululeko.egg-info/top_level.txt +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/pyproject.toml +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/setup.cfg +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/setup.py +0 -0
- {nkululeko-0.81.6 → nkululeko-0.82.0}/venv/bin/activate_this.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.81.6
|
3
|
+
Version: 0.82.0
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -42,6 +42,8 @@ Requires-Dist: umap-learn
|
|
42
42
|
Requires-Dist: xgboost
|
43
43
|
Requires-Dist: pylatex
|
44
44
|
|
45
|
+
usage: nkuluflag.py [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET]
|
46
|
+
[--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
|
45
47
|
- [Overview](#overview)
|
46
48
|
- [Confusion matrix](#confusion-matrix)
|
47
49
|
- [Epoch progression](#epoch-progression)
|
@@ -203,6 +205,14 @@ All of them take *--config <my_config.ini>* as an argument.
|
|
203
205
|
* **nkululeko.predict**: [predict features](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/) like SNR, MOS, arousal/valence, age/gender, with DNN models
|
204
206
|
* **nkululeko.segment**: [segment a database](http://blog.syntheticspeech.de/2023/07/14/nkululeko-segmenting-a-database/) based on VAD (voice activity detection)
|
205
207
|
* **nkululeko.resample**: check on all [sampling rates and change](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/) to 16kHz
|
208
|
+
* **nkululeko.nkuluflag**: a convenient module to specify configuration parameters on the command-line.
|
209
|
+
* usage: nkuluflag.py [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET]
|
210
|
+
[--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
|
211
|
+
|
212
|
+
|
213
|
+
|
214
|
+
|
215
|
+
|
206
216
|
|
207
217
|
There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
|
208
218
|
* [Introduction](http://blog.syntheticspeech.de/2021/08/04/machine-learning-experiment-framework/)
|
@@ -323,6 +333,15 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
|
|
323
333
|
Changelog
|
324
334
|
=========
|
325
335
|
|
336
|
+
Version 0.82.0
|
337
|
+
--------------
|
338
|
+
* added nkuluflag module
|
339
|
+
|
340
|
+
Version 0.81.7
|
341
|
+
--------------
|
342
|
+
* bugfixes
|
343
|
+
* added whisper feature extractor
|
344
|
+
|
326
345
|
Version 0.81.6
|
327
346
|
--------------
|
328
347
|
* updated documentation
|
@@ -1,3 +1,5 @@
|
|
1
|
+
usage: nkuluflag.py [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET]
|
2
|
+
[--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
|
1
3
|
- [Overview](#overview)
|
2
4
|
- [Confusion matrix](#confusion-matrix)
|
3
5
|
- [Epoch progression](#epoch-progression)
|
@@ -159,6 +161,14 @@ All of them take *--config <my_config.ini>* as an argument.
|
|
159
161
|
* **nkululeko.predict**: [predict features](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/) like SNR, MOS, arousal/valence, age/gender, with DNN models
|
160
162
|
* **nkululeko.segment**: [segment a database](http://blog.syntheticspeech.de/2023/07/14/nkululeko-segmenting-a-database/) based on VAD (voice activity detection)
|
161
163
|
* **nkululeko.resample**: check on all [sampling rates and change](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/) to 16kHz
|
164
|
+
* **nkululeko.nkuluflag**: a convenient module to specify configuration parameters on the command-line.
|
165
|
+
* usage: nkuluflag.py [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET]
|
166
|
+
[--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
|
167
|
+
|
168
|
+
|
169
|
+
|
170
|
+
|
171
|
+
|
162
172
|
|
163
173
|
There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
|
164
174
|
* [Introduction](http://blog.syntheticspeech.de/2021/08/04/machine-learning-experiment-framework/)
|
@@ -0,0 +1,35 @@
|
|
1
|
+
import os
|
2
|
+
|
3
|
+
|
4
|
+
classifiers = [
|
5
|
+
{"--model": "mlp", "--layers": "\"{'l1':64,'l2':16}\"", "--epochs": 100},
|
6
|
+
{
|
7
|
+
"--model": "mlp",
|
8
|
+
"--layers": "\"{'l1':128,'l2':64,'l3':16}\"",
|
9
|
+
"--learning_rate": ".01",
|
10
|
+
"--drop": ".3",
|
11
|
+
"--epochs": 100,
|
12
|
+
},
|
13
|
+
{"--model": "xgb", "--epochs": 1},
|
14
|
+
{"--model": "svm", "--epochs": 1},
|
15
|
+
]
|
16
|
+
|
17
|
+
features = [
|
18
|
+
{"--feat": "os"},
|
19
|
+
# {'--feat': 'os',
|
20
|
+
# '--set': 'ComParE_2016',
|
21
|
+
# },
|
22
|
+
{"--feat": "audmodel"},
|
23
|
+
]
|
24
|
+
|
25
|
+
|
26
|
+
for c in classifiers:
|
27
|
+
for f in features:
|
28
|
+
cmd = "python -m nkululeko.nkuluflag --config exp.ini "
|
29
|
+
for item in c:
|
30
|
+
cmd += f"{item} {c[item]} "
|
31
|
+
for item in f:
|
32
|
+
cmd += f"{item} {f[item]} "
|
33
|
+
print(cmd)
|
34
|
+
os.system(cmd)
|
35
|
+
# print(f"results: {result}, {last_epoch}")
|
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.81.6"
|
1
|
+
VERSION="0.82.0"
|
2
2
|
SAMPLING_RATE = 16000
|
@@ -9,16 +9,17 @@ import numpy as np
|
|
9
9
|
import audinterface
|
10
10
|
|
11
11
|
|
12
|
-
class AudModelAgenderSet(Featureset):
|
12
|
+
class AgenderSet(Featureset):
|
13
13
|
"""
|
14
14
|
Embeddings from the wav2vec2. based model finetuned on agender data, described in the paper
|
15
15
|
"Speech-based Age and Gender Prediction with Transformers"
|
16
16
|
https://arxiv.org/abs/2306.16962
|
17
17
|
"""
|
18
18
|
|
19
|
-
def __init__(self, name, data_df):
|
20
|
-
super().__init__(name, data_df)
|
19
|
+
def __init__(self, name, data_df, feats_type):
|
20
|
+
super().__init__(name, data_df, feats_type)
|
21
21
|
self.model_loaded = False
|
22
|
+
self.feats_type = feats_type
|
22
23
|
|
23
24
|
def _load_model(self):
|
24
25
|
model_url = "https://zenodo.org/record/7761387/files/w2v2-L-robust-6-age-gender.25c844af-1.1.1.zip"
|
@@ -28,7 +29,8 @@ class AudModelAgenderSet(Featureset):
|
|
28
29
|
if not os.path.isdir(model_root):
|
29
30
|
cache_root = audeer.mkdir("cache")
|
30
31
|
model_root = audeer.mkdir(model_root)
|
31
|
-
archive_path = audeer.download_url(
|
32
|
+
archive_path = audeer.download_url(
|
33
|
+
model_url, cache_root, verbose=True)
|
32
34
|
audeer.extract_archive(archive_path, model_root)
|
33
35
|
device = self.util.config_val("MODEL", "device", "cpu")
|
34
36
|
self.model = audonnx.load(model_root, device=device)
|
@@ -21,9 +21,10 @@ class AuddimSet(Featureset):
|
|
21
21
|
https://arxiv.org/abs/2203.07378.
|
22
22
|
"""
|
23
23
|
|
24
|
-
def __init__(self, name, data_df):
|
25
|
-
super().__init__(name, data_df)
|
24
|
+
def __init__(self, name, data_df, feats_type):
|
25
|
+
super().__init__(name, data_df, feats_type)
|
26
26
|
self.model_loaded = False
|
27
|
+
self.feats_types = feats_type
|
27
28
|
|
28
29
|
def _load_model(self):
|
29
30
|
model_url = "https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip"
|
@@ -31,7 +32,8 @@ class AuddimSet(Featureset):
|
|
31
32
|
if not os.path.isdir(model_root):
|
32
33
|
cache_root = audeer.mkdir("cache")
|
33
34
|
model_root = audeer.mkdir(model_root)
|
34
|
-
archive_path = audeer.download_url(
|
35
|
+
archive_path = audeer.download_url(
|
36
|
+
model_url, cache_root, verbose=True)
|
35
37
|
audeer.extract_archive(archive_path, model_root)
|
36
38
|
cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
37
39
|
device = self.util.config_val("MODEL", "device", cuda)
|
@@ -19,9 +19,10 @@ class AudmodelSet(Featureset):
|
|
19
19
|
https://arxiv.org/abs/2203.07378.
|
20
20
|
"""
|
21
21
|
|
22
|
-
def __init__(self, name, data_df):
|
23
|
-
super().__init__(name, data_df)
|
22
|
+
def __init__(self, name, data_df, feats_type):
|
23
|
+
super().__init__(name, data_df, feats_type)
|
24
24
|
self.model_loaded = False
|
25
|
+
self.feats_type = feats_type
|
25
26
|
|
26
27
|
def _load_model(self):
|
27
28
|
model_url = "https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip"
|
@@ -29,7 +30,8 @@ class AudmodelSet(Featureset):
|
|
29
30
|
if not os.path.isdir(model_root):
|
30
31
|
cache_root = audeer.mkdir("cache")
|
31
32
|
model_root = audeer.mkdir(model_root)
|
32
|
-
archive_path = audeer.download_url(
|
33
|
+
archive_path = audeer.download_url(
|
34
|
+
model_url, cache_root, verbose=True)
|
33
35
|
audeer.extract_archive(archive_path, model_root)
|
34
36
|
cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
35
37
|
device = self.util.config_val("MODEL", "device", cuda)
|
@@ -11,14 +11,15 @@ import laion_clap
|
|
11
11
|
import audiofile
|
12
12
|
|
13
13
|
|
14
|
-
class Clap(Featureset):
|
14
|
+
class ClapSet(Featureset):
|
15
15
|
"""Class to extract laion's clap embeddings (https://github.com/LAION-AI/CLAP)"""
|
16
16
|
|
17
|
-
def __init__(self, name, data_df):
|
17
|
+
def __init__(self, name, data_df, feats_type):
|
18
18
|
"""Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
|
19
|
-
super().__init__(name, data_df)
|
19
|
+
super().__init__(name, data_df, feats_type)
|
20
20
|
self.device = self.util.config_val("MODEL", "device", "cpu")
|
21
21
|
self.model_initialized = False
|
22
|
+
self.feat_type = feats_type
|
22
23
|
|
23
24
|
def init_model(self):
|
24
25
|
# load model
|
@@ -32,12 +33,14 @@ class Clap(Featureset):
|
|
32
33
|
store = self.util.get_path("store")
|
33
34
|
store_format = self.util.config_val("FEATS", "store_format", "pkl")
|
34
35
|
storage = f"{store}{self.name}.{store_format}"
|
35
|
-
extract = self.util.config_val(
|
36
|
+
extract = self.util.config_val(
|
37
|
+
"FEATS", "needs_feature_extraction", False)
|
36
38
|
no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
|
37
39
|
if extract or no_reuse or not os.path.isfile(storage):
|
38
40
|
if not self.model_initialized:
|
39
41
|
self.init_model()
|
40
|
-
self.util.debug(
|
42
|
+
self.util.debug(
|
43
|
+
"extracting clap embeddings, this might take a while...")
|
41
44
|
emb_series = pd.Series(index=self.data_df.index, dtype=object)
|
42
45
|
length = len(self.data_df.index)
|
43
46
|
for idx, (file, start, end) in enumerate(
|
@@ -51,7 +54,8 @@ class Clap(Featureset):
|
|
51
54
|
)
|
52
55
|
emb = self.get_embeddings(signal, sampling_rate)
|
53
56
|
emb_series[idx] = emb
|
54
|
-
self.df = pd.DataFrame(
|
57
|
+
self.df = pd.DataFrame(
|
58
|
+
emb_series.values.tolist(), index=self.data_df.index)
|
55
59
|
self.util.write_store(self.df, storage, store_format)
|
56
60
|
try:
|
57
61
|
glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# feats_hubert.py
|
2
2
|
# HuBERT feature extractor for Nkululeko
|
3
|
-
# example feat_type = "hubert-large-ll60k", "hubert-xlarge-ll60k"
|
3
|
+
# example feat_type = "hubert-large-ll60k", "hubert-xlarge-ll60k",
|
4
|
+
# "hubert-base-ls960", "hubert-large-ls960-ft", "hubert-xlarge-ls960-ft"
|
4
5
|
|
5
6
|
|
6
7
|
import os
|
@@ -22,7 +23,7 @@ class Hubert(Featureset):
|
|
22
23
|
def __init__(self, name, data_df, feat_type):
|
23
24
|
"""Constructor. is_train is needed to distinguish from test/dev sets,
|
24
25
|
because they use the codebook from the training"""
|
25
|
-
super().__init__(name, data_df)
|
26
|
+
super().__init__(name, data_df, feat_type)
|
26
27
|
# check if device is not set, use cuda if available
|
27
28
|
cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
28
29
|
self.device = self.util.config_val("MODEL", "device", cuda)
|
@@ -11,8 +11,8 @@ from nkululeko.feat_extract.featureset import Featureset
|
|
11
11
|
class ImportSet(Featureset):
|
12
12
|
"""Class to import features that have been compiled elsewhere"""
|
13
13
|
|
14
|
-
def __init__(self, name, data_df):
|
15
|
-
super().__init__(name, data_df)
|
14
|
+
def __init__(self, name, data_df, feats_type):
|
15
|
+
super().__init__(name, data_df, feats_type)
|
16
16
|
|
17
17
|
def extract(self):
|
18
18
|
"""Import the features."""
|
@@ -27,9 +27,9 @@ from nkululeko.feat_extract.featureset import Featureset
|
|
27
27
|
class MosSet(Featureset):
|
28
28
|
"""Class to predict MOS (mean opinion score)"""
|
29
29
|
|
30
|
-
def __init__(self, name, data_df):
|
30
|
+
def __init__(self, name, data_df, feats_type):
|
31
31
|
"""Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
|
32
|
-
super().__init__(name, data_df)
|
32
|
+
super().__init__(name, data_df, feats_type)
|
33
33
|
self.device = self.util.config_val("MODEL", "device", "cpu")
|
34
34
|
self.model_initialized = False
|
35
35
|
|
@@ -8,31 +8,21 @@ import opensmile
|
|
8
8
|
|
9
9
|
|
10
10
|
class Opensmileset(Featureset):
|
11
|
-
def __init__(self, name, data_df):
|
12
|
-
super().__init__(name, data_df)
|
11
|
+
def __init__(self, name, data_df, feats_type=None, config_file=None):
|
12
|
+
super().__init__(name, data_df, feats_type)
|
13
13
|
self.featset = self.util.config_val("FEATS", "set", "eGeMAPSv02")
|
14
14
|
try:
|
15
15
|
self.feature_set = eval(f"opensmile.FeatureSet.{self.featset}")
|
16
|
-
#'eGeMAPSv02, ComParE_2016, GeMAPSv01a, eGeMAPSv01a':
|
16
|
+
# 'eGeMAPSv02, ComParE_2016, GeMAPSv01a, eGeMAPSv01a':
|
17
17
|
except AttributeError:
|
18
|
-
self.util.error(
|
19
|
-
f"something is wrong with feature set: {self.featset}"
|
20
|
-
)
|
18
|
+
self.util.error(f"something is wrong with feature set: {self.featset}")
|
21
19
|
self.featlevel = self.util.config_val("FEATS", "level", "functionals")
|
22
20
|
try:
|
23
|
-
self.featlevel = self.featlevel.replace(
|
24
|
-
|
25
|
-
)
|
26
|
-
self.featlevel = self.featlevel.replace(
|
27
|
-
"functionals", "Functionals"
|
28
|
-
)
|
29
|
-
self.feature_level = eval(
|
30
|
-
f"opensmile.FeatureLevel.{self.featlevel}"
|
31
|
-
)
|
21
|
+
self.featlevel = self.featlevel.replace("lld", "LowLevelDescriptors")
|
22
|
+
self.featlevel = self.featlevel.replace("functionals", "Functionals")
|
23
|
+
self.feature_level = eval(f"opensmile.FeatureLevel.{self.featlevel}")
|
32
24
|
except AttributeError:
|
33
|
-
self.util.error(
|
34
|
-
f"something is wrong with feature level: {self.featlevel}"
|
35
|
-
)
|
25
|
+
self.util.error(f"something is wrong with feature level: {self.featlevel}")
|
36
26
|
|
37
27
|
def extract(self):
|
38
28
|
"""Extract the features based on the initialized dataset or re-open them when found on disk."""
|
@@ -44,9 +34,7 @@ class Opensmileset(Featureset):
|
|
44
34
|
)
|
45
35
|
no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
|
46
36
|
if extract or not os.path.isfile(storage) or no_reuse:
|
47
|
-
self.util.debug(
|
48
|
-
"extracting openSmile features, this might take a while..."
|
49
|
-
)
|
37
|
+
self.util.debug("extracting openSmile features, this might take a while...")
|
50
38
|
smile = opensmile.Smile(
|
51
39
|
feature_set=self.feature_set,
|
52
40
|
feature_level=self.feature_level,
|
@@ -85,9 +73,7 @@ class Opensmileset(Featureset):
|
|
85
73
|
selected_features = ast.literal_eval(
|
86
74
|
glob_conf.config["FEATS"]["os.features"]
|
87
75
|
)
|
88
|
-
self.util.debug(
|
89
|
-
f"selecting features from opensmile: {selected_features}"
|
90
|
-
)
|
76
|
+
self.util.debug(f"selecting features from opensmile: {selected_features}")
|
91
77
|
sel_feats_df = pd.DataFrame()
|
92
78
|
hit = False
|
93
79
|
for feat in selected_features:
|
@@ -10,9 +10,10 @@ import opensmile
|
|
10
10
|
class Openxbow(Featureset):
|
11
11
|
"""Class to extract openXBOW processed opensmile features (https://github.com/openXBOW)"""
|
12
12
|
|
13
|
-
def __init__(self, name, data_df, is_train=False):
|
13
|
+
def __init__(self, name, data_df, feats_type, is_train=False):
|
14
14
|
"""Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
|
15
|
-
super().__init__(name, data_df)
|
15
|
+
super().__init__(name, data_df, feats_type)
|
16
|
+
self.feats_types = feats_type
|
16
17
|
self.is_train = is_train
|
17
18
|
|
18
19
|
def extract(self):
|
@@ -21,11 +22,13 @@ class Openxbow(Featureset):
|
|
21
22
|
self.feature_set = eval(f"opensmile.FeatureSet.{self.featset}")
|
22
23
|
store = self.util.get_path("store")
|
23
24
|
storage = f"{store}{self.name}_{self.featset}.pkl"
|
24
|
-
extract = self.util.config_val(
|
25
|
+
extract = self.util.config_val(
|
26
|
+
"FEATS", "needs_feature_extraction", False)
|
25
27
|
no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
|
26
28
|
if extract or no_reuse or not os.path.isfile(storage):
|
27
29
|
# extract smile features first
|
28
|
-
self.util.debug(
|
30
|
+
self.util.debug(
|
31
|
+
"extracting openSmile features, this might take a while...")
|
29
32
|
smile = opensmile.Smile(
|
30
33
|
feature_set=self.feature_set,
|
31
34
|
feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
|
@@ -48,7 +51,13 @@ class Openxbow(Featureset):
|
|
48
51
|
# save the smile features
|
49
52
|
smile_df.to_csv(lld_name, sep=";", header=False)
|
50
53
|
# get the path of the xbow java jar file
|
51
|
-
xbow_path = self.util.config_val(
|
54
|
+
xbow_path = self.util.config_val(
|
55
|
+
"FEATS", "xbow.model", "openXBOW")
|
56
|
+
# check if JAR file exist
|
57
|
+
if not os.path.isfile(f"{xbow_path}/openXBOW.jar"):
|
58
|
+
# download using wget if not exist and locate in xbow_path
|
59
|
+
os.system(
|
60
|
+
f"git clone https://github.com/openXBOW/openXBOW")
|
52
61
|
# get the size of the codebook
|
53
62
|
size = self.util.config_val("FEATS", "size", 500)
|
54
63
|
# get the number of assignements
|
@@ -57,16 +66,12 @@ class Openxbow(Featureset):
|
|
57
66
|
if self.is_train:
|
58
67
|
# store the codebook
|
59
68
|
os.system(
|
60
|
-
f"java -jar {xbow_path}openXBOW.jar -i"
|
61
|
-
f" {lld_name} -standardizeInput -log -o"
|
62
|
-
f" {xbow_name} -size {size} -a {assignments} -B"
|
63
|
-
f" {codebook_name}"
|
69
|
+
f"java -jar {xbow_path}/openXBOW.jar -i {lld_name} -standardizeInput -log -o {xbow_name} -size {size} -a {assignments} -B {codebook_name}"
|
64
70
|
)
|
65
71
|
else:
|
66
72
|
# use the codebook
|
67
73
|
os.system(
|
68
|
-
f"java -jar {xbow_path}openXBOW.jar -i {lld_name}"
|
69
|
-
f" -o {xbow_name} -b {codebook_name}"
|
74
|
+
f"java -jar {xbow_path}/openXBOW.jar -i {lld_name} -o {xbow_name} -b {codebook_name}"
|
70
75
|
)
|
71
76
|
# read in the result from disk
|
72
77
|
xbow_df = pd.read_csv(xbow_name, sep=";", header=None)
|
@@ -18,18 +18,20 @@ class PraatSet(Featureset):
|
|
18
18
|
|
19
19
|
"""
|
20
20
|
|
21
|
-
def __init__(self, name, data_df):
|
22
|
-
super().__init__(name, data_df)
|
21
|
+
def __init__(self, name, data_df, feats_type):
|
22
|
+
super().__init__(name, data_df, feats_type)
|
23
23
|
|
24
24
|
def extract(self):
|
25
25
|
"""Extract the features based on the initialized dataset or re-open them when found on disk."""
|
26
26
|
store = self.util.get_path("store")
|
27
27
|
store_format = self.util.config_val("FEATS", "store_format", "pkl")
|
28
28
|
storage = f"{store}{self.name}.{store_format}"
|
29
|
-
extract = self.util.config_val(
|
29
|
+
extract = self.util.config_val(
|
30
|
+
"FEATS", "needs_feature_extraction", False)
|
30
31
|
no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
|
31
32
|
if extract or no_reuse or not os.path.isfile(storage):
|
32
|
-
self.util.debug(
|
33
|
+
self.util.debug(
|
34
|
+
"extracting Praat features, this might take a while...")
|
33
35
|
self.df = feinberg_praat.compute_features(self.data_df.index)
|
34
36
|
self.df = self.df.set_index(self.data_df.index)
|
35
37
|
for i, col in enumerate(self.df.columns):
|
@@ -52,7 +54,8 @@ class PraatSet(Featureset):
|
|
52
54
|
self.df = self.df.astype(float)
|
53
55
|
|
54
56
|
def extract_sample(self, signal, sr):
|
55
|
-
import audiofile
|
57
|
+
import audiofile
|
58
|
+
import audformat
|
56
59
|
|
57
60
|
tmp_audio_names = ["praat_audio_tmp.wav"]
|
58
61
|
audiofile.write(tmp_audio_names[0], signal, sr)
|
@@ -4,6 +4,7 @@ feats_spectra.py
|
|
4
4
|
Inspired by code from Su Lei
|
5
5
|
|
6
6
|
"""
|
7
|
+
|
7
8
|
import os
|
8
9
|
import torchaudio
|
9
10
|
import torchaudio.transforms as T
|
@@ -23,9 +24,9 @@ import nkululeko.glob_conf as glob_conf
|
|
23
24
|
|
24
25
|
|
25
26
|
class Spectraloader(Featureset):
|
26
|
-
def __init__(self, name, data_df):
|
27
|
+
def __init__(self, name, data_df, feat_type):
|
27
28
|
"""Constructor setting the name"""
|
28
|
-
|
29
|
+
super().__init__(name, data_df, feat_type)
|
29
30
|
self.sampling_rate = SAMPLING_RATE
|
30
31
|
self.num_bands = int(self.util.config_val("FEATS", "fft_nbands", "64"))
|
31
32
|
self.win_dur = int(self.util.config_val("FEATS", "fft_win_dur", "25"))
|
@@ -30,9 +30,9 @@ from nkululeko.utils.util import Util
|
|
30
30
|
class SquimSet(Featureset):
|
31
31
|
"""Class to predict SQUIM features"""
|
32
32
|
|
33
|
-
def __init__(self, name, data_df):
|
33
|
+
def __init__(self, name, data_df, feats_type):
|
34
34
|
"""Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
|
35
|
-
super().__init__(name, data_df)
|
35
|
+
super().__init__(name, data_df, feats_type)
|
36
36
|
self.device = self.util.config_val("MODEL", "device", "cpu")
|
37
37
|
self.model_initialized = False
|
38
38
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# feats_trill.py
|
2
|
+
import tensorflow_hub as hub
|
2
3
|
import os
|
3
4
|
import tensorflow as tf
|
4
5
|
from numpy.core.numeric import tensordot
|
@@ -11,7 +12,6 @@ from nkululeko.feat_extract.featureset import Featureset
|
|
11
12
|
|
12
13
|
# Import TF 2.X and make sure we're running eager.
|
13
14
|
assert tf.executing_eagerly()
|
14
|
-
import tensorflow_hub as hub
|
15
15
|
|
16
16
|
|
17
17
|
class TRILLset(Featureset):
|
@@ -20,7 +20,7 @@ class TRILLset(Featureset):
|
|
20
20
|
"""https://ai.googleblog.com/2020/06/improving-speech-representations-and.html"""
|
21
21
|
|
22
22
|
# Initialization of the class
|
23
|
-
def __init__(self, name, data_df):
|
23
|
+
def __init__(self, name, data_df, feats_type):
|
24
24
|
"""
|
25
25
|
Initialize the class with name, data and Util instance
|
26
26
|
Also loads the model from hub
|
@@ -31,7 +31,7 @@ class TRILLset(Featureset):
|
|
31
31
|
:type data_df: DataFrame
|
32
32
|
:return: None
|
33
33
|
"""
|
34
|
-
super().__init__(name, data_df)
|
34
|
+
super().__init__(name, data_df, feats_type)
|
35
35
|
# Load the model from the configured path
|
36
36
|
model_path = self.util.config_val(
|
37
37
|
"FEATS",
|
@@ -39,20 +39,24 @@ class TRILLset(Featureset):
|
|
39
39
|
"https://tfhub.dev/google/nonsemantic-speech-benchmark/trill/3",
|
40
40
|
)
|
41
41
|
self.module = hub.load(model_path)
|
42
|
+
self.feats_type = feats_type
|
42
43
|
|
43
44
|
def extract(self):
|
44
45
|
store = self.util.get_path("store")
|
45
46
|
storage = f"{store}{self.name}.pkl"
|
46
|
-
extract = self.util.config_val(
|
47
|
+
extract = self.util.config_val(
|
48
|
+
"FEATS", "needs_feature_extraction", False)
|
47
49
|
no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
|
48
50
|
if extract or no_reuse or not os.path.isfile(storage):
|
49
|
-
self.util.debug(
|
51
|
+
self.util.debug(
|
52
|
+
"extracting TRILL embeddings, this might take a while...")
|
50
53
|
emb_series = pd.Series(index=self.data_df.index, dtype=object)
|
51
54
|
length = len(self.data_df.index)
|
52
55
|
for idx, file in enumerate(tqdm(self.data_df.index.get_level_values(0))):
|
53
56
|
emb = self.getEmbeddings(file)
|
54
57
|
emb_series[idx] = emb
|
55
|
-
self.df = pd.DataFrame(
|
58
|
+
self.df = pd.DataFrame(
|
59
|
+
emb_series.values.tolist(), index=self.data_df.index)
|
56
60
|
self.df.to_pickle(storage)
|
57
61
|
try:
|
58
62
|
glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
|
@@ -1,5 +1,11 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
""" feats_wav2vec2.py
|
2
|
+
feat_types example = [wav2vec2-large-robust-ft-swbd-300h,
|
3
|
+
wav2vec2-xls-r-2b, wav2vec2-large, wav2vec2-large-xlsr-53, wav2vec2-base]
|
4
|
+
|
5
|
+
Complete list: https://huggingface.co/facebook?search_models=wav2vec2
|
6
|
+
Currently only supports wav2vec2
|
7
|
+
"""
|
8
|
+
|
3
9
|
import os
|
4
10
|
from tqdm import tqdm
|
5
11
|
import pandas as pd
|
@@ -16,11 +22,11 @@ class Wav2vec2(Featureset):
|
|
16
22
|
|
17
23
|
def __init__(self, name, data_df, feat_type):
|
18
24
|
"""Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
|
19
|
-
super().__init__(name, data_df)
|
25
|
+
super().__init__(name, data_df, feat_type)
|
20
26
|
cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
21
27
|
self.device = self.util.config_val("MODEL", "device", cuda)
|
22
28
|
self.model_initialized = False
|
23
|
-
if feat_type == "
|
29
|
+
if feat_type == "wav2vec2":
|
24
30
|
self.feat_type = "wav2vec2-large-robust-ft-swbd-300h"
|
25
31
|
else:
|
26
32
|
self.feat_type = feat_type
|
@@ -33,7 +39,8 @@ class Wav2vec2(Featureset):
|
|
33
39
|
)
|
34
40
|
config = transformers.AutoConfig.from_pretrained(model_path)
|
35
41
|
layer_num = config.num_hidden_layers
|
36
|
-
hidden_layer = int(self.util.config_val(
|
42
|
+
hidden_layer = int(self.util.config_val(
|
43
|
+
"FEATS", "wav2vec2.layer", "0"))
|
37
44
|
config.num_hidden_layers = layer_num - hidden_layer
|
38
45
|
self.util.debug(f"using hidden layer #{config.num_hidden_layers}")
|
39
46
|
self.processor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
|
@@ -48,7 +55,8 @@ class Wav2vec2(Featureset):
|
|
48
55
|
"""Extract the features or load them from disk if present."""
|
49
56
|
store = self.util.get_path("store")
|
50
57
|
storage = f"{store}{self.name}.pkl"
|
51
|
-
extract = self.util.config_val(
|
58
|
+
extract = self.util.config_val(
|
59
|
+
"FEATS", "needs_feature_extraction", False)
|
52
60
|
no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
|
53
61
|
if extract or no_reuse or not os.path.isfile(storage):
|
54
62
|
if not self.model_initialized:
|
@@ -69,7 +77,8 @@ class Wav2vec2(Featureset):
|
|
69
77
|
emb = self.get_embeddings(signal, sampling_rate, file)
|
70
78
|
emb_series[idx] = emb
|
71
79
|
# print(f"emb_series shape: {emb_series.shape}")
|
72
|
-
self.df = pd.DataFrame(
|
80
|
+
self.df = pd.DataFrame(
|
81
|
+
emb_series.values.tolist(), index=self.data_df.index)
|
73
82
|
# print(f"df shape: {self.df.shape}")
|
74
83
|
self.df.to_pickle(storage)
|
75
84
|
try:
|
@@ -59,10 +59,7 @@ class Wavlm(Featureset):
|
|
59
59
|
frame_offset=int(start.total_seconds() * 16000),
|
60
60
|
num_frames=int((end - start).total_seconds() * 16000),
|
61
61
|
)
|
62
|
-
|
63
|
-
self.util.error(
|
64
|
-
f"sampling rate should be 16000 but is {sampling_rate}"
|
65
|
-
)
|
62
|
+
assert sampling_rate == 16000, f"sampling rate should be 16000 but is {sampling_rate}"
|
66
63
|
emb = self.get_embeddings(signal, sampling_rate, file)
|
67
64
|
emb_series.iloc[idx] = emb
|
68
65
|
self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
|