nkululeko 0.95.9__tar.gz → 0.96.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nkululeko-0.95.9 → nkululeko-0.96.1}/CHANGELOG.md +8 -0
- {nkululeko-0.95.9/nkululeko.egg-info → nkululeko-0.96.1}/PKG-INFO +1 -1
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/constants.py +1 -1
- nkululeko-0.96.1/nkululeko/feat_extract/feats_bert.py +105 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feature_extractor.py +1 -1
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/reporting/reporter.py +15 -8
- {nkululeko-0.95.9 → nkululeko-0.96.1/nkululeko.egg-info}/PKG-INFO +1 -1
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko.egg-info/SOURCES.txt +1 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/pyproject.toml +1 -1
- {nkululeko-0.95.9 → nkululeko-0.96.1}/LICENSE +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/README.md +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/aesdd/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/ased/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/asvp-esd/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/baved/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/cafe/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/clac/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/cmu-mosei/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/demos/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/ekorpus/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/emns/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/emofilm/convert_to_16k.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/emofilm/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/emorynlp/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/emov-db/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/emovo/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/emozionalmente/create.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/enterface/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/esd/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/gerparas/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/iemocap/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/jl/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/jtes/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/meld/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/mesd/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/mess/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/mlendsnd/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/msp-improv/process_database2.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/msp-podcast/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/oreau2/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/portuguese/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/ravdess/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/ravdess/process_database_speaker.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/savee/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/shemo/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/subesco/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/tess/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/thorsten-emotional/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/urdu/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/data/vivae/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/docs/source/conf.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/examples/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/meta/demos/demo_best_model.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/meta/demos/my_experiment.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/meta/demos/my_experiment_local.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/meta/demos/plot_faster_anim.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/aug_train.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/augment.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/augmenting/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/augmenting/augmenter.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/augmenting/randomsplicer.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/augmenting/randomsplicing.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/augmenting/resampler.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/ap_age.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/ap_arousal.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/ap_dominance.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/ap_emotion.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/ap_gender.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/ap_mos.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/ap_pesq.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/ap_sdr.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/ap_sid.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/ap_snr.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/ap_stoi.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/ap_text.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/ap_translate.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/ap_valence.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/estimate_snr.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/google_translator.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/tests/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/tests/test_whisper_transcriber.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/whisper_transcriber.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/balance.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/cacheddataset.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/data/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/data/dataset.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/data/dataset_csv.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/demo-ft.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/demo.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/demo_feats.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/demo_predictor.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/ensemble.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/experiment.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/explore.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/export.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_agender.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_agender_agender.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_analyser.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_ast.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_auddim.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_audmodel.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_clap.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_emotion2vec.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_hubert.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_import.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_mld.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_mos.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_opensmile.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_oxbow.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_praat.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_praat_core.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_snr.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_spectra.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_spkrec.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_squim.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_trill.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_wav2vec2.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_wavlm.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/feats_whisper.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/featureset.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/tests/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/tests/test_feats_opensmile.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/tests/test_feats_praat_core.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/transformer_feature_extractor.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/file_checker.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/filter_data.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/fixedsegment.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/glob_conf.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/losses/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/losses/loss_ccc.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/losses/loss_softf1loss.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/modelrunner.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/model.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/model_bayes.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/model_cnn.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/model_gmm.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/model_knn.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/model_knn_reg.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/model_lin_reg.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/model_mlp.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/model_mlp_regression.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/model_svm.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/model_svr.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/model_tree.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/model_tree_reg.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/model_tuned.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/model_xgb.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/model_xgr.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/tests/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/tests/test_model_knn.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/tests/test_model_mlp.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/tests/test_model_svm.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/models/tests/test_model_xgb.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/multidb.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/nkuluflag.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/nkululeko.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/optim.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/optimizationrunner.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/plots.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/predict.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/reporting/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/reporting/defines.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/reporting/latex_writer.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/reporting/report.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/reporting/report_item.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/reporting/result.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/resample.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/runmanager.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/scaler.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/segment.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/segmenting/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/segmenting/seg_inaspeechsegmenter.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/segmenting/seg_pyannote.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/segmenting/seg_silero.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/syllable_nuclei.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/testing.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/testing_predictor.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/testing_pretrain.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/tests/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/tests/test_balancing.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/tests/test_optim.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/utils/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/utils/files.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/utils/stats.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/utils/unzip.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/utils/util.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko.egg-info/dependency_links.txt +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko.egg-info/entry_points.txt +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko.egg-info/requires.txt +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko.egg-info/top_level.txt +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/setup.cfg +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/setup.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/tests/test_install.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.1}/tests/test_modules.py +0 -0
@@ -1,6 +1,14 @@
|
|
1
1
|
Changelog
|
2
2
|
=========
|
3
3
|
|
4
|
+
Version 0.96.1 (25-07-16)
|
5
|
+
--------------------------
|
6
|
+
* bugfix: wrong labels in confmatrix plots
|
7
|
+
|
8
|
+
Version 0.96.0 (25-07-14)
|
9
|
+
--------------------------
|
10
|
+
* added Bert feature extractor
|
11
|
+
|
4
12
|
Version 0.95.9 (25-07-14)
|
5
13
|
--------------------------
|
6
14
|
* added google translation
|
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.95.9"
|
1
|
+
VERSION="0.96.1"
|
2
2
|
SAMPLING_RATE = 16000
|
@@ -0,0 +1,105 @@
|
|
1
|
+
import os
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
from tqdm import tqdm
|
5
|
+
import transformers
|
6
|
+
import torch
|
7
|
+
from transformers import BertTokenizer, BertModel
|
8
|
+
|
9
|
+
from nkululeko.feat_extract.featureset import Featureset
|
10
|
+
import nkululeko.glob_conf as glob_conf
|
11
|
+
|
12
|
+
|
13
|
+
class Bert(Featureset):
    """Feature set that embeds the ``text`` column of a dataframe with a BERT model.

    The model is loaded lazily (see ``init_model``) so that cached features
    can be reused without loading the model at all.
    """

    def __init__(self, name, data_df, feat_type):
        """Store the configuration; the model itself is not loaded here.

        Args:
            name: name of the feature set, also used as base name of the
                pickle file the features are cached in.
            data_df: dataframe describing the samples; ``extract`` reads its
                ``text`` column.
            feat_type: model identifier. The plain value ``"bert"`` is mapped
                to ``"bert-base-uncased"``; any other value is used as is.
        """
        super().__init__(name, data_df, feat_type)
        default_device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = self.util.config_val("MODEL", "device", default_device)
        self.model_initialized = False
        self.feat_type = "bert-base-uncased" if feat_type == "bert" else feat_type

    @staticmethod
    def _is_true(value):
        """Interpret a config value as a boolean without evaluating it as code."""
        return str(value).strip().lower() in ("true", "1", "yes")

    def init_model(self):
        """Load tokenizer and model and move the model to ``self.device``.

        The model path is taken from ``FEATS/bert.model`` and defaults to
        ``google-bert/<feat_type>``. ``FEATS/bert.layer`` (default ``0``) is
        subtracted from the configured number of hidden layers, so the model
        is built with that many fewer layers.
        """
        self.util.debug(f"loading {self.feat_type} model...")
        model_path = self.util.config_val(
            "FEATS", "bert.model", f"google-bert/{self.feat_type}"
        )
        config = transformers.AutoConfig.from_pretrained(model_path)
        layer_num = config.num_hidden_layers
        hidden_layer = int(self.util.config_val("FEATS", "bert.layer", "0"))
        # NOTE(review): no range check here; values outside [0, layer_num)
        # give a zero or negative layer count - confirm whether that should be
        # rejected explicitly.
        config.num_hidden_layers = layer_num - hidden_layer
        self.util.debug(f"using hidden layer #{config.num_hidden_layers}")

        self.tokenizer = BertTokenizer.from_pretrained(model_path)
        self.model = BertModel.from_pretrained(model_path, config=config).to(
            self.device
        )
        print(f"initialized {self.feat_type} model on {self.device}")
        self.model.eval()
        self.model_initialized = True

    def extract(self):
        """Extract the features or load them from disk if present.

        The result is stored in ``self.df`` (one row per row of
        ``self.data_df``) and, when freshly extracted, pickled to
        ``<store>/<name>.pkl``.
        """
        store = self.util.get_path("store")
        storage = os.path.join(store, f"{self.name}.pkl")
        # NOTE(review): this value is used for its truthiness only; if
        # config_val returns the raw config string, "False" would count as
        # true - verify against config_val.
        extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
        # parsed as text instead of eval(), so config content is never executed
        no_reuse = self._is_true(self.util.config_val("FEATS", "no_reuse", "False"))
        if extract or no_reuse or not os.path.isfile(storage):
            if not self.model_initialized:
                self.init_model()
            self.util.debug(
                f"extracting {self.feat_type} embeddings, this might take a while..."
            )
            texts = self.data_df["text"]
            # idx[0] is passed on as the file name for error messages;
            # presumably the index is a (file, start, end) MultiIndex - TODO confirm
            embeddings = [
                self.get_embeddings(text, idx[0])
                for idx, text in tqdm(texts.items(), total=len(texts))
            ]
            self.df = pd.DataFrame(embeddings, index=self.data_df.index)
            self.df.to_pickle(storage)
            try:
                # NOTE(review): the flag is read from section "FEATS" above but
                # reset in section "DATA" here - confirm this is intended.
                glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
            except KeyError:
                pass
        else:
            self.util.debug(f"reusing extracted {self.feat_type} embeddings")
            self.df = pd.read_pickle(storage)
        if self.df.isnull().values.any():
            self.util.error(
                f"got nan: {self.df.shape} {self.df.isnull().sum().sum()}"
            )

    def get_embeddings(self, text, file):
        r"""Compute one embedding vector for a text.

        Args:
            text: the text to embed.
            file: identifier of the sample, only used in the error message.

        Returns:
            A flat numpy array with the mean-pooled model output, or ``None``
            if the model raised a ``RuntimeError``.
        """
        try:
            with torch.no_grad():
                # The model lives on self.device, so the token tensors have to
                # be moved there as well; truncation keeps over-long texts
                # within the tokenizer's configured maximum length.
                inputs = self.tokenizer(
                    text, return_tensors="pt", truncation=True
                ).to(self.device)
                outputs = self.model(**inputs)
                # mean pooling over dim 1 of the first model output
                # (presumably the token axis of the last hidden state)
                pooled = torch.mean(outputs[0], dim=1).ravel()
        except RuntimeError as err:
            print(str(err))
            self.util.error(f"couldn't extract file: {file}")
            return None
        return pooled.detach().cpu().numpy()

    def extract_sample(self, text):
        """Return the embedding for a single text, loading the model on first use."""
        if not self.model_initialized:
            self.init_model()
        feats = self.get_embeddings(text, "no file")
        return feats
|
@@ -80,7 +80,7 @@ class FeatureExtractor:
|
|
80
80
|
return MLD_set
|
81
81
|
|
82
82
|
elif feats_type.startswith(
|
83
|
-
("wav2vec2", "hubert", "wavlm", "spkrec", "whisper", "ast", "emotion2vec")
|
83
|
+
("bert", "wav2vec2", "hubert", "wavlm", "spkrec", "whisper", "ast", "emotion2vec")
|
84
84
|
):
|
85
85
|
return self._get_feat_extractor_by_prefix(feats_type)
|
86
86
|
|
@@ -198,7 +198,9 @@ class Reporter:
|
|
198
198
|
)
|
199
199
|
|
200
200
|
def plot_proba_conf(self):
|
201
|
-
uncertainty_threshold = self.util.config_val(
|
201
|
+
uncertainty_threshold = self.util.config_val(
|
202
|
+
"PLOT", "uncertainty_threshold", False
|
203
|
+
)
|
202
204
|
if uncertainty_threshold:
|
203
205
|
uncertainty_threshold = float(uncertainty_threshold)
|
204
206
|
old_size = self.probas.shape[0]
|
@@ -210,9 +212,13 @@ class Reporter:
|
|
210
212
|
)
|
211
213
|
truths = df["truth"].values
|
212
214
|
preds = df["predicted"].values
|
213
|
-
self._plot_confmat(
|
214
|
-
|
215
|
-
|
215
|
+
self._plot_confmat(
|
216
|
+
truths,
|
217
|
+
preds,
|
218
|
+
f"uncertainty_less_than_{uncertainty_threshold}_cnf",
|
219
|
+
epoch=None,
|
220
|
+
test_result=None,
|
221
|
+
)
|
216
222
|
|
217
223
|
def set_id(self, run, epoch):
|
218
224
|
"""Make the report identifiable with run and epoch index."""
|
@@ -434,7 +440,10 @@ class Reporter:
|
|
434
440
|
self.util.debug(f"####->{file_name}<-####")
|
435
441
|
file_name = f"{res_dir}{file_name}{self.filenameadd}.txt"
|
436
442
|
if self.util.exp_is_classification():
|
437
|
-
|
443
|
+
if glob_conf.label_encoder is not None:
|
444
|
+
labels = glob_conf.label_encoder.classes_
|
445
|
+
else:
|
446
|
+
labels = glob_conf.labels
|
438
447
|
try:
|
439
448
|
rpt = classification_report(
|
440
449
|
self.truths,
|
@@ -451,9 +460,7 @@ class Reporter:
|
|
451
460
|
target_names=s_labels,
|
452
461
|
digits=4,
|
453
462
|
)
|
454
|
-
self.util.debug(
|
455
|
-
f"\n {class_report_str}"
|
456
|
-
)
|
463
|
+
self.util.debug(f"\n {class_report_str}")
|
457
464
|
except ValueError as e:
|
458
465
|
self.util.debug(
|
459
466
|
"Reporter: caught a ValueError when trying to get"
|
@@ -125,6 +125,7 @@ nkululeko/feat_extract/feats_analyser.py
|
|
125
125
|
nkululeko/feat_extract/feats_ast.py
|
126
126
|
nkululeko/feat_extract/feats_auddim.py
|
127
127
|
nkululeko/feat_extract/feats_audmodel.py
|
128
|
+
nkululeko/feat_extract/feats_bert.py
|
128
129
|
nkululeko/feat_extract/feats_clap.py
|
129
130
|
nkululeko/feat_extract/feats_emotion2vec.py
|
130
131
|
nkululeko/feat_extract/feats_hubert.py
|
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
|
|
7
7
|
|
8
8
|
[project]
|
9
9
|
name = "nkululeko"
|
10
|
-
version = "0.95.9"
|
10
|
+
version = "0.96.1"
|
11
11
|
description = "Machine learning audio prediction experiments based on templates"
|
12
12
|
authors = [
|
13
13
|
{name = "Felix Burkhardt", email = "fxburk@gmail.com"},
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/autopredict/tests/test_whisper_transcriber.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{nkululeko-0.95.9 → nkululeko-0.96.1}/nkululeko/feat_extract/transformer_feature_extractor.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|