nkululeko 0.95.9__tar.gz → 0.96.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nkululeko-0.95.9 → nkululeko-0.96.0}/CHANGELOG.md +4 -0
- {nkululeko-0.95.9/nkululeko.egg-info → nkululeko-0.96.0}/PKG-INFO +1 -1
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/constants.py +1 -1
- nkululeko-0.96.0/nkululeko/feat_extract/feats_bert.py +105 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feature_extractor.py +1 -1
- {nkululeko-0.95.9 → nkululeko-0.96.0/nkululeko.egg-info}/PKG-INFO +1 -1
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko.egg-info/SOURCES.txt +1 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/pyproject.toml +1 -1
- {nkululeko-0.95.9 → nkululeko-0.96.0}/LICENSE +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/README.md +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/aesdd/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/ased/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/asvp-esd/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/baved/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/cafe/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/clac/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/cmu-mosei/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/demos/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/ekorpus/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/emns/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/emofilm/convert_to_16k.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/emofilm/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/emorynlp/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/emov-db/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/emovo/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/emozionalmente/create.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/enterface/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/esd/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/gerparas/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/iemocap/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/jl/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/jtes/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/meld/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/mesd/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/mess/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/mlendsnd/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/msp-improv/process_database2.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/msp-podcast/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/oreau2/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/portuguese/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/ravdess/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/ravdess/process_database_speaker.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/savee/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/shemo/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/subesco/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/tess/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/thorsten-emotional/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/urdu/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/data/vivae/process_database.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/docs/source/conf.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/examples/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/meta/demos/demo_best_model.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/meta/demos/my_experiment.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/meta/demos/my_experiment_local.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/meta/demos/plot_faster_anim.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/aug_train.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/augment.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/augmenting/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/augmenting/augmenter.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/augmenting/randomsplicer.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/augmenting/randomsplicing.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/augmenting/resampler.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/ap_age.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/ap_arousal.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/ap_dominance.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/ap_emotion.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/ap_gender.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/ap_mos.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/ap_pesq.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/ap_sdr.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/ap_sid.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/ap_snr.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/ap_stoi.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/ap_text.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/ap_translate.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/ap_valence.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/estimate_snr.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/google_translator.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/tests/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/tests/test_whisper_transcriber.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/whisper_transcriber.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/balance.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/cacheddataset.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/data/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/data/dataset.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/data/dataset_csv.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/demo-ft.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/demo.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/demo_feats.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/demo_predictor.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/ensemble.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/experiment.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/explore.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/export.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_agender.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_agender_agender.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_analyser.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_ast.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_auddim.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_audmodel.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_clap.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_emotion2vec.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_hubert.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_import.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_mld.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_mos.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_opensmile.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_oxbow.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_praat.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_praat_core.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_snr.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_spectra.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_spkrec.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_squim.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_trill.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_wav2vec2.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_wavlm.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_whisper.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/featureset.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/tests/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/tests/test_feats_opensmile.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/tests/test_feats_praat_core.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/transformer_feature_extractor.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/file_checker.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/filter_data.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/fixedsegment.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/glob_conf.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/losses/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/losses/loss_ccc.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/losses/loss_softf1loss.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/modelrunner.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/model.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/model_bayes.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/model_cnn.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/model_gmm.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/model_knn.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/model_knn_reg.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/model_lin_reg.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/model_mlp.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/model_mlp_regression.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/model_svm.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/model_svr.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/model_tree.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/model_tree_reg.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/model_tuned.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/model_xgb.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/model_xgr.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/tests/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/tests/test_model_knn.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/tests/test_model_mlp.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/tests/test_model_svm.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/models/tests/test_model_xgb.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/multidb.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/nkuluflag.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/nkululeko.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/optim.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/optimizationrunner.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/plots.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/predict.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/reporting/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/reporting/defines.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/reporting/latex_writer.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/reporting/report.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/reporting/report_item.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/reporting/reporter.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/reporting/result.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/resample.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/runmanager.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/scaler.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/segment.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/segmenting/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/segmenting/seg_inaspeechsegmenter.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/segmenting/seg_pyannote.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/segmenting/seg_silero.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/syllable_nuclei.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/testing.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/testing_predictor.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/testing_pretrain.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/tests/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/tests/test_balancing.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/tests/test_optim.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/utils/__init__.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/utils/files.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/utils/stats.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/utils/unzip.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/utils/util.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko.egg-info/dependency_links.txt +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko.egg-info/entry_points.txt +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko.egg-info/requires.txt +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko.egg-info/top_level.txt +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/setup.cfg +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/setup.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/tests/test_install.py +0 -0
- {nkululeko-0.95.9 → nkululeko-0.96.0}/tests/test_modules.py +0 -0
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.95.9"
|
1
|
+
VERSION="0.96.0"
|
2
2
|
SAMPLING_RATE = 16000
|
@@ -0,0 +1,105 @@
|
|
1
|
+
import os
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
from tqdm import tqdm
|
5
|
+
import transformers
|
6
|
+
import torch
|
7
|
+
from transformers import BertTokenizer, BertModel
|
8
|
+
|
9
|
+
from nkululeko.feat_extract.featureset import Featureset
|
10
|
+
import nkululeko.glob_conf as glob_conf
|
11
|
+
|
12
|
+
|
13
|
+
class Bert(Featureset):
    """Feature set that turns the ``text`` column of a data frame into BERT embeddings.

    Each text is tokenized, passed through a (possibly truncated) BERT encoder
    and mean-pooled over the token axis into one fixed-size vector.
    """

    def __init__(self, name, data_df, feat_type):
        """Set up the extractor without loading the model yet.

        Args:
            name: Name of the feature set; used for the cache file name.
            data_df: Data frame whose rows are embedded; ``extract`` reads its
                ``text`` column.
            feat_type: Model identifier. The shorthand ``"bert"`` is mapped to
                ``"bert-base-uncased"``; any other value is used as given.
        """
        super().__init__(name, data_df, feat_type)
        default_device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = self.util.config_val("MODEL", "device", default_device)
        # The model is loaded lazily so that reusing cached features stays cheap.
        self.model_initialized = False
        self.feat_type = "bert-base-uncased" if feat_type == "bert" else feat_type

    @staticmethod
    def _as_bool(value):
        """Interpret a config value (bool or string such as "True"/"false") as a bool."""
        if isinstance(value, bool):
            return value
        return str(value).strip().lower() in ("true", "1", "yes", "on")

    def init_model(self):
        """Load tokenizer and model and move the model to ``self.device``.

        The config value ``FEATS / bert.layer`` states how many encoder layers
        to drop from the top, i.e. ``0`` uses the last hidden layer.
        """
        self.util.debug(f"loading {self.feat_type} model...")
        model_path = self.util.config_val(
            "FEATS", "bert.model", f"google-bert/{self.feat_type}"
        )
        config = transformers.AutoConfig.from_pretrained(model_path)
        layer_num = config.num_hidden_layers
        hidden_layer = int(self.util.config_val("FEATS", "bert.layer", "0"))
        # A negative value would request more layers than the checkpoint has
        # (they would be randomly initialised); a too large one a negative count.
        if not 0 <= hidden_layer <= layer_num:
            self.util.error(
                f"bert.layer must be between 0 and {layer_num}, got {hidden_layer}"
            )
        config.num_hidden_layers = layer_num - hidden_layer
        self.util.debug(f"using hidden layer #{config.num_hidden_layers}")

        self.tokenizer = BertTokenizer.from_pretrained(model_path)
        self.model = BertModel.from_pretrained(model_path, config=config).to(
            self.device
        )
        self.util.debug(f"initialized {self.feat_type} model on {self.device}")
        self.model.eval()
        self.model_initialized = True

    def extract(self):
        """Extract the features or load them from disk if present.

        The result is stored in ``self.df`` (one row per entry of
        ``self.data_df``) and pickled to ``<store>/<name>.pkl``.
        """
        store = self.util.get_path("store")
        storage = os.path.join(store, f"{self.name}.pkl")
        # Config values may arrive as strings; parse them instead of relying on
        # string truthiness ("False" is truthy) or eval().
        extract = self._as_bool(
            self.util.config_val("FEATS", "needs_feature_extraction", False)
        )
        no_reuse = self._as_bool(self.util.config_val("FEATS", "no_reuse", "False"))
        if extract or no_reuse or not os.path.isfile(storage):
            if "text" not in self.data_df.columns:
                self.util.error(
                    f"{self.feat_type} embeddings need a 'text' column in the data"
                )
            if not self.model_initialized:
                self.init_model()
            self.util.debug(
                f"extracting {self.feat_type} embeddings, this might take a while..."
            )
            embeddings = []
            rows = zip(self.data_df.index, self.data_df["text"])
            for idx, text in tqdm(rows, total=len(self.data_df)):
                # assumes the first index level is the file name - TODO confirm
                file = idx[0]
                embeddings.append(self.get_embeddings(text, file))
            self.df = pd.DataFrame(embeddings, index=self.data_df.index)
            self.df.to_pickle(storage)
            try:
                # NOTE(review): the flag is read from section FEATS above but
                # reset in section DATA here - confirm which one is intended.
                glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
            except KeyError:
                pass
        else:
            self.util.debug(f"reusing extracted {self.feat_type} embeddings")
            self.df = pd.read_pickle(storage)
            if self.df.isnull().values.any():
                self.util.error(
                    f"got nan: {self.df.shape} {self.df.isnull().sum().sum()}"
                )

    def get_embeddings(self, text, file):
        r"""Return the mean-pooled embedding of ``text`` as a 1-d numpy array.

        Args:
            text: The text to embed.
            file: Identifier of the sample, only used in the error message.

        Returns:
            The embedding vector, or ``None`` if the model raised a
            ``RuntimeError`` and ``self.util.error`` returned.
        """
        try:
            with torch.no_grad():
                # Truncate to the model's position limit; longer inputs would
                # otherwise fail inside the embedding layer.
                inputs = self.tokenizer(
                    text,
                    return_tensors="pt",
                    truncation=True,
                    max_length=self.model.config.max_position_embeddings,
                )
                # The model lives on self.device, so the inputs must as well.
                inputs = inputs.to(self.device)
                outputs = self.model(**inputs)
                # mean pooling over the token axis of the last hidden state
                pooled = torch.mean(outputs[0], dim=1).ravel()
        except RuntimeError as err:
            print(str(err))
            self.util.error(f"couldn't extract file: {file}")
            return None
        return pooled.cpu().numpy()

    def extract_sample(self, text):
        """Return the embedding for a single text, loading the model on first use."""
        if not self.model_initialized:
            self.init_model()
        return self.get_embeddings(text, "no file")
|
@@ -80,7 +80,7 @@ class FeatureExtractor:
|
|
80
80
|
return MLD_set
|
81
81
|
|
82
82
|
elif feats_type.startswith(
|
83
|
-
("wav2vec2", "hubert", "wavlm", "spkrec", "whisper", "ast", "emotion2vec")
|
83
|
+
("bert", "wav2vec2", "hubert", "wavlm", "spkrec", "whisper", "ast", "emotion2vec")
|
84
84
|
):
|
85
85
|
return self._get_feat_extractor_by_prefix(feats_type)
|
86
86
|
|
@@ -125,6 +125,7 @@ nkululeko/feat_extract/feats_analyser.py
|
|
125
125
|
nkululeko/feat_extract/feats_ast.py
|
126
126
|
nkululeko/feat_extract/feats_auddim.py
|
127
127
|
nkululeko/feat_extract/feats_audmodel.py
|
128
|
+
nkululeko/feat_extract/feats_bert.py
|
128
129
|
nkululeko/feat_extract/feats_clap.py
|
129
130
|
nkululeko/feat_extract/feats_emotion2vec.py
|
130
131
|
nkululeko/feat_extract/feats_hubert.py
|
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
|
|
7
7
|
|
8
8
|
[project]
|
9
9
|
name = "nkululeko"
|
10
|
-
version = "0.95.9"
|
10
|
+
version = "0.96.0"
|
11
11
|
description = "Machine learning audio prediction experiments based on templates"
|
12
12
|
authors = [
|
13
13
|
{name = "Felix Burkhardt", email = "fxburk@gmail.com"},
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/autopredict/tests/test_whisper_transcriber.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{nkululeko-0.95.9 → nkululeko-0.96.0}/nkululeko/feat_extract/transformer_feature_extractor.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|