nkululeko 0.95.8__tar.gz → 0.96.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nkululeko-0.95.8 → nkululeko-0.96.0}/CHANGELOG.md +8 -0
- {nkululeko-0.95.8/nkululeko.egg-info → nkululeko-0.96.0}/PKG-INFO +1 -1
- nkululeko-0.96.0/nkululeko/autopredict/ap_translate.py +39 -0
- nkululeko-0.96.0/nkululeko/autopredict/google_translator.py +63 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/constants.py +1 -1
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/experiment.py +5 -0
- nkululeko-0.96.0/nkululeko/feat_extract/feats_bert.py +105 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feature_extractor.py +1 -1
- {nkululeko-0.95.8 → nkululeko-0.96.0/nkululeko.egg-info}/PKG-INFO +1 -1
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko.egg-info/SOURCES.txt +3 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/pyproject.toml +1 -1
- {nkululeko-0.95.8 → nkululeko-0.96.0}/LICENSE +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/README.md +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/aesdd/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/ased/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/asvp-esd/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/baved/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/cafe/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/clac/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/cmu-mosei/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/demos/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/ekorpus/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/emns/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/emofilm/convert_to_16k.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/emofilm/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/emorynlp/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/emov-db/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/emovo/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/emozionalmente/create.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/enterface/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/esd/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/gerparas/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/iemocap/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/jl/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/jtes/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/meld/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/mesd/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/mess/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/mlendsnd/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/msp-improv/process_database2.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/msp-podcast/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/oreau2/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/portuguese/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/ravdess/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/ravdess/process_database_speaker.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/savee/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/shemo/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/subesco/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/tess/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/thorsten-emotional/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/urdu/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/data/vivae/process_database.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/docs/source/conf.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/examples/__init__.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/meta/demos/demo_best_model.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/meta/demos/my_experiment.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/meta/demos/my_experiment_local.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/meta/demos/plot_faster_anim.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/__init__.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/aug_train.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/augment.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/augmenting/__init__.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/augmenting/augmenter.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/augmenting/randomsplicer.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/augmenting/randomsplicing.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/augmenting/resampler.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/__init__.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/ap_age.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/ap_arousal.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/ap_dominance.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/ap_emotion.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/ap_gender.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/ap_mos.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/ap_pesq.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/ap_sdr.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/ap_sid.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/ap_snr.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/ap_stoi.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/ap_text.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/ap_valence.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/estimate_snr.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/tests/__init__.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/tests/test_whisper_transcriber.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/whisper_transcriber.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/balance.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/cacheddataset.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/data/__init__.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/data/dataset.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/data/dataset_csv.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/demo-ft.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/demo.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/demo_feats.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/demo_predictor.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/ensemble.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/explore.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/export.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/__init__.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_agender.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_agender_agender.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_analyser.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_ast.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_auddim.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_audmodel.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_clap.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_emotion2vec.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_hubert.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_import.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_mld.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_mos.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_opensmile.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_oxbow.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_praat.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_praat_core.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_snr.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_spectra.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_spkrec.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_squim.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_trill.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_wav2vec2.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_wavlm.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/feats_whisper.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/featureset.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/tests/__init__.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/tests/test_feats_opensmile.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/tests/test_feats_praat_core.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/transformer_feature_extractor.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/file_checker.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/filter_data.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/fixedsegment.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/glob_conf.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/losses/__init__.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/losses/loss_ccc.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/losses/loss_softf1loss.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/modelrunner.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/__init__.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/model.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/model_bayes.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/model_cnn.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/model_gmm.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/model_knn.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/model_knn_reg.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/model_lin_reg.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/model_mlp.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/model_mlp_regression.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/model_svm.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/model_svr.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/model_tree.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/model_tree_reg.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/model_tuned.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/model_xgb.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/model_xgr.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/tests/__init__.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/tests/test_model_knn.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/tests/test_model_mlp.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/tests/test_model_svm.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/models/tests/test_model_xgb.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/multidb.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/nkuluflag.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/nkululeko.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/optim.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/optimizationrunner.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/plots.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/predict.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/reporting/__init__.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/reporting/defines.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/reporting/latex_writer.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/reporting/report.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/reporting/report_item.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/reporting/reporter.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/reporting/result.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/resample.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/runmanager.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/scaler.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/segment.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/segmenting/__init__.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/segmenting/seg_inaspeechsegmenter.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/segmenting/seg_pyannote.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/segmenting/seg_silero.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/syllable_nuclei.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/testing.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/testing_predictor.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/testing_pretrain.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/tests/__init__.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/tests/test_balancing.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/tests/test_optim.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/utils/__init__.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/utils/files.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/utils/stats.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/utils/unzip.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/utils/util.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko.egg-info/dependency_links.txt +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko.egg-info/entry_points.txt +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko.egg-info/requires.txt +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko.egg-info/top_level.txt +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/setup.cfg +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/setup.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/tests/test_install.py +0 -0
- {nkululeko-0.95.8 → nkululeko-0.96.0}/tests/test_modules.py +0 -0
@@ -1,6 +1,14 @@
|
|
1
1
|
Changelog
|
2
2
|
=========
|
3
3
|
|
4
|
+
Version 0.96.0 (25-07-14)
|
5
|
+
--------------------------
|
6
|
+
* added Bert feature extractor
|
7
|
+
|
8
|
+
Version 0.95.9 (25-07-14)
|
9
|
+
--------------------------
|
10
|
+
* added google translation
|
11
|
+
|
4
12
|
Version 0.95.8 (25-07-14)
|
5
13
|
--------------------------
|
6
14
|
* fix bug that bool was not detected as categorical
|
@@ -0,0 +1,39 @@
|
|
1
|
+
"""A translator for text.
|
2
|
+
|
3
|
+
Currently based on google translate.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from nkululeko.utils.util import Util
|
7
|
+
|
8
|
+
|
9
|
+
class TextTranslator:
|
10
|
+
"""Translator.
|
11
|
+
|
12
|
+
translate text with the google translate model
|
13
|
+
"""
|
14
|
+
|
15
|
+
def __init__(self, df, util=None):
|
16
|
+
self.df = df
|
17
|
+
if util is not None:
|
18
|
+
self.util = util
|
19
|
+
else:
|
20
|
+
# create a new util instance
|
21
|
+
# this is needed to access the config and other utilities
|
22
|
+
# in the autopredict module
|
23
|
+
self.util = Util("translator")
|
24
|
+
|
25
|
+
self.language = self.util.config_val("PREDICT", "target_language", "en")
|
26
|
+
from nkululeko.autopredict.google_translator import GoogleTranslator
|
27
|
+
self.translator = GoogleTranslator(
|
28
|
+
language=self.language,
|
29
|
+
util=self.util,
|
30
|
+
)
|
31
|
+
|
32
|
+
def predict(self, split_selection):
|
33
|
+
self.util.debug(f"translating text for {split_selection} samples")
|
34
|
+
df = self.translator.translate_index(
|
35
|
+
self.df
|
36
|
+
)
|
37
|
+
return_df = self.df.copy()
|
38
|
+
return_df[self.language] = df[self.language].values
|
39
|
+
return return_df
|
@@ -0,0 +1,63 @@
|
|
1
|
+
import os
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
import torch
|
5
|
+
from tqdm import tqdm
|
6
|
+
|
7
|
+
import asyncio
|
8
|
+
from googletrans import Translator
|
9
|
+
|
10
|
+
import audeer
|
11
|
+
import audiofile
|
12
|
+
|
13
|
+
from nkululeko.utils.util import Util
|
14
|
+
|
15
|
+
import httpx
|
16
|
+
|
17
|
+
class GoogleTranslator:
|
18
|
+
def __init__(self, language="en", util=None):
|
19
|
+
self.language = language
|
20
|
+
self.util = util
|
21
|
+
|
22
|
+
async def translate_text(self, text):
|
23
|
+
async with Translator() as translator:
|
24
|
+
result = translator.translate(text, dest="en")
|
25
|
+
return (await result).text
|
26
|
+
|
27
|
+
def translate_index(self, df:pd.DataFrame) -> pd.DataFrame:
|
28
|
+
"""Transcribe the audio files in the given index.
|
29
|
+
|
30
|
+
:param index: Index containing tuples of (file, start, end).
|
31
|
+
:return: DataFrame with transcriptions indexed by the original index.
|
32
|
+
:rtype: pd.DataFrame
|
33
|
+
"""
|
34
|
+
file_name = ""
|
35
|
+
seg_index = 0
|
36
|
+
translations = []
|
37
|
+
translator_cache = audeer.mkdir(
|
38
|
+
audeer.path(self.util.get_path("cache"), "translations"))
|
39
|
+
file_name = ""
|
40
|
+
for idx, row in tqdm(df.iterrows(), total=len(df)):
|
41
|
+
file = idx[0]
|
42
|
+
start = idx[1]
|
43
|
+
end = idx[2]
|
44
|
+
if file != file_name:
|
45
|
+
file_name = file
|
46
|
+
seg_index = 0
|
47
|
+
cache_name = audeer.basename_wo_ext(file)+str(seg_index)
|
48
|
+
cache_path = audeer.path(translator_cache, cache_name + ".json")
|
49
|
+
if os.path.isfile(cache_path):
|
50
|
+
translation = self.util.read_json(cache_path)["translation"]
|
51
|
+
else:
|
52
|
+
text = row['text']
|
53
|
+
translation = asyncio.run(self.translate_text(text))
|
54
|
+
self.util.save_json(cache_path,
|
55
|
+
{"translation": translation,
|
56
|
+
"file": file,
|
57
|
+
"start": start.total_seconds(),
|
58
|
+
"end": end.total_seconds()})
|
59
|
+
translations.append(translation)
|
60
|
+
seg_index += 1
|
61
|
+
|
62
|
+
df = pd.DataFrame({self.language:translations}, index=df.index)
|
63
|
+
return df
|
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.95.8"
|
1
|
+
VERSION="0.96.0"
|
2
2
|
SAMPLING_RATE = 16000
|
@@ -574,6 +574,11 @@ class Experiment:
|
|
574
574
|
|
575
575
|
predictor = TextPredictor(df, self.util)
|
576
576
|
df = predictor.predict(sample_selection)
|
577
|
+
elif target == "translation":
|
578
|
+
from nkululeko.autopredict.ap_translate import TextTranslator
|
579
|
+
|
580
|
+
predictor = TextTranslator(df, self.util)
|
581
|
+
df = predictor.predict(sample_selection)
|
577
582
|
elif target == "arousal":
|
578
583
|
from nkululeko.autopredict.ap_arousal import ArousalPredictor
|
579
584
|
|
@@ -0,0 +1,105 @@
|
|
1
|
+
import os
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
from tqdm import tqdm
|
5
|
+
import transformers
|
6
|
+
import torch
|
7
|
+
from transformers import BertTokenizer, BertModel
|
8
|
+
|
9
|
+
from nkululeko.feat_extract.featureset import Featureset
|
10
|
+
import nkululeko.glob_conf as glob_conf
|
11
|
+
|
12
|
+
|
13
|
+
class Bert(Featureset):
|
14
|
+
"""Class to extract bert embeddings"""
|
15
|
+
|
16
|
+
def __init__(self, name, data_df, feat_type):
|
17
|
+
"""Constructor.
|
18
|
+
|
19
|
+
If_train is needed to distinguish from test/dev sets,
|
20
|
+
because they use the codebook from the training
|
21
|
+
"""
|
22
|
+
super().__init__(name, data_df, feat_type)
|
23
|
+
cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
24
|
+
self.device = self.util.config_val("MODEL", "device", cuda)
|
25
|
+
self.model_initialized = False
|
26
|
+
if feat_type == "bert":
|
27
|
+
self.feat_type = "bert-base-uncased"
|
28
|
+
else:
|
29
|
+
self.feat_type = feat_type
|
30
|
+
|
31
|
+
def init_model(self):
|
32
|
+
# load model
|
33
|
+
self.util.debug(f"loading {self.feat_type} model...")
|
34
|
+
model_path = self.util.config_val(
|
35
|
+
"FEATS", "bert.model", f"google-bert/{self.feat_type}"
|
36
|
+
)
|
37
|
+
config = transformers.AutoConfig.from_pretrained(model_path)
|
38
|
+
layer_num = config.num_hidden_layers
|
39
|
+
hidden_layer = int(self.util.config_val("FEATS", "bert.layer", "0"))
|
40
|
+
config.num_hidden_layers = layer_num - hidden_layer
|
41
|
+
self.util.debug(f"using hidden layer #{config.num_hidden_layers}")
|
42
|
+
|
43
|
+
self.tokenizer = BertTokenizer.from_pretrained(model_path)
|
44
|
+
self.model = BertModel.from_pretrained(model_path, config=config).to(
|
45
|
+
self.device
|
46
|
+
)
|
47
|
+
print(f"initialized {self.feat_type} model on {self.device}")
|
48
|
+
self.model.eval()
|
49
|
+
self.model_initialized = True
|
50
|
+
|
51
|
+
def extract(self):
|
52
|
+
"""Extract the features or load them from disk if present."""
|
53
|
+
store = self.util.get_path("store")
|
54
|
+
storage = os.path.join(store, f"{self.name}.pkl")
|
55
|
+
extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
|
56
|
+
no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
|
57
|
+
if extract or no_reuse or not os.path.isfile(storage):
|
58
|
+
if not self.model_initialized:
|
59
|
+
self.init_model()
|
60
|
+
self.util.debug(
|
61
|
+
f"extracting {self.feat_type} embeddings, this might take a while..."
|
62
|
+
)
|
63
|
+
emb_series = pd.Series(index=self.data_df.index, dtype=object)
|
64
|
+
for idx, row in tqdm(self.data_df.iterrows(), total=len(self.data_df)):
|
65
|
+
file = idx[0]
|
66
|
+
text = row['text']
|
67
|
+
emb = self.get_embeddings(text, file)
|
68
|
+
emb_series[idx] = emb
|
69
|
+
# print(f"emb_series shape: {emb_series.shape}")
|
70
|
+
self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
|
71
|
+
# print(f"df shape: {self.df.shape}")
|
72
|
+
self.df.to_pickle(storage)
|
73
|
+
try:
|
74
|
+
glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
|
75
|
+
except KeyError:
|
76
|
+
pass
|
77
|
+
else:
|
78
|
+
self.util.debug(f"reusing extracted {self.feat_type} embeddings")
|
79
|
+
self.df = pd.read_pickle(storage)
|
80
|
+
if self.df.isnull().values.any():
|
81
|
+
self.util.error(
|
82
|
+
f"got nan: {self.df.shape} {self.df.isnull().sum().sum()}"
|
83
|
+
)
|
84
|
+
|
85
|
+
def get_embeddings(self, text, file):
|
86
|
+
r"""Extract embeddings from raw audio signal."""
|
87
|
+
try:
|
88
|
+
with torch.no_grad():
|
89
|
+
inputs = self.tokenizer(text, return_tensors="pt")
|
90
|
+
outputs = self.model(**inputs)
|
91
|
+
# mean pooling
|
92
|
+
y = torch.mean(outputs[0], dim=1)
|
93
|
+
y = y.ravel()
|
94
|
+
except RuntimeError as re:
|
95
|
+
print(str(re))
|
96
|
+
self.util.error(f"couldn't extract file: {file}")
|
97
|
+
y = None
|
98
|
+
if y is None:
|
99
|
+
return None
|
100
|
+
return y.detach().cpu().numpy()
|
101
|
+
|
102
|
+
def extract_sample(self, text):
|
103
|
+
self.init_model()
|
104
|
+
feats = self.get_embeddings(text, "no file")
|
105
|
+
return feats
|
@@ -80,7 +80,7 @@ class FeatureExtractor:
|
|
80
80
|
return MLD_set
|
81
81
|
|
82
82
|
elif feats_type.startswith(
|
83
|
-
("wav2vec2", "hubert", "wavlm", "spkrec", "whisper", "ast", "emotion2vec")
|
83
|
+
("bert", "wav2vec2", "hubert", "wavlm", "spkrec", "whisper", "ast", "emotion2vec")
|
84
84
|
):
|
85
85
|
return self._get_feat_extractor_by_prefix(feats_type)
|
86
86
|
|
@@ -108,8 +108,10 @@ nkululeko/autopredict/ap_sid.py
|
|
108
108
|
nkululeko/autopredict/ap_snr.py
|
109
109
|
nkululeko/autopredict/ap_stoi.py
|
110
110
|
nkululeko/autopredict/ap_text.py
|
111
|
+
nkululeko/autopredict/ap_translate.py
|
111
112
|
nkululeko/autopredict/ap_valence.py
|
112
113
|
nkululeko/autopredict/estimate_snr.py
|
114
|
+
nkululeko/autopredict/google_translator.py
|
113
115
|
nkululeko/autopredict/whisper_transcriber.py
|
114
116
|
nkululeko/autopredict/tests/__init__.py
|
115
117
|
nkululeko/autopredict/tests/test_whisper_transcriber.py
|
@@ -123,6 +125,7 @@ nkululeko/feat_extract/feats_analyser.py
|
|
123
125
|
nkululeko/feat_extract/feats_ast.py
|
124
126
|
nkululeko/feat_extract/feats_auddim.py
|
125
127
|
nkululeko/feat_extract/feats_audmodel.py
|
128
|
+
nkululeko/feat_extract/feats_bert.py
|
126
129
|
nkululeko/feat_extract/feats_clap.py
|
127
130
|
nkululeko/feat_extract/feats_emotion2vec.py
|
128
131
|
nkululeko/feat_extract/feats_hubert.py
|
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
|
|
7
7
|
|
8
8
|
[project]
|
9
9
|
name = "nkululeko"
|
10
|
-
version = "0.95.8"
|
10
|
+
version = "0.96.0"
|
11
11
|
description = "Machine learning audio prediction experiments based on templates"
|
12
12
|
authors = [
|
13
13
|
{name = "Felix Burkhardt", email = "fxburk@gmail.com"},
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/autopredict/tests/test_whisper_transcriber.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{nkululeko-0.95.8 → nkululeko-0.96.0}/nkululeko/feat_extract/transformer_feature_extractor.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|