nkululeko 0.93.12__tar.gz → 0.93.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nkululeko-0.93.12 → nkululeko-0.93.13}/CHANGELOG.md +4 -0
- nkululeko-0.93.13/PKG-INFO +39 -0
- nkululeko-0.93.13/data/aesdd/process_database.py +50 -0
- nkululeko-0.93.13/data/ased/process_database.py +58 -0
- nkululeko-0.93.13/data/asvp-esd/process_database.py +79 -0
- nkululeko-0.93.13/data/baved/process_database.py +63 -0
- nkululeko-0.93.13/data/cafe/process_database.py +50 -0
- nkululeko-0.93.13/data/clac/process_database.py +47 -0
- nkululeko-0.93.13/data/cmu-mosei/process_database.py +53 -0
- nkululeko-0.93.13/data/demos/process_database.py +67 -0
- nkululeko-0.93.13/data/ekorpus/process_database.py +54 -0
- nkululeko-0.93.13/data/emns/process_database.py +66 -0
- nkululeko-0.93.13/data/emofilm/convert_to_16k.py +38 -0
- nkululeko-0.93.13/data/emofilm/process_database.py +51 -0
- nkululeko-0.93.13/data/emorynlp/process_database.py +59 -0
- nkululeko-0.93.13/data/emov-db/process_database.py +66 -0
- nkululeko-0.93.13/data/emovo/process_database.py +57 -0
- nkululeko-0.93.13/data/emozionalmente/create.py +236 -0
- nkululeko-0.93.13/data/enterface/process_database.py +80 -0
- nkululeko-0.93.13/data/esd/process_database.py +51 -0
- nkululeko-0.93.13/data/gerparas/process_database.py +70 -0
- nkululeko-0.93.13/data/iemocap/process_database.py +83 -0
- nkululeko-0.93.13/data/jl/process_database.py +69 -0
- nkululeko-0.93.13/data/jtes/process_database.py +57 -0
- nkululeko-0.93.13/data/meld/process_database.py +112 -0
- nkululeko-0.93.13/data/mesd/process_database.py +59 -0
- nkululeko-0.93.13/data/mess/process_database.py +54 -0
- nkululeko-0.93.13/data/mlendsnd/process_database.py +59 -0
- nkululeko-0.93.13/data/msp-improv/process_database2.py +60 -0
- nkululeko-0.93.13/data/msp-podcast/process_database.py +89 -0
- nkululeko-0.93.13/data/oreau2/process_database.py +60 -0
- nkululeko-0.93.13/data/portuguese/process_database.py +122 -0
- nkululeko-0.93.13/data/ravdess/process_database.py +102 -0
- nkululeko-0.93.13/data/ravdess/process_database_speaker.py +88 -0
- nkululeko-0.93.13/data/savee/process_database.py +53 -0
- nkululeko-0.93.13/data/shemo/process_database.py +58 -0
- nkululeko-0.93.13/data/subesco/process_database.py +78 -0
- nkululeko-0.93.13/data/tess/process_database.py +52 -0
- nkululeko-0.93.13/data/thorsten-emotional/process_database.py +42 -0
- nkululeko-0.93.13/data/urdu/process_database.py +64 -0
- nkululeko-0.93.13/data/vivae/process_database.py +38 -0
- nkululeko-0.93.13/docs/source/conf.py +96 -0
- nkululeko-0.93.13/meta/demos/demo_best_model.py +32 -0
- nkululeko-0.93.13/meta/demos/my_experiment.py +52 -0
- nkululeko-0.93.13/meta/demos/my_experiment_local.py +48 -0
- nkululeko-0.93.13/meta/demos/plot_faster_anim.py +35 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/constants.py +1 -1
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/data/dataset.py +29 -58
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_analyser.py +4 -2
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_wav2vec2.py +1 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/plots.py +4 -1
- nkululeko-0.93.13/nkululeko.egg-info/PKG-INFO +39 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko.egg-info/SOURCES.txt +44 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/setup.cfg +0 -2
- nkululeko-0.93.12/PKG-INFO +0 -1491
- nkululeko-0.93.12/nkululeko.egg-info/PKG-INFO +0 -1491
- {nkululeko-0.93.12 → nkululeko-0.93.13}/LICENSE +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/README.md +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/__init__.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/aug_train.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/augment.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/augmenting/__init__.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/augmenting/augmenter.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/augmenting/randomsplicer.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/augmenting/randomsplicing.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/augmenting/resampler.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/autopredict/__init__.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/autopredict/ap_age.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/autopredict/ap_arousal.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/autopredict/ap_dominance.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/autopredict/ap_gender.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/autopredict/ap_mos.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/autopredict/ap_pesq.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/autopredict/ap_sdr.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/autopredict/ap_sid.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/autopredict/ap_snr.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/autopredict/ap_stoi.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/autopredict/ap_valence.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/autopredict/estimate_snr.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/cacheddataset.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/data/__init__.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/data/dataset_csv.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/demo-ft.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/demo.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/demo_feats.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/demo_predictor.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/ensemble.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/experiment.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/explore.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/export.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/__init__.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_agender.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_agender_agender.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_ast.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_auddim.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_audmodel.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_clap.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_hubert.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_import.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_mld.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_mos.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_opensmile.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_oxbow.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_praat.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_snr.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_spectra.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_spkrec.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_squim.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_trill.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_wavlm.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feats_whisper.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/featureset.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/feinberg_praat.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feat_extract/transformer_feature_extractor.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/feature_extractor.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/file_checker.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/filter_data.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/fixedsegment.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/glob_conf.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/losses/__init__.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/losses/loss_ccc.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/losses/loss_softf1loss.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/modelrunner.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/models/__init__.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/models/model.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/models/model_bayes.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/models/model_cnn.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/models/model_gmm.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/models/model_knn.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/models/model_knn_reg.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/models/model_lin_reg.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/models/model_mlp.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/models/model_mlp_regression.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/models/model_svm.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/models/model_svr.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/models/model_tree.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/models/model_tree_reg.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/models/model_tuned.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/models/model_xgb.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/models/model_xgr.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/multidb.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/nkuluflag.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/nkululeko.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/predict.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/reporting/__init__.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/reporting/defines.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/reporting/latex_writer.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/reporting/report.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/reporting/report_item.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/reporting/reporter.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/reporting/result.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/resample.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/runmanager.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/scaler.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/segment.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/segmenting/__init__.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/segmenting/seg_inaspeechsegmenter.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/segmenting/seg_pyannote.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/segmenting/seg_silero.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/syllable_nuclei.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/test.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/test_predictor.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/test_pretrain.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/utils/__init__.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/utils/files.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/utils/stats.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko/utils/util.py +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko.egg-info/dependency_links.txt +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko.egg-info/entry_points.txt +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko.egg-info/requires.txt +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/nkululeko.egg-info/top_level.txt +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/pyproject.toml +0 -0
- {nkululeko-0.93.12 → nkululeko-0.93.13}/setup.py +0 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
Metadata-Version: 2.2
|
2
|
+
Name: nkululeko
|
3
|
+
Version: 0.93.13
|
4
|
+
Summary: Machine learning audio prediction experiments based on templates
|
5
|
+
Home-page: https://github.com/felixbur/nkululeko
|
6
|
+
Author: Felix Burkhardt
|
7
|
+
Author-email: fxburk@gmail.com
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
10
|
+
Classifier: Operating System :: OS Independent
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
12
|
+
Classifier: Topic :: Scientific/Engineering
|
13
|
+
Requires-Python: >=3.9
|
14
|
+
License-File: LICENSE
|
15
|
+
Requires-Dist: audeer
|
16
|
+
Requires-Dist: audformat
|
17
|
+
Requires-Dist: audinterface
|
18
|
+
Requires-Dist: audiofile
|
19
|
+
Requires-Dist: audiomentations
|
20
|
+
Requires-Dist: audmetric
|
21
|
+
Requires-Dist: audonnx
|
22
|
+
Requires-Dist: confidence_intervals
|
23
|
+
Requires-Dist: datasets
|
24
|
+
Requires-Dist: imageio
|
25
|
+
Requires-Dist: matplotlib
|
26
|
+
Requires-Dist: numpy
|
27
|
+
Requires-Dist: opensmile
|
28
|
+
Requires-Dist: pandas
|
29
|
+
Requires-Dist: praat-parselmouth
|
30
|
+
Requires-Dist: scikit_learn
|
31
|
+
Requires-Dist: scipy
|
32
|
+
Requires-Dist: seaborn
|
33
|
+
Requires-Dist: sounddevice
|
34
|
+
Requires-Dist: torch
|
35
|
+
Requires-Dist: torchvision
|
36
|
+
Requires-Dist: transformers
|
37
|
+
Requires-Dist: umap-learn
|
38
|
+
Requires-Dist: xgboost
|
39
|
+
Requires-Dist: pylatex
|
@@ -0,0 +1,50 @@
|
|
1
|
+
import argparse
|
2
|
+
from pathlib import Path
|
3
|
+
|
4
|
+
import pandas as pd
|
5
|
+
|
6
|
+
# First letter of the file stem -> emotion label.
emotion_map = {
    "a": "anger",
    "d": "disgust",
    "h": "happiness",
    "f": "fear",
    "s": "sadness",
}


def main():
    """Create the AESDD metadata CSV files.

    Recursively collects ``*.wav`` files below ``--data_dir``, derives
    emotion, speaker and gender from each file stem and writes
    ``aesdd.csv``, ``aesdd_train.csv`` and ``aesdd_test.csv`` into
    ``--out_dir`` (created if missing). Speaker "5" forms the test split.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default="AESDD", help="Path of AESDD directory")
    parser.add_argument("--out_dir", type=str, default=".")
    args = parser.parse_args()

    data_dir = Path(args.data_dir)
    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # The stem "s05 (3)" is skipped, as in the original script.
    # Sorting makes the row order independent of the filesystem.
    files = sorted(p for p in data_dir.glob("**/*.wav") if p.stem != "s05 (3)")
    names = [file.stem for file in files]
    emotion = [emotion_map[name[0]] for name in names]
    # The speaker id is the number between the parentheses of the stem.
    speaker = [str(int(x[x.find("(") + 1: x.find(")")])) for x in names]
    # Odd speaker ids map to "male", even ones to "female".
    gender = [["female", "male"][int(v) % 2] for v in speaker]
    language = ["greek" for _ in files]

    # convert to df; "language" was computed but never stored before
    df = pd.DataFrame(
        {
            "file": files,
            "speaker": speaker,
            "emotion": emotion,
            "gender": gender,
            "language": language,
        }
    )

    # allocate speaker 5 for test set
    train_df = df[df["speaker"] != "5"]
    test_df = df.drop(train_df.index)

    # save to CSV
    df.to_csv(out_dir / "aesdd.csv", index=False)
    train_df.to_csv(out_dir / "aesdd_train.csv", index=False)
    test_df.to_csv(out_dir / "aesdd_test.csv", index=False)

    print(f"Total: {len(df)}, Train: {len(train_df)}, Test: {len(test_df)}")


if __name__ == "__main__":
    main()
|
@@ -0,0 +1,58 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# @filename: process_database.py
|
5
|
+
# @description: Process the ASED database
|
6
|
+
|
7
|
+
|
8
|
+
import argparse
|
9
|
+
import shutil
|
10
|
+
from importlib.resources import path
|
11
|
+
from pathlib import Path
|
12
|
+
|
13
|
+
import pandas as pd
|
14
|
+
from joblib import delayed
|
15
|
+
|
16
|
+
# First letter of the file stem (lower-cased) -> emotion label.
emotion_map = {
    "a": "anger",
    "h": "happiness",
    "n": "neutral",
    "f": "fear",
    "s": "sadness",
}


def main():
    """Create the ASED metadata CSV files.

    Recursively collects ``*.wav`` files below ``--data_dir``, derives
    emotion, gender and speaker from fixed positions of each file stem and
    writes ``ased.csv``, ``ased_train.csv`` and ``ased_test.csv`` into
    ``--out_dir`` (created if missing).
    """
    parser = argparse.ArgumentParser(
        usage="python3 process_database.py [--data_dir DATA_DIR] [--out_dir OUT_DIR]"
    )
    parser.add_argument("--data_dir", type=str, default="ASED_V1", help="Path to the ASED database")
    parser.add_argument("--out_dir", type=str, default=".", help="Path to the output directory")
    args = parser.parse_args()

    data_dir = Path(args.data_dir)
    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Sorting makes the row order independent of the filesystem.
    paths = sorted(data_dir.glob("**/*.wav"))
    emotions = [emotion_map[p.stem[0].lower()] for p in paths]
    # Characters 9-10 of the stem hold a 1-based gender code (1 -> female, 2 -> male).
    genders = [["female", "male"][int(p.stem[9:11]) - 1] for p in paths]
    # The last two characters of the stem are the speaker id.
    speakers = [p.stem[-2:] for p in paths]
    languages = ["amharic" for _ in paths]

    # convert to df
    df = pd.DataFrame(
        {
            "file": paths,
            "emotion": emotions,
            "gender": genders,
            "speaker": speakers,
            "language": languages,
        }
    )

    # Allocate speakers > 55 for test. The ids are compared as strings, which
    # matches numeric order only while they are two-character, zero-padded ids.
    df_test = df[df["speaker"] > "55"]
    df_train = df.drop(df_test.index)

    # save to csv
    df_train.to_csv(out_dir / "ased_train.csv", index=False)
    df_test.to_csv(out_dir / "ased_test.csv", index=False)
    df.to_csv(out_dir / "ased.csv", index=False)


if __name__ == "__main__":
    main()
|
58
|
+
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# process_database --> ASVP-ESD
|
2
|
+
|
3
|
+
import argparse
|
4
|
+
import os
|
5
|
+
from pathlib import Path
|
6
|
+
|
7
|
+
import pandas as pd
|
8
|
+
from sklearn.model_selection import train_test_split
|
9
|
+
|
10
|
+
# Third dash-separated field of the file stem -> emotion label.
# NOTE: the spelling 'disapointed' is kept unchanged because it is the label
# value written to the CSV files.
emotion_map = {
    '01': 'boredom',
    '02': 'neutral',
    '03': 'happy',
    '04': 'sad',
    '05': 'anger',
    '06': 'fear',
    '07': 'disgust',
    '08': 'surprise',
    '09': 'excited',
    '10': 'pleasure',
    '11': 'pain',
    '12': 'disapointed',
    '13': 'others',
}


# Second dash-separated field of the file stem -> vocal channel.
vocal_map = {
    "01": "speech",
    "02": "non_speech",
}

# Language codes; currently unused by main() because not every file name
# carries a language field.
language_map = {
    "00": "chinese",
    "01": "english",
    "02": "french",
    "03": "russian",
    "04": "other1",
    "05": "other2",
}


def main():
    """Create train/test CSV files for the ASVP-ESD database.

    Collects ``Audio/**/*.wav`` below ``--data_dir``, derives the vocal
    channel and the emotion from the dash-separated file stem, splits the
    rows 80/20 with a fixed random seed and writes ``asvp_train.csv`` and
    ``asvp_test.csv`` into ``--output_dir`` (created if missing).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=Path,
                        default=Path('./ASVP-ESD-Update'))
    parser.add_argument('--output_dir', type=Path, default=Path('./'))
    args = parser.parse_args()

    input_dir = args.data_dir
    output_dir = args.output_dir
    output_dir.mkdir(parents=True, exist_ok=True)

    # List of all WAV files in the database. Sorting makes the seeded split
    # below reproducible regardless of the filesystem's listing order.
    file = sorted(input_dir.glob('Audio/**/*.wav'))

    # Vocal channel (01 = speech, 02 = non speech).
    vocal = [vocal_map[p.stem.split('-')[1]] for p in file]

    # emotion
    emotion = [emotion_map[p.stem.split('-')[2]] for p in file]

    # save to pandas dataframe
    df = pd.DataFrame(data={'file': file, 'vocal': vocal, 'emotion': emotion})

    # split to train, test sets using scikit-learn
    train, test = train_test_split(df, test_size=0.2, random_state=42)

    # Save to csv inside the requested output directory; previously
    # --output_dir was parsed but ignored and files went to the CWD.
    train.to_csv(output_dir / 'asvp_train.csv', index=False)
    test.to_csv(output_dir / 'asvp_test.csv', index=False)


if __name__ == '__main__':
    main()
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# process_database --> BAVED
|
2
|
+
# modified from https://github.com/Strong-AI-Lab/emotion/blob/master/datasets/BAVED/process.py
|
3
|
+
|
4
|
+
import argparse
|
5
|
+
from html import parser
|
6
|
+
from pathlib import Path
|
7
|
+
|
8
|
+
import pandas as pd
|
9
|
+
|
10
|
+
# Index = fifth dash-separated field of the file stem.
emotion_level = ["low", "normal", "high"]
# Index = fourth dash-separated field of the file stem.
word_map = [
    "اعجبني",
    "لم يعجبني",
    "هذا",
    "الفيلم",
    "رائع",
    "مقول",
    "سيئ",
]

gender_map = {
    "M": "male",
    "F": "female"
}


def main():
    """Create the BAVED metadata CSV files.

    Collects ``?/*.wav`` below ``--data_dir``, decodes speaker, gender, age,
    word and emotion level from the dash-separated file stem and writes
    ``baved.csv``, ``baved_train.csv`` and ``baved_test.csv`` into
    ``--output_dir`` (created if missing).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='BAVED')
    parser.add_argument('--output_dir', type=str, default='.')
    args = parser.parse_args()

    data_dir = Path(args.data_dir)
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # read data; sorting makes the row order independent of the filesystem
    paths = sorted(data_dir.glob('?/*.wav'))
    # Stem layout: speaker-gender-age-word-level-...
    fields = [p.stem.split('-') for p in paths]
    speakers = [f[0] for f in fields]
    genders = [gender_map[f[1].upper()] for f in fields]
    ages = [int(f[2]) for f in fields]
    words = [word_map[int(f[3])] for f in fields]
    emo_levels = [emotion_level[int(f[4])] for f in fields]
    languages = ["arabic" for _ in paths]

    # convert to dataframe
    # NOTE(review): the column is called "speakers" here while the other
    # database scripts use "speaker" - kept as is to not change the CSV schema.
    df = pd.DataFrame({"file": paths, "emotion_level": emo_levels,
                       "speakers": speakers, "gender": genders, "age": ages,
                       "word": words, "language": languages})

    # allocate speakers 46, 4, 54, 47 and 51 for test
    test_speakers = ['46', '4', '54', '47', '51']
    df_test = df[df['speakers'].isin(test_speakers)]
    df_train = df.drop(df_test.index)

    # save to CSV
    df.to_csv(output_dir / 'baved.csv', index=False)
    df_train.to_csv(output_dir / 'baved_train.csv', index=False)
    df_test.to_csv(output_dir / 'baved_test.csv', index=False)

    print(f"BAVED: {len(df)} samples, {len(df_train)} train, {len(df_test)} test")


if __name__ == '__main__':
    main()
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# process_database.py --> CaFE
|
2
|
+
# modified from https://github.com/Strong-AI-Lab/emotion/blob/master/datasets/CaFE/process.py
|
3
|
+
|
4
|
+
import argparse
|
5
|
+
from pathlib import Path
|
6
|
+
|
7
|
+
import pandas as pd
|
8
|
+
|
9
|
+
# Fourth character of the file stem -> emotion label.
emotion_map = {
    "C": "anger",
    "D": "disgust",
    "J": "happiness",
    "N": "neutral",
    "P": "fear",
    "S": "surprise",
    "T": "sadness",
}


def main():
    """Create the CaFE metadata CSV files.

    Recursively collects ``*.wav`` files below ``--data_dir``, derives
    speaker, emotion and gender from each file stem and writes ``cafe.csv``,
    ``cafe_train.csv`` and ``cafe_test.csv`` into ``--output_dir`` (created
    if missing). Speakers "11" and "12" form the test split.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, default="CaFE")
    parser.add_argument("--output_dir", type=str, default=".")
    args = parser.parse_args()

    data_dir = Path(args.data_dir)
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Sorting makes the row order independent of the filesystem.
    files = sorted(data_dir.glob("**/*.wav"))
    emotion = [emotion_map[file.stem[3]] for file in files]
    # The first two characters of the stem are the speaker id.
    speaker = [file.stem[:2] for file in files]
    language = ["french" for _ in files]
    country = ["canada" for _ in files]
    # Odd speaker ids map to "male", even ones to "female".
    gender = [["female", "male"][int(v) % 2] for v in speaker]

    # convert to df; "gender" was computed but never stored before, it is
    # appended as the last column so existing column positions are unchanged
    df = pd.DataFrame(
        {
            "file": files,
            "speaker": speaker,
            "emotion": emotion,
            "language": language,
            "country": country,
            "gender": gender,
        }
    )

    # allocate the last two speakers (11, 12) for test
    train_df = df[~df["speaker"].isin(["11", "12"])]
    test_df = df.drop(train_df.index)

    # save to CSV
    df.to_csv(output_dir / "cafe.csv", index=False)
    train_df.to_csv(output_dir / "cafe_train.csv", index=False)
    test_df.to_csv(output_dir / "cafe_test.csv", index=False)


if __name__ == "__main__":
    main()
|
@@ -0,0 +1,47 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# process_database.py --> CLAC
|
3
|
+
# need to install openpyxl: pip install openpyxl
|
4
|
+
|
5
|
+
import argparse
|
6
|
+
|
7
|
+
import pandas as pd
|
8
|
+
|
9
|
+
|
10
|
+
def main():
    """Convert the CLAC ``metadata.xlsx`` sheet into ``clac.csv``.

    Reads ``metadata.xlsx`` from ``--data_dir``, renames a few columns,
    drops "education (years)", adds a "file" column (speaker id plus
    ``.wav``) and writes the result to ``clac.csv`` in ``--out_dir``
    (created if missing).
    """
    # Local import: pathlib is not imported at the top of this script.
    from pathlib import Path

    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", help="input dir", default="CLAC-Dataset")
    parser.add_argument("--out_dir", help="output dir", default=".")
    args = parser.parse_args()

    data_dir = Path(args.data_dir)
    out_dir = Path(args.out_dir)

    # Read metadata from excel file
    metadata = pd.read_excel(data_dir / "metadata.xlsx", index_col=0)

    # rename columns
    metadata = metadata.rename(
        columns={
            "speakerID": "speaker",
            "worker_country": "country",
            "worker_region": "region",
            "age (years)": "age",
        }
    )

    # remove "education (years)" column
    metadata = metadata.drop(columns=["education (years)"])

    # add file with WAV extension
    metadata["file"] = metadata["speaker"] + ".wav"

    print(metadata.head())

    # save to csv file
    out_dir.mkdir(parents=True, exist_ok=True)
    out_file = out_dir / "clac.csv"
    metadata.to_csv(out_file)

    # Report after saving, naming the file that was actually written
    # (the old message claimed "metadata.csv").
    print(f"Length of metadata: {len(metadata)}, saved as {out_file}")


if __name__ == "__main__":
    main()
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# process_database.py -> CMU-MOSEI database
|
2
|
+
# input file:
|
3
|
+
# - label_paths: CMU_MOSEI_Labels.csv
|
4
|
+
# - db_paths: CMU-MOSEI
|
5
|
+
|
6
|
+
import argparse
|
7
|
+
import os
|
8
|
+
|
9
|
+
import pandas as pd
|
10
|
+
|
11
|
+
|
12
|
+
def preprocess(data_dirs, name, split_df, out_path):
    """Write the metadata CSV for one CMU-MOSEI partition.

    Args:
        data_dirs: Root directory of the CMU-MOSEI database.
        name: Partition name; used as sub-directory of
            ``Audio/Segmented_Audio`` and in the log message.
        split_df: DataFrame with the columns ``file``, ``index``,
            ``label2a`` and ``label6`` for this partition.
        out_path: Path of the CSV file to write.

    The CSV has the columns ``file``, ``sentiment`` and ``emotion``. Label
    values without a known name are written unchanged.
    """
    # Whole-label lookups instead of chained substring replacements, so an
    # unexpected label such as "10" is no longer rewritten piecewise.
    sentiment_names = {"0": "negative", "1": "positive"}
    emotion_names = {
        "0": "hap",
        "1": "sad",
        "2": "ang",
        "3": "sur",
        "4": "dis",
        "5": "fea",
    }

    meta_data = []
    rows = zip(split_df['file'], split_df['index'],
               split_df['label2a'], split_df['label6'])
    for file_id, segment, label2a, label6 in rows:
        # get wav file path
        filename = file_id + '_' + str(segment) + '.wav'
        file = os.path.join(data_dirs, 'Audio', 'Segmented_Audio',
                            name, filename)

        sentiment = str(label2a)
        emotion = str(label6)

        meta_data.append({
            'file': file,
            'sentiment': sentiment_names.get(sentiment, sentiment),
            'emotion': emotion_names.get(emotion, emotion),
        })

    # write to csv; explicit columns keep the header even for an empty split
    meta_data_df = pd.DataFrame(meta_data, columns=['file', 'sentiment', 'emotion'])
    meta_data_df.to_csv(out_path, index=False)
    print(f'Wrote {name} partition with {len(meta_data)} samples to {out_path}.')
|
39
|
+
|
40
|
+
if __name__ == "__main__":
    # Script entry point: read the label table and write one CSV per split.
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='./CMU-MOSEI',
                        help='Path to CMU-MOSEI directory.')
    # Previously hard-coded; the default keeps the old behaviour.
    parser.add_argument('--label_path', type=str, default='CMU_MOSEI_Labels.csv',
                        help='Path to the CMU-MOSEI label CSV file.')
    args = parser.parse_args()
    data = pd.read_csv(args.label_path)
    # The "split" column holds 0, 1, 2 for train, dev and test.
    for i, split_name in enumerate(['train', 'dev', 'test']):
        print(f'Processing {split_name} (split == {i}).')
        split_df = data[data['split'] == i]
        preprocess(args.data_dir, split_name, split_df,
                   f'mosei_{split_name}.csv')
|
52
|
+
|
53
|
+
|
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# process_database.py --> DEMoS
|
3
|
+
|
4
|
+
import argparse
|
5
|
+
from pathlib import Path
|
6
|
+
|
7
|
+
import pandas as pd
|
8
|
+
|
9
|
+
# First three characters of the last underscore-separated stem field -> emotion.
emotion_map = {
    "rab": "anger",
    "tri": "sadness",
    "gio": "happiness",
    "pau": "fear",
    "dis": "disgust",
    "col": "guilt",
    "sor": "surprise",
    "neu": "neutral",
}

gender_map = {
    "f": "female",
    "m": "male"
}


def main():
    """Create the DEMoS metadata CSV files.

    Recursively collects ``*.wav`` files below ``--data_dir``, derives
    emotion, gender, speaker and prototypicality from the
    underscore-separated file stem and writes ``demos.csv``,
    ``demos_train.csv`` and ``demos_test.csv`` into ``--output_dir``
    (created, including parents, if missing).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='DEMoS', help='Path to the database file')
    parser.add_argument('--output_dir', type=str, default='.', help='Path to the output directory')
    args = parser.parse_args()

    data_dir = Path(args.data_dir)
    output_dir = Path(args.output_dir)

    # parents=True so that nested output paths work as well
    output_dir.mkdir(parents=True, exist_ok=True)

    # Sorting makes the row order independent of the filesystem.
    paths = sorted(data_dir.glob('**/*.wav'))

    # Stem layout, counted from the end: ..._gender_speaker_emotion
    fields = [p.stem.split('_') for p in paths]
    emotions = [emotion_map[f[-1][:3]] for f in fields]
    genders = [gender_map[f[-3]] for f in fields]
    speakers = [f[-2] for f in fields]
    languages = ["italian" for _ in paths]
    # A stem prefix of "NP" or "PR" is kept as is, anything else becomes "neutral".
    prototypicality = [p.stem[:2] if p.stem[:2] in {"NP", "PR"} else "neutral" for p in paths]

    # convert to df
    df = pd.DataFrame(
        {
            "file": paths,
            "emotion": emotions,
            "gender": genders,
            "speaker": speakers,
            "language": languages,
            "prototypicality": prototypicality,
        }
    )

    # Speaker-independent split: allocate speakers > 57 for test. The ids are
    # compared as strings, which matches numeric order only while they are
    # two-character, zero-padded ids.
    df_test = df[df["speaker"] > "57"]
    df_train = df.drop(df_test.index)

    # save to csv
    df.to_csv(output_dir / "demos.csv", index=False)
    df_train.to_csv(output_dir / "demos_train.csv", index=False)
    df_test.to_csv(output_dir / "demos_test.csv", index=False)

    print(f"Total: {len(df)}, Train: {len(df_train)}, Test: {len(df_test)}")


if __name__ == "__main__":
    main()
|
67
|
+
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# process_database.py --> ekorpus
|
2
|
+
# modified from https://github.com/Strong-AI-Lab/emotion/blob/master/datasets/EESC/process.py
|
3
|
+
# need to install textgrid: pip install textgrid
|
4
|
+
|
5
|
+
import argparse
|
6
|
+
from pathlib import Path
|
7
|
+
|
8
|
+
import pandas as pd
|
9
|
+
import textgrid
|
10
|
+
from sklearn.model_selection import train_test_split
|
11
|
+
from tqdm import tqdm
|
12
|
+
|
13
|
+
|
14
|
+
def main():
    """Create the EKORPUS metadata CSV files.

    Collects ``*.wav`` files in ``--data_dir``, reads the emotion label of
    each recording from the ``*.TextGrid`` file with the same stem and
    writes ``ekorpus.csv``, ``ekorpus_train.csv`` and ``ekorpus_test.csv``
    into ``--output_dir`` (created if missing). The train/test split is
    80/20, stratified by emotion, with a fixed random seed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='EKORPUS')
    parser.add_argument('--output_dir', type=str, default='.')
    args = parser.parse_args()

    data_dir = Path(args.data_dir)
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Sorting makes the seeded split below reproducible regardless of the
    # filesystem's listing order.
    paths = sorted(data_dir.glob('*.wav'))

    # Emotion label per annotation file, keyed by file stem.
    labels = {}
    for path in tqdm(
        data_dir.glob("*.TextGrid"), desc="Processing annotations", total=len(paths)
    ):
        grid = textgrid.TextGrid.fromFile(path)
        labels[path.stem] = grid.getFirst("emotion")[0].mark

    # Look every label up by the WAV stem. Previously the label column was
    # taken in TextGrid listing order, which is independent of the WAV
    # listing order and could pair files with the wrong emotion.
    unlabelled = [p for p in paths if p.stem not in labels]
    if unlabelled:
        print(f"EKORPUS: skipping {len(unlabelled)} wav files without TextGrid annotation")
    paths = [p for p in paths if p.stem in labels]
    emotion = [labels[p.stem] for p in paths]
    language = ['estonian' for _ in paths]

    df = pd.DataFrame(
        {"file": paths, "emotion": emotion, "language": language})

    # split for training and test
    df_train, df_test = train_test_split(
        df, test_size=0.2, random_state=42, stratify=df['emotion'])

    # save to CSV
    df.to_csv(output_dir / 'ekorpus.csv', index=False)
    df_train.to_csv(output_dir / 'ekorpus_train.csv', index=False)
    df_test.to_csv(output_dir / 'ekorpus_test.csv', index=False)

    print(f"EKORPUS: {len(df)} samples, {len(df_train)} train, {len(df_test)} test")


if __name__ == '__main__':
    main()
|
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# process_database.py --> EMNS
|
3
|
+
|
4
|
+
import argparse
|
5
|
+
from pathlib import Path
|
6
|
+
|
7
|
+
import pandas as pd
|
8
|
+
from sklearn.model_selection import train_test_split
|
9
|
+
|
10
|
+
# Translation table from the capitalised emotion names found in the metadata
# to the lowercase labels written to the output CSVs. 'Excited' is
# deliberately merged into 'happiness'.
emotion_map = dict(
    Sarcastic='sarcastic',
    Excited='happiness',
    Neutral='neutral',
    Surprised='surprise',
    Disgust='disgust',
    Sad='sadness',
    Angry='anger',
    Happy='happiness',
)
|
20
|
+
|
21
|
+
def main():
    """Create stratified train/test CSV lists from the EMNS metadata file.

    Command-line arguments:
        --data_dir:      data directory (default ``EMNS``); accepted for
                         command-line compatibility, not used by this script.
        --metadata_file: pipe-delimited metadata file
                         (default ``EMNS/metadata.csv``).
        --output_dir:    directory the CSV files are written to (default ``.``).

    Writes ``emns_train_webm.csv`` / ``emns_test_webm.csv`` with the file names
    as found in the metadata, and ``emns_train.csv`` / ``emns_test.csv`` with
    ``.webm`` replaced by ``.wav`` in the file names.
    """
    parser = argparse.ArgumentParser(description='Process database')
    parser.add_argument('--data_dir', type=str, default='EMNS', help='data directory')
    parser.add_argument('--metadata_file', type=str, default='EMNS/metadata.csv', help='metadata file')
    parser.add_argument('--output_dir', type=str, default='.', help='data file')
    args = parser.parse_args()

    metadata_file = Path(args.metadata_file)
    output_dir = Path(args.output_dir)

    # parents/exist_ok: also works for nested output paths and avoids the
    # check-then-create race of a separate exists() test.
    output_dir.mkdir(parents=True, exist_ok=True)

    df = pd.read_csv(metadata_file, delimiter='|')

    # rename the metadata columns to the names used in the output CSVs
    df = df.rename(columns={'audio_recording': 'file', 'user_id': 'speaker'})

    # remove wavs path from file names (literal match, not a regex)
    df['file'] = df['file'].str.replace('wavs/', '', regex=False)

    # make gender lowercase
    df['gender'] = df['gender'].str.lower()

    # map emotions
    df['emotion'] = df['emotion'].map(emotion_map)

    # split into train and test based on emotion; the fixed seed makes the
    # split reproducible between runs
    train, test = train_test_split(
        df, test_size=0.2, stratify=df['emotion'], random_state=42)

    # save to csv
    train.to_csv(output_dir / 'emns_train_webm.csv', index=False)
    test.to_csv(output_dir / 'emns_test_webm.csv', index=False)

    # save to csv with WAV extension
    # regex=False: on pandas < 2.0 the pattern would otherwise be a regex in
    # which '.' matches any character. assign() returns a new frame instead of
    # writing into a slice of df (avoids SettingWithCopyWarning).
    train = train.assign(file=train['file'].str.replace('.webm', '.wav', regex=False))
    train.to_csv(output_dir / 'emns_train.csv', index=False)
    test = test.assign(file=test['file'].str.replace('.webm', '.wav', regex=False))
    test.to_csv(output_dir / 'emns_test.csv', index=False)

    print(f"Total: {len(df)}, Train: {len(train)}, Test: {len(test)}")


if __name__ == '__main__':
    main()
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# python code to convert wav files from 44.1kHz to 16kHz
|
2
|
+
# arguments: input_dir, output_dir
|
3
|
+
# sox must be installed
|
4
|
+
|
5
|
+
import argparse
|
6
|
+
import os
|
7
|
+
import subprocess
|
8
|
+
|
9
|
+
# Command-line options: where to read the source audio from and where to
# write the resampled copies.
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--input_dir', type=str, default='./EmoFilm/wav_corpus')
parser.add_argument('-o', '--output_dir', type=str, default='./EmoFilm/wav_corpus_16k')
args = parser.parse_args()

source_dir = args.input_dir
target_dir = args.output_dir

# create the target directory if it does not exist
os.makedirs(target_dir, exist_ok=True)

# Define the target sample rate
target_sr = 16000

# Loop over all audio files in the source directory (recursively). All
# results are written flat into target_dir as <name>_16k.wav.
for root, _dirs, files in os.walk(source_dir):
    for file in files:
        if not file.endswith(('.wav', '.mp3')):
            continue

        # Define the input and output file paths
        input_path = os.path.join(root, file)
        print(f"Resampling {input_path} to 16kHz")

        # splitext instead of slicing off a fixed number of characters
        stem, _ext = os.path.splitext(file)
        output_path = os.path.join(target_dir, stem + '_16k.wav')

        # Use sox to resample the audio file; 'gain 1' is passed through to
        # sox as an effect (NOTE(review): confirm the gain change is intended).
        result = subprocess.run(
            ['sox', input_path, '-r', str(target_sr), output_path, 'gain', '1'])

        # Report failures instead of ignoring them, but keep converting the
        # remaining files.
        if result.returncode != 0:
            print(f"WARNING: sox exited with code {result.returncode} for {input_path}")
|