nkululeko 0.76.0__py3-none-any.whl → 0.77.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/augment.py +1 -1
- nkululeko/augmenting/augmenter.py +1 -1
- nkululeko/augmenting/randomsplicer.py +1 -1
- nkululeko/augmenting/resampler.py +4 -9
- nkululeko/autopredict/ap_age.py +2 -4
- nkululeko/autopredict/ap_arousal.py +2 -4
- nkululeko/autopredict/ap_dominance.py +2 -4
- nkululeko/autopredict/ap_gender.py +2 -4
- nkululeko/autopredict/ap_mos.py +2 -4
- nkululeko/autopredict/ap_pesq.py +2 -4
- nkululeko/autopredict/ap_sdr.py +2 -4
- nkululeko/autopredict/ap_snr.py +2 -4
- nkululeko/autopredict/ap_stoi.py +2 -4
- nkululeko/autopredict/ap_valence.py +2 -4
- nkululeko/constants.py +1 -1
- nkululeko/data/dataset.py +8 -5
- nkululeko/demo.py +4 -10
- nkululeko/demo_predictor.py +1 -1
- nkululeko/experiment.py +10 -5
- nkululeko/explore.py +6 -13
- nkululeko/export.py +14 -25
- nkululeko/feat_extract/feats_analyser.py +121 -17
- nkululeko/feat_extract/feats_clap.py +4 -10
- nkululeko/feat_extract/feats_import.py +2 -4
- nkululeko/feat_extract/feats_mld.py +4 -9
- nkululeko/feat_extract/feats_mos.py +5 -13
- nkululeko/feat_extract/feats_oxbow.py +5 -12
- nkululeko/feat_extract/feats_snr.py +3 -7
- nkululeko/feat_extract/feats_squim.py +5 -13
- nkululeko/feat_extract/feats_trill.py +5 -13
- nkululeko/feat_extract/featureset.py +2 -4
- nkululeko/feat_extract/feinberg_praat.py +1 -1
- nkululeko/feature_extractor.py +1 -1
- nkululeko/file_checker.py +5 -5
- nkululeko/filter_data.py +6 -16
- nkululeko/modelrunner.py +1 -1
- nkululeko/models/model.py +1 -1
- nkululeko/models/model_cnn.py +1 -1
- nkululeko/models/model_mlp.py +1 -1
- nkululeko/models/model_mlp_regression.py +1 -1
- nkululeko/nkululeko.py +5 -13
- nkululeko/plots.py +40 -3
- nkululeko/predict.py +5 -13
- nkululeko/reporter.py +1 -1
- nkululeko/reporting/latex_writer.py +14 -9
- nkululeko/reporting/report.py +2 -1
- nkululeko/resample.py +5 -13
- nkululeko/runmanager.py +1 -1
- nkululeko/scaler.py +1 -1
- nkululeko/segment.py +1 -1
- nkululeko/segmenting/seg_silero.py +3 -5
- nkululeko/test.py +4 -10
- nkululeko/test_predictor.py +1 -1
- nkululeko/utils/stats.py +8 -0
- {nkululeko-0.76.0.dist-info → nkululeko-0.77.1.dist-info}/METADATA +12 -1
- nkululeko-0.77.1.dist-info/RECORD +104 -0
- nkululeko/balancer.py +0 -1
- nkululeko/split/__init__.py +0 -3
- nkululeko/split/example_binning.py +0 -27
- nkululeko/split/example_trainDevTestSplit.py +0 -81
- nkululeko/split/example_trainTestSplit.py +0 -77
- nkululeko/split/split_utils.py +0 -528
- nkululeko-0.76.0.dist-info/RECORD +0 -110
- /nkululeko/{util.py → utils/util.py} +0 -0
- {nkululeko-0.76.0.dist-info → nkululeko-0.77.1.dist-info}/LICENSE +0 -0
- {nkululeko-0.76.0.dist-info → nkululeko-0.77.1.dist-info}/WHEEL +0 -0
- {nkululeko-0.76.0.dist-info → nkululeko-0.77.1.dist-info}/top_level.txt +0 -0
nkululeko/augment.py
CHANGED
@@ -8,7 +8,7 @@ import shutil
|
|
8
8
|
import audformat
|
9
9
|
import pandas as pd
|
10
10
|
import torchaudio
|
11
|
-
from nkululeko.util import Util
|
11
|
+
from nkululeko.utils.util import Util
|
12
12
|
|
13
13
|
|
14
14
|
class Resampler:
|
@@ -42,9 +42,7 @@ class Resampler:
|
|
42
42
|
continue
|
43
43
|
if org_sr != self.SAMPLING_RATE:
|
44
44
|
self.util.debug(f"resampling {f} (sr = {org_sr})")
|
45
|
-
resampler = torchaudio.transforms.Resample(
|
46
|
-
org_sr, self.SAMPLING_RATE
|
47
|
-
)
|
45
|
+
resampler = torchaudio.transforms.Resample(org_sr, self.SAMPLING_RATE)
|
48
46
|
signal = resampler(signal)
|
49
47
|
if replace:
|
50
48
|
torchaudio.save(
|
@@ -61,9 +59,7 @@ class Resampler:
|
|
61
59
|
self.df = self.df.set_index(
|
62
60
|
self.df.index.set_levels(new_files, level="file")
|
63
61
|
)
|
64
|
-
target_file = self.util.config_val(
|
65
|
-
"RESAMPLE", "target", "resampled.csv"
|
66
|
-
)
|
62
|
+
target_file = self.util.config_val("RESAMPLE", "target", "resampled.csv")
|
67
63
|
# remove encoded labels
|
68
64
|
target = self.util.config_val("DATA", "target", "emotion")
|
69
65
|
if "class_label" in self.df.columns:
|
@@ -72,8 +68,7 @@ class Resampler:
|
|
72
68
|
# save file
|
73
69
|
self.df.to_csv(target_file)
|
74
70
|
self.util.debug(
|
75
|
-
"saved resampled list of files to"
|
76
|
-
f" {os.path.abspath(target_file)}"
|
71
|
+
"saved resampled list of files to" f" {os.path.abspath(target_file)}"
|
77
72
|
)
|
78
73
|
self.util.debug(f"resampled {succes} files, {error} errors")
|
79
74
|
|
nkululeko/autopredict/ap_age.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
A predictor for age.
|
3
3
|
Currently based on audEERING's agender model.
|
4
4
|
"""
|
5
|
-
from nkululeko.util import Util
|
5
|
+
from nkululeko.utils.util import Util
|
6
6
|
from nkululeko.feature_extractor import FeatureExtractor
|
7
7
|
import ast
|
8
8
|
import nkululeko.glob_conf as glob_conf
|
@@ -21,9 +21,7 @@ class AgePredictor:
|
|
21
21
|
|
22
22
|
def predict(self, split_selection):
|
23
23
|
self.util.debug(f"predicting age for {split_selection} samples")
|
24
|
-
feats_name = "_".join(
|
25
|
-
ast.literal_eval(glob_conf.config["DATA"]["databases"])
|
26
|
-
)
|
24
|
+
feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
|
27
25
|
self.feature_extractor = FeatureExtractor(
|
28
26
|
self.df, ["agender_agender"], feats_name, split_selection
|
29
27
|
)
|
@@ -2,7 +2,7 @@
|
|
2
2
|
A predictor for emotional arousal.
|
3
3
|
Currently based on audEERING's emotional dimension model.
|
4
4
|
"""
|
5
|
-
from nkululeko.util import Util
|
5
|
+
from nkululeko.utils.util import Util
|
6
6
|
from nkululeko.feature_extractor import FeatureExtractor
|
7
7
|
import ast
|
8
8
|
import nkululeko.glob_conf as glob_conf
|
@@ -21,9 +21,7 @@ class ArousalPredictor:
|
|
21
21
|
|
22
22
|
def predict(self, split_selection):
|
23
23
|
self.util.debug(f"predicting arousal for {split_selection} samples")
|
24
|
-
feats_name = "_".join(
|
25
|
-
ast.literal_eval(glob_conf.config["DATA"]["databases"])
|
26
|
-
)
|
24
|
+
feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
|
27
25
|
self.feature_extractor = FeatureExtractor(
|
28
26
|
self.df, ["auddim"], feats_name, split_selection
|
29
27
|
)
|
@@ -3,7 +3,7 @@ A predictor for emotional dominance.
|
|
3
3
|
Currently based on audEERING's emotional dimension model.
|
4
4
|
"""
|
5
5
|
|
6
|
-
from nkululeko.util import Util
|
6
|
+
from nkululeko.utils.util import Util
|
7
7
|
from nkululeko.feature_extractor import FeatureExtractor
|
8
8
|
import ast
|
9
9
|
import nkululeko.glob_conf as glob_conf
|
@@ -21,9 +21,7 @@ class DominancePredictor:
|
|
21
21
|
|
22
22
|
def predict(self, split_selection):
|
23
23
|
self.util.debug(f"predicting dominance for {split_selection} samples")
|
24
|
-
feats_name = "_".join(
|
25
|
-
ast.literal_eval(glob_conf.config["DATA"]["databases"])
|
26
|
-
)
|
24
|
+
feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
|
27
25
|
self.feature_extractor = FeatureExtractor(
|
28
26
|
self.df, ["auddim"], feats_name, split_selection
|
29
27
|
)
|
@@ -2,7 +2,7 @@
|
|
2
2
|
A predictor for biological sex.
|
3
3
|
Currently based on audEERING's agender model.
|
4
4
|
"""
|
5
|
-
from nkululeko.util import Util
|
5
|
+
from nkululeko.utils.util import Util
|
6
6
|
from nkululeko.feature_extractor import FeatureExtractor
|
7
7
|
import ast
|
8
8
|
import nkululeko.glob_conf as glob_conf
|
@@ -21,9 +21,7 @@ class GenderPredictor:
|
|
21
21
|
|
22
22
|
def predict(self, split_selection):
|
23
23
|
self.util.debug(f"predicting gender for {split_selection} samples")
|
24
|
-
feats_name = "_".join(
|
25
|
-
ast.literal_eval(glob_conf.config["DATA"]["databases"])
|
26
|
-
)
|
24
|
+
feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
|
27
25
|
self.feature_extractor = FeatureExtractor(
|
28
26
|
self.df, ["agender_agender"], feats_name, split_selection
|
29
27
|
)
|
nkululeko/autopredict/ap_mos.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
""""
|
2
2
|
A predictor for MOS - mean opinion score.
|
3
3
|
"""
|
4
|
-
from nkululeko.util import Util
|
4
|
+
from nkululeko.utils.util import Util
|
5
5
|
import ast
|
6
6
|
import nkululeko.glob_conf as glob_conf
|
7
7
|
from nkululeko.feature_extractor import FeatureExtractor
|
@@ -22,9 +22,7 @@ class MOSPredictor:
|
|
22
22
|
def predict(self, split_selection):
|
23
23
|
self.util.debug(f"estimating MOS for {split_selection} samples")
|
24
24
|
return_df = self.df.copy()
|
25
|
-
feats_name = "_".join(
|
26
|
-
ast.literal_eval(glob_conf.config["DATA"]["databases"])
|
27
|
-
)
|
25
|
+
feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
|
28
26
|
self.feature_extractor = FeatureExtractor(
|
29
27
|
self.df, ["mos"], feats_name, split_selection
|
30
28
|
)
|
nkululeko/autopredict/ap_pesq.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
""""
|
2
2
|
A predictor for PESQ - Perceptual Evaluation of Speech Quality.
|
3
3
|
"""
|
4
|
-
from nkululeko.util import Util
|
4
|
+
from nkululeko.utils.util import Util
|
5
5
|
import ast
|
6
6
|
import nkululeko.glob_conf as glob_conf
|
7
7
|
from nkululeko.feature_extractor import FeatureExtractor
|
@@ -22,9 +22,7 @@ class PESQPredictor:
|
|
22
22
|
def predict(self, split_selection):
|
23
23
|
self.util.debug(f"estimating PESQ for {split_selection} samples")
|
24
24
|
return_df = self.df.copy()
|
25
|
-
feats_name = "_".join(
|
26
|
-
ast.literal_eval(glob_conf.config["DATA"]["databases"])
|
27
|
-
)
|
25
|
+
feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
|
28
26
|
self.feature_extractor = FeatureExtractor(
|
29
27
|
self.df, ["squim"], feats_name, split_selection
|
30
28
|
)
|
nkululeko/autopredict/ap_sdr.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
A predictor for SDR - Signal to Distortion Ratio.
|
3
3
|
as estimated by Scale-Invariant Signal-to-Distortion Ratio (SI-SDR)
|
4
4
|
"""
|
5
|
-
from nkululeko.util import Util
|
5
|
+
from nkululeko.utils.util import Util
|
6
6
|
import ast
|
7
7
|
import nkululeko.glob_conf as glob_conf
|
8
8
|
from nkululeko.feature_extractor import FeatureExtractor
|
@@ -23,9 +23,7 @@ class SDRPredictor:
|
|
23
23
|
def predict(self, split_selection):
|
24
24
|
self.util.debug(f"estimating SDR for {split_selection} samples")
|
25
25
|
return_df = self.df.copy()
|
26
|
-
feats_name = "_".join(
|
27
|
-
ast.literal_eval(glob_conf.config["DATA"]["databases"])
|
28
|
-
)
|
26
|
+
feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
|
29
27
|
self.feature_extractor = FeatureExtractor(
|
30
28
|
self.df, ["squim"], feats_name, split_selection
|
31
29
|
)
|
nkululeko/autopredict/ap_snr.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
""""
|
2
2
|
A predictor for SNR - signal-to-noise ratio.
|
3
3
|
"""
|
4
|
-
from nkululeko.util import Util
|
4
|
+
from nkululeko.utils.util import Util
|
5
5
|
import ast
|
6
6
|
import nkululeko.glob_conf as glob_conf
|
7
7
|
from nkululeko.feature_extractor import FeatureExtractor
|
@@ -22,9 +22,7 @@ class SNRPredictor:
|
|
22
22
|
def predict(self, split_selection):
|
23
23
|
self.util.debug(f"estimating SNR for {split_selection} samples")
|
24
24
|
return_df = self.df.copy()
|
25
|
-
feats_name = "_".join(
|
26
|
-
ast.literal_eval(glob_conf.config["DATA"]["databases"])
|
27
|
-
)
|
25
|
+
feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
|
28
26
|
self.feature_extractor = FeatureExtractor(
|
29
27
|
self.df, ["snr"], feats_name, split_selection
|
30
28
|
)
|
nkululeko/autopredict/ap_stoi.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
""""
|
2
2
|
A predictor for STOI - Short-Time Objective Intelligibility (STOI)
|
3
3
|
"""
|
4
|
-
from nkululeko.util import Util
|
4
|
+
from nkululeko.utils.util import Util
|
5
5
|
import ast
|
6
6
|
import nkululeko.glob_conf as glob_conf
|
7
7
|
from nkululeko.feature_extractor import FeatureExtractor
|
@@ -22,9 +22,7 @@ class STOIPredictor:
|
|
22
22
|
def predict(self, split_selection):
|
23
23
|
self.util.debug(f"estimating STOI for {split_selection} samples")
|
24
24
|
return_df = self.df.copy()
|
25
|
-
feats_name = "_".join(
|
26
|
-
ast.literal_eval(glob_conf.config["DATA"]["databases"])
|
27
|
-
)
|
25
|
+
feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
|
28
26
|
self.feature_extractor = FeatureExtractor(
|
29
27
|
self.df, ["squim"], feats_name, split_selection
|
30
28
|
)
|
@@ -2,7 +2,7 @@
|
|
2
2
|
A predictor for emotional valence.
|
3
3
|
Currently based on audEERING's emotional dimension model.
|
4
4
|
"""
|
5
|
-
from nkululeko.util import Util
|
5
|
+
from nkululeko.utils.util import Util
|
6
6
|
from nkululeko.feature_extractor import FeatureExtractor
|
7
7
|
import ast
|
8
8
|
import nkululeko.glob_conf as glob_conf
|
@@ -21,9 +21,7 @@ class ValencePredictor:
|
|
21
21
|
|
22
22
|
def predict(self, split_selection):
|
23
23
|
self.util.debug(f"predicting valence for {split_selection} samples")
|
24
|
-
feats_name = "_".join(
|
25
|
-
ast.literal_eval(glob_conf.config["DATA"]["databases"])
|
26
|
-
)
|
24
|
+
feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
|
27
25
|
self.feature_extractor = FeatureExtractor(
|
28
26
|
self.df, ["auddim"], feats_name, split_selection
|
29
27
|
)
|
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.
|
1
|
+
VERSION="0.77.1"
|
2
2
|
SAMPLING_RATE = 16000
|
nkululeko/data/dataset.py
CHANGED
@@ -14,8 +14,7 @@ import nkululeko.glob_conf as glob_conf
|
|
14
14
|
from nkululeko.filter_data import DataFilter
|
15
15
|
from nkululeko.plots import Plots
|
16
16
|
from nkululeko.reporting.report_item import ReportItem
|
17
|
-
from nkululeko.util import Util
|
18
|
-
import nkululeko.split.split_utils as split
|
17
|
+
from nkululeko.utils.util import Util
|
19
18
|
|
20
19
|
|
21
20
|
class Dataset:
|
@@ -431,6 +430,9 @@ class Dataset:
|
|
431
430
|
|
432
431
|
def balanced_split(self):
|
433
432
|
"""One way to split train and eval sets: Generate split dataframes for some balancing criterion"""
|
433
|
+
from splitutils import binning
|
434
|
+
from splitutils import optimize_traintest_split
|
435
|
+
|
434
436
|
seed = 42
|
435
437
|
k = 30
|
436
438
|
test_size = int(self.util.config_val_data(self.name, "test_size", 20)) / 100.0
|
@@ -441,7 +443,7 @@ class Dataset:
|
|
441
443
|
bins = self.util.config_val("DATA", f"bin", False)
|
442
444
|
if bins:
|
443
445
|
nbins = len(ast.literal_eval(bins))
|
444
|
-
targets =
|
446
|
+
targets = binning(targets, nbins=nbins)
|
445
447
|
# on which variable to split
|
446
448
|
speakers = df["speaker"].to_numpy()
|
447
449
|
|
@@ -460,7 +462,7 @@ class Dataset:
|
|
460
462
|
data = df[stratif_var].to_numpy()
|
461
463
|
bins = self.util.config_val("DATA", f"{stratif_var}_bins", False)
|
462
464
|
if bins:
|
463
|
-
data =
|
465
|
+
data = binning(data, nbins=int(bins))
|
464
466
|
stratif_vars_array[stratif_var] = data
|
465
467
|
# weights for all stratify_on variables and
|
466
468
|
# and for test proportion match. Give target
|
@@ -473,7 +475,8 @@ class Dataset:
|
|
473
475
|
weights[key] = value
|
474
476
|
# find optimal test indices TEST_I in DF
|
475
477
|
# info: dict with goodness of split information
|
476
|
-
|
478
|
+
|
479
|
+
train_i, test_i, info = optimize_traintest_split(
|
477
480
|
X=df,
|
478
481
|
y=targets,
|
479
482
|
split_on=speakers,
|
nkululeko/demo.py
CHANGED
@@ -5,19 +5,15 @@
|
|
5
5
|
|
6
6
|
from nkululeko.experiment import Experiment
|
7
7
|
import configparser
|
8
|
-
from nkululeko.util import Util
|
8
|
+
from nkululeko.utils.util import Util
|
9
9
|
from nkululeko.constants import VERSION
|
10
10
|
import argparse
|
11
11
|
import os
|
12
12
|
|
13
13
|
|
14
14
|
def main(src_dir):
|
15
|
-
parser = argparse.ArgumentParser(
|
16
|
-
|
17
|
-
)
|
18
|
-
parser.add_argument(
|
19
|
-
"--config", default="exp.ini", help="The base configuration"
|
20
|
-
)
|
15
|
+
parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
|
16
|
+
parser.add_argument("--config", default="exp.ini", help="The base configuration")
|
21
17
|
parser.add_argument(
|
22
18
|
"--file", help="A file that should be processed (16kHz mono wav)"
|
23
19
|
)
|
@@ -66,6 +62,4 @@ def main(src_dir):
|
|
66
62
|
|
67
63
|
if __name__ == "__main__":
|
68
64
|
cwd = os.path.dirname(os.path.abspath(__file__))
|
69
|
-
main(
|
70
|
-
cwd
|
71
|
-
) # use this if you want to state the config file path on command line
|
65
|
+
main(cwd) # use this if you want to state the config file path on command line
|
nkululeko/demo_predictor.py
CHANGED
nkululeko/experiment.py
CHANGED
@@ -25,7 +25,7 @@ from nkululeko.reporting.report import Report
|
|
25
25
|
from nkululeko.runmanager import Runmanager
|
26
26
|
from nkululeko.scaler import Scaler
|
27
27
|
from nkululeko.test_predictor import Test_predictor
|
28
|
-
from nkululeko.util import Util
|
28
|
+
from nkululeko.utils.util import Util
|
29
29
|
|
30
30
|
|
31
31
|
class Experiment:
|
@@ -45,10 +45,15 @@ class Experiment:
|
|
45
45
|
audeer.mkdir(self.data_dir) # create the experiment directory
|
46
46
|
self.util = Util("experiment")
|
47
47
|
glob_conf.set_util(self.util)
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
48
|
+
fresh_report = eval(self.util.config_val("REPORT", "fresh", "False"))
|
49
|
+
if not fresh_report:
|
50
|
+
try:
|
51
|
+
with open(os.path.join(self.data_dir, "report.pkl"), "rb") as handle:
|
52
|
+
self.report = pickle.load(handle)
|
53
|
+
except FileNotFoundError:
|
54
|
+
self.report = Report()
|
55
|
+
else:
|
56
|
+
self.util.debug("starting a fresh report")
|
52
57
|
self.report = Report()
|
53
58
|
glob_conf.set_report(self.report)
|
54
59
|
self.loso = self.util.config_val("MODEL", "loso", False)
|
nkululeko/explore.py
CHANGED
@@ -3,18 +3,15 @@
|
|
3
3
|
|
4
4
|
from nkululeko.experiment import Experiment
|
5
5
|
import configparser
|
6
|
-
from nkululeko.util import Util
|
6
|
+
from nkululeko.utils.util import Util
|
7
7
|
from nkululeko.constants import VERSION
|
8
8
|
import argparse
|
9
9
|
import os
|
10
10
|
|
11
|
+
|
11
12
|
def main(src_dir):
|
12
|
-
parser = argparse.ArgumentParser(
|
13
|
-
|
14
|
-
)
|
15
|
-
parser.add_argument(
|
16
|
-
"--config", default="exp.ini", help="The base configuration"
|
17
|
-
)
|
13
|
+
parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
|
14
|
+
parser.add_argument("--config", default="exp.ini", help="The base configuration")
|
18
15
|
args = parser.parse_args()
|
19
16
|
if args.config is not None:
|
20
17
|
config_file = args.config
|
@@ -47,9 +44,7 @@ def main(src_dir):
|
|
47
44
|
|
48
45
|
# split into train and test
|
49
46
|
expr.fill_train_and_tests()
|
50
|
-
util.debug(
|
51
|
-
f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}"
|
52
|
-
)
|
47
|
+
util.debug(f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
|
53
48
|
|
54
49
|
plot_feats = eval(util.config_val("EXPL", "feature_distributions", "False"))
|
55
50
|
tsne = eval(util.config_val("EXPL", "tsne", "False"))
|
@@ -70,6 +65,4 @@ def main(src_dir):
|
|
70
65
|
|
71
66
|
if __name__ == "__main__":
|
72
67
|
cwd = os.path.dirname(os.path.abspath(__file__))
|
73
|
-
main(
|
74
|
-
cwd
|
75
|
-
) # use this if you want to state the config file path on command line
|
68
|
+
main(cwd) # use this if you want to state the config file path on command line
|
nkululeko/export.py
CHANGED
@@ -8,18 +8,14 @@ import audeer
|
|
8
8
|
import argparse
|
9
9
|
import audiofile
|
10
10
|
from nkululeko.experiment import Experiment
|
11
|
-
from nkululeko.util import Util
|
11
|
+
from nkululeko.utils.util import Util
|
12
12
|
from nkululeko.constants import VERSION
|
13
13
|
import shutil
|
14
14
|
|
15
15
|
|
16
16
|
def main(src_dir):
|
17
|
-
parser = argparse.ArgumentParser(
|
18
|
-
|
19
|
-
)
|
20
|
-
parser.add_argument(
|
21
|
-
"--config", default="exp.ini", help="The base configuration"
|
22
|
-
)
|
17
|
+
parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
|
18
|
+
parser.add_argument("--config", default="exp.ini", help="The base configuration")
|
23
19
|
args = parser.parse_args()
|
24
20
|
if args.config is not None:
|
25
21
|
config_file = args.config
|
@@ -52,9 +48,7 @@ def main(src_dir):
|
|
52
48
|
|
53
49
|
# split into train and test
|
54
50
|
expr.fill_train_and_tests()
|
55
|
-
util.debug(
|
56
|
-
f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}"
|
57
|
-
)
|
51
|
+
util.debug(f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
|
58
52
|
|
59
53
|
# export
|
60
54
|
df_train = expr.df_train
|
@@ -62,9 +56,7 @@ def main(src_dir):
|
|
62
56
|
target_root = util.config_val("EXPORT", "root", "./exported_data/")
|
63
57
|
orig_root = util.config_val("EXPORT", "orig_root", None)
|
64
58
|
data_name = util.config_val("EXPORT", "data_name", "export")
|
65
|
-
segments_as_files = eval(
|
66
|
-
util.config_val("EXPORT", "segments_as_files", "False")
|
67
|
-
)
|
59
|
+
segments_as_files = eval(util.config_val("EXPORT", "segments_as_files", "False"))
|
68
60
|
audeer.mkdir(target_root)
|
69
61
|
splits = {"train": df_train, "test": df_test}
|
70
62
|
df_all = pd.DataFrame()
|
@@ -81,12 +73,11 @@ def main(src_dir):
|
|
81
73
|
always_2d=True,
|
82
74
|
)
|
83
75
|
file_name = (
|
84
|
-
os.path.splitext(file)[0]
|
85
|
-
|
86
|
-
|
87
|
-
|
76
|
+
os.path.splitext(file)[0] + "_" + start.total_seconds() + ".wav"
|
77
|
+
)
|
78
|
+
wav_folder = (
|
79
|
+
f"{target_root}/{os.path.basename(os.path.normpath(orig_root))}"
|
88
80
|
)
|
89
|
-
wav_folder = f"{target_root}/{os.path.basename(os.path.normpath(orig_root))}"
|
90
81
|
audeer.mkdir(wav_folder)
|
91
82
|
new_rel_path = file_dir[
|
92
83
|
file_dir.index(orig_root) + 1 + len(orig_root) :
|
@@ -99,7 +90,9 @@ def main(src_dir):
|
|
99
90
|
files.append(new_file_name)
|
100
91
|
else:
|
101
92
|
file_name = os.path.basename(file)
|
102
|
-
wav_folder =
|
93
|
+
wav_folder = (
|
94
|
+
f"{target_root}/{os.path.basename(os.path.normpath(orig_root))}"
|
95
|
+
)
|
103
96
|
audeer.mkdir(wav_folder)
|
104
97
|
new_rel_path = file_dir[
|
105
98
|
file_dir.index(orig_root) + 1 + len(orig_root) :
|
@@ -121,15 +114,11 @@ def main(src_dir):
|
|
121
114
|
df_all = df_all.rename(columns={"class_label": target})
|
122
115
|
|
123
116
|
df_all.to_csv(f"{target_root}/{data_name}.csv")
|
124
|
-
util.debug(
|
125
|
-
f"saved {data_name}.csv to {target_root}, {df.shape[0]} samples."
|
126
|
-
)
|
117
|
+
util.debug(f"saved {data_name}.csv to {target_root}, {df.shape[0]} samples.")
|
127
118
|
|
128
119
|
print("DONE")
|
129
120
|
|
130
121
|
|
131
122
|
if __name__ == "__main__":
|
132
123
|
cwd = os.path.dirname(os.path.abspath(__file__))
|
133
|
-
main(
|
134
|
-
cwd
|
135
|
-
) # use this if you want to state the config file path on command line
|
124
|
+
main(cwd) # use this if you want to state the config file path on command line
|