nkululeko 0.81.3__py3-none-any.whl → 0.81.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/autopredict/estimate_snr.py +17 -6
- nkululeko/constants.py +1 -1
- nkululeko/data/dataset.py +11 -4
- nkululeko/demo.py +20 -5
- nkululeko/demo_predictor.py +6 -3
- nkululeko/experiment.py +3 -3
- nkululeko/explore.py +13 -8
- nkululeko/feat_extract/feats_agender.py +1 -4
- nkululeko/feat_extract/{feats_audmodel_dim.py → feats_auddim.py} +13 -8
- nkululeko/feat_extract/feats_audmodel.py +5 -4
- nkululeko/feat_extract/feats_import.py +1 -1
- nkululeko/feat_extract/feats_mos.py +2 -1
- nkululeko/feat_extract/feats_praat.py +10 -8
- nkululeko/feat_extract/feats_snr.py +17 -9
- nkululeko/feat_extract/feats_squim.py +13 -16
- nkululeko/feature_extractor.py +72 -148
- nkululeko/modelrunner.py +2 -2
- nkululeko/models/model.py +13 -13
- nkululeko/models/model_svm.py +5 -2
- nkululeko/multidb.py +18 -12
- nkululeko/predict.py +29 -9
- nkululeko/reporter.py +332 -0
- nkululeko/resample.py +12 -7
- nkululeko/runmanager.py +17 -8
- nkululeko/test.py +9 -6
- nkululeko/test_predictor.py +1 -0
- nkululeko/utils/stats.py +12 -5
- {nkululeko-0.81.3.dist-info → nkululeko-0.81.6.dist-info}/METADATA +16 -1
- {nkululeko-0.81.3.dist-info → nkululeko-0.81.6.dist-info}/RECORD +32 -31
- {nkululeko-0.81.3.dist-info → nkululeko-0.81.6.dist-info}/LICENSE +0 -0
- {nkululeko-0.81.3.dist-info → nkululeko-0.81.6.dist-info}/WHEEL +0 -0
- {nkululeko-0.81.3.dist-info → nkululeko-0.81.6.dist-info}/top_level.txt +0 -0
nkululeko/feature_extractor.py
CHANGED
@@ -1,22 +1,25 @@
|
|
1
|
-
"""
|
2
|
-
feature_extractor.py
|
3
|
-
|
4
|
-
Helper class to encapsulate feature extraction methods
|
1
|
+
"""Extract acoustic features from audio samples.
|
5
2
|
|
3
|
+
Extract acoustic features using several feature extractors
|
4
|
+
(appends the features column-wise)
|
6
5
|
"""
|
6
|
+
|
7
7
|
import pandas as pd
|
8
8
|
|
9
9
|
from nkululeko.utils.util import Util
|
10
10
|
|
11
11
|
|
12
12
|
class FeatureExtractor:
|
13
|
-
"""
|
14
|
-
|
13
|
+
"""Extract acoustic features from audio samples.
|
14
|
+
|
15
|
+
Extract acoustic features using several feature extractors (appends the features column-wise).
|
16
|
+
|
15
17
|
Args:
|
16
18
|
data_df (pandas.DataFrame): dataframe with audiofile paths as index
|
17
|
-
feats_types (
|
18
|
-
data_name (
|
19
|
-
feats_designation (
|
19
|
+
feats_types (List[str]): designations of acoustic feature extractors to be used
|
20
|
+
data_name (str): name of databases that are extracted (for caching)
|
21
|
+
feats_designation (str): the type of split (train/test), also is used for the cache name.
|
22
|
+
|
20
23
|
Returns:
|
21
24
|
df (pandas.DataFrame): dataframe with same index as data_df and acoustic features in columns
|
22
25
|
"""
|
@@ -25,7 +28,6 @@ class FeatureExtractor:
|
|
25
28
|
df = None
|
26
29
|
data_df = None # dataframe to get audio paths
|
27
30
|
|
28
|
-
# def __init__
|
29
31
|
def __init__(self, data_df, feats_types, data_name, feats_designation):
|
30
32
|
self.data_df = data_df
|
31
33
|
self.data_name = data_name
|
@@ -34,147 +36,69 @@ class FeatureExtractor:
|
|
34
36
|
self.feats_designation = feats_designation
|
35
37
|
|
36
38
|
def extract(self):
|
37
|
-
# feats_types = self.util.config_val_list('FEATS', 'type', ['os'])
|
38
|
-
self.featExtractor = None
|
39
39
|
self.feats = pd.DataFrame()
|
40
|
-
_scale = True
|
41
40
|
for feats_type in self.feats_types:
|
42
41
|
store_name = f"{self.data_name}_{feats_type}"
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
f"{store_name}_{self.feats_designation}", self.data_df
|
48
|
-
)
|
49
|
-
elif feats_type == "spectra":
|
50
|
-
from nkululeko.feat_extract.feats_spectra import Spectraloader
|
51
|
-
|
52
|
-
self.featExtractor = Spectraloader(
|
53
|
-
f"{store_name}_{self.feats_designation}", self.data_df
|
54
|
-
)
|
55
|
-
elif feats_type == "trill":
|
56
|
-
from nkululeko.feat_extract.feats_trill import TRILLset
|
57
|
-
|
58
|
-
self.featExtractor = TRILLset(
|
59
|
-
f"{store_name}_{self.feats_designation}", self.data_df
|
60
|
-
)
|
61
|
-
elif feats_type.startswith("wav2vec"):
|
62
|
-
from nkululeko.feat_extract.feats_wav2vec2 import Wav2vec2
|
63
|
-
|
64
|
-
self.featExtractor = Wav2vec2(
|
65
|
-
f"{store_name}_{self.feats_designation}",
|
66
|
-
self.data_df,
|
67
|
-
feats_type,
|
68
|
-
)
|
69
|
-
elif feats_type.startswith("hubert"):
|
70
|
-
from nkululeko.feat_extract.feats_hubert import Hubert
|
71
|
-
|
72
|
-
self.featExtractor = Hubert(
|
73
|
-
f"{store_name}_{self.feats_designation}",
|
74
|
-
self.data_df,
|
75
|
-
feats_type,
|
76
|
-
)
|
77
|
-
|
78
|
-
elif feats_type.startswith("wavlm"):
|
79
|
-
from nkululeko.feat_extract.feats_wavlm import Wavlm
|
80
|
-
|
81
|
-
self.featExtractor = Wavlm(
|
82
|
-
f"{store_name}_{self.feats_designation}",
|
83
|
-
self.data_df,
|
84
|
-
feats_type,
|
85
|
-
)
|
86
|
-
|
87
|
-
elif feats_type.startswith("spkrec"):
|
88
|
-
from nkululeko.feat_extract.feats_spkrec import Spkrec
|
89
|
-
|
90
|
-
self.featExtractor = Spkrec(
|
91
|
-
f"{store_name}_{self.feats_designation}",
|
92
|
-
self.data_df,
|
93
|
-
feats_type,
|
94
|
-
)
|
95
|
-
elif feats_type == "audmodel":
|
96
|
-
from nkululeko.feat_extract.feats_audmodel import AudModelSet
|
97
|
-
|
98
|
-
self.featExtractor = AudModelSet(
|
99
|
-
f"{store_name}_{self.feats_designation}", self.data_df
|
100
|
-
)
|
101
|
-
elif feats_type == "auddim":
|
102
|
-
from nkululeko.feat_extract.feats_audmodel_dim import (
|
103
|
-
AudModelDimSet,
|
104
|
-
)
|
105
|
-
|
106
|
-
self.featExtractor = AudModelDimSet(
|
107
|
-
f"{store_name}_{self.feats_designation}", self.data_df
|
108
|
-
)
|
109
|
-
elif feats_type == "agender":
|
110
|
-
from nkululeko.feat_extract.feats_agender import (
|
111
|
-
AudModelAgenderSet,
|
112
|
-
)
|
113
|
-
|
114
|
-
self.featExtractor = AudModelAgenderSet(
|
115
|
-
f"{store_name}_{self.feats_designation}", self.data_df
|
116
|
-
)
|
117
|
-
elif feats_type == "agender_agender":
|
118
|
-
from nkululeko.feat_extract.feats_agender_agender import (
|
119
|
-
AgenderAgenderSet,
|
120
|
-
)
|
121
|
-
|
122
|
-
self.featExtractor = AgenderAgenderSet(
|
123
|
-
f"{store_name}_{self.feats_designation}", self.data_df
|
124
|
-
)
|
125
|
-
elif feats_type == "snr":
|
126
|
-
from nkululeko.feat_extract.feats_snr import SNRSet
|
127
|
-
|
128
|
-
self.featExtractor = SNRSet(
|
129
|
-
f"{store_name}_{self.feats_designation}", self.data_df
|
130
|
-
)
|
131
|
-
elif feats_type == "mos":
|
132
|
-
from nkululeko.feat_extract.feats_mos import MOSSet
|
133
|
-
|
134
|
-
self.featExtractor = MOSSet(
|
135
|
-
f"{store_name}_{self.feats_designation}", self.data_df
|
136
|
-
)
|
137
|
-
elif feats_type == "squim":
|
138
|
-
from nkululeko.feat_extract.feats_squim import SQUIMSet
|
139
|
-
|
140
|
-
self.featExtractor = SQUIMSet(
|
141
|
-
f"{store_name}_{self.feats_designation}", self.data_df
|
142
|
-
)
|
143
|
-
elif feats_type == "clap":
|
144
|
-
from nkululeko.feat_extract.feats_clap import Clap
|
145
|
-
|
146
|
-
self.featExtractor = Clap(
|
147
|
-
f"{store_name}_{self.feats_designation}", self.data_df
|
148
|
-
)
|
149
|
-
elif feats_type == "praat":
|
150
|
-
from nkululeko.feat_extract.feats_praat import Praatset
|
151
|
-
|
152
|
-
self.featExtractor = Praatset(
|
153
|
-
f"{store_name}_{self.feats_designation}", self.data_df
|
154
|
-
)
|
155
|
-
elif feats_type == "mld":
|
156
|
-
from nkululeko.feat_extract.feats_mld import MLD_set
|
157
|
-
|
158
|
-
self.featExtractor = MLD_set(
|
159
|
-
f"{store_name}_{self.feats_designation}", self.data_df
|
160
|
-
)
|
161
|
-
elif feats_type == "import":
|
162
|
-
from nkululeko.feat_extract.feats_import import Importset
|
163
|
-
|
164
|
-
self.featExtractor = Importset(
|
165
|
-
f"{store_name}_{self.feats_designation}", self.data_df
|
166
|
-
)
|
167
|
-
else:
|
168
|
-
self.util.error(f"unknown feats_type: {feats_type}")
|
169
|
-
|
170
|
-
self.featExtractor.extract()
|
171
|
-
self.featExtractor.filter()
|
172
|
-
# remove samples that were not extracted by MLD
|
173
|
-
# self.df_test = self.df_test.loc[self.df_test.index.intersection(featExtractor_test.df.index)]
|
174
|
-
# self.df_train = self.df_train.loc[self.df_train.index.intersection(featExtractor_train.df.index)]
|
175
|
-
self.util.debug(f"{feats_type}: shape : {self.featExtractor.df.shape}")
|
176
|
-
self.feats = pd.concat([self.feats, self.featExtractor.df], axis=1)
|
42
|
+
self.feat_extractor = self._get_feat_extractor(store_name, feats_type)
|
43
|
+
self.feat_extractor.extract()
|
44
|
+
self.feat_extractor.filter()
|
45
|
+
self.feats = pd.concat([self.feats, self.feat_extractor.df], axis=1)
|
177
46
|
return self.feats
|
178
47
|
|
179
48
|
def extract_sample(self, signal, sr):
|
180
|
-
return self.
|
49
|
+
return self.feat_extractor.extract_sample(signal, sr)
|
50
|
+
|
51
|
+
def _get_feat_extractor(self, store_name, feats_type):
|
52
|
+
feat_extractor_class = self._get_feat_extractor_class(feats_type)
|
53
|
+
if feat_extractor_class is None:
|
54
|
+
self.util.error(f"unknown feats_type: {feats_type}")
|
55
|
+
return feat_extractor_class(
|
56
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
57
|
+
)
|
58
|
+
|
59
|
+
def _get_feat_extractor_class(self, feats_type):
|
60
|
+
if feats_type == "os":
|
61
|
+
from nkululeko.feat_extract.feats_opensmile import Opensmileset
|
62
|
+
|
63
|
+
return Opensmileset
|
64
|
+
elif feats_type == "spectra":
|
65
|
+
from nkululeko.feat_extract.feats_spectra import Spectraloader
|
66
|
+
|
67
|
+
return Spectraloader
|
68
|
+
elif feats_type == "trill":
|
69
|
+
from nkululeko.feat_extract.feats_trill import TRILLset
|
70
|
+
|
71
|
+
return TRILLset
|
72
|
+
elif feats_type.startswith(("wav2vec", "hubert", "wavlm", "spkrec")):
|
73
|
+
return self._get_feat_extractor_by_prefix(feats_type)
|
74
|
+
elif feats_type in (
|
75
|
+
"audmodel",
|
76
|
+
"auddim",
|
77
|
+
"agender",
|
78
|
+
"agender_agender",
|
79
|
+
"snr",
|
80
|
+
"mos",
|
81
|
+
"squim",
|
82
|
+
"clap",
|
83
|
+
"praat",
|
84
|
+
"mld",
|
85
|
+
"import",
|
86
|
+
):
|
87
|
+
return self._get_feat_extractor_by_name(feats_type)
|
88
|
+
else:
|
89
|
+
return None
|
90
|
+
|
91
|
+
def _get_feat_extractor_by_prefix(self, feats_type):
|
92
|
+
prefix, _, ext = feats_type.partition("_")
|
93
|
+
from importlib import import_module
|
94
|
+
|
95
|
+
module = import_module(f"nkululeko.feat_extract.feats_{prefix.lower()}")
|
96
|
+
class_name = f"{prefix.capitalize()}{ext.capitalize()}set"
|
97
|
+
return getattr(module, class_name)
|
98
|
+
|
99
|
+
def _get_feat_extractor_by_name(self, feats_type):
|
100
|
+
from importlib import import_module
|
101
|
+
|
102
|
+
module = import_module(f"nkululeko.feat_extract.feats_{feats_type.lower()}")
|
103
|
+
class_name = f"{feats_type.capitalize()}Set"
|
104
|
+
return getattr(module, class_name)
|
nkululeko/modelrunner.py
CHANGED
@@ -39,7 +39,7 @@ class Modelrunner:
|
|
39
39
|
plot_epochs = self.util.config_val("PLOT", "epochs", False)
|
40
40
|
only_test = self.util.config_val("MODEL", "only_test", False)
|
41
41
|
epoch_num = int(self.util.config_val("EXP", "epochs", 1))
|
42
|
-
if not self.model.
|
42
|
+
if not self.model.is_ann() and epoch_num > 1:
|
43
43
|
self.util.warn(f"setting epoch num to 1 (was {epoch_num}) if model not ANN")
|
44
44
|
epoch_num = 1
|
45
45
|
glob_conf.config["EXP"]["epochs"] = "1"
|
@@ -69,7 +69,7 @@ class Modelrunner:
|
|
69
69
|
if plot_epochs:
|
70
70
|
self.util.debug(f"plotting conf matrix to {plot_name}")
|
71
71
|
report.plot_confmatrix(plot_name, epoch)
|
72
|
-
store_models = self.util.config_val("
|
72
|
+
store_models = self.util.config_val("EXP", "save", False)
|
73
73
|
plot_best_model = self.util.config_val("PLOT", "best_model", False)
|
74
74
|
if (store_models or plot_best_model) and (
|
75
75
|
not only_test
|
nkululeko/models/model.py
CHANGED
@@ -1,23 +1,25 @@
|
|
1
1
|
# model.py
|
2
|
-
from nkululeko.utils.util import Util
|
3
|
-
import pandas as pd
|
4
|
-
import numpy as np
|
5
|
-
import nkululeko.glob_conf as glob_conf
|
6
|
-
import sklearn.utils
|
7
|
-
from nkululeko.reporting.reporter import Reporter
|
8
2
|
import ast
|
9
|
-
from sklearn.model_selection import GridSearchCV
|
10
3
|
import pickle
|
11
4
|
import random
|
5
|
+
|
6
|
+
import numpy as np
|
7
|
+
import pandas as pd
|
8
|
+
from sklearn.model_selection import GridSearchCV
|
12
9
|
from sklearn.model_selection import LeaveOneGroupOut
|
13
10
|
from sklearn.model_selection import StratifiedKFold
|
11
|
+
import sklearn.utils
|
12
|
+
|
13
|
+
import nkululeko.glob_conf as glob_conf
|
14
|
+
from nkululeko.reporting.reporter import Reporter
|
15
|
+
from nkululeko.utils.util import Util
|
14
16
|
|
15
17
|
|
16
18
|
class Model:
|
17
|
-
"""Generic model class for linear (non-neural) algorithms"""
|
19
|
+
"""Generic model class for linear (non-neural) algorithms."""
|
18
20
|
|
19
21
|
def __init__(self, df_train, df_test, feats_train, feats_test):
|
20
|
-
"""Constructor taking the configuration and all dataframes"""
|
22
|
+
"""Constructor taking the configuration and all dataframes."""
|
21
23
|
self.df_train, self.df_test, self.feats_train, self.feats_test = (
|
22
24
|
df_train,
|
23
25
|
df_test,
|
@@ -35,7 +37,7 @@ class Model:
|
|
35
37
|
def set_model_type(self, type):
|
36
38
|
self.model_type = type
|
37
39
|
|
38
|
-
def
|
40
|
+
def is_ann(self):
|
39
41
|
if self.model_type == "ann":
|
40
42
|
return True
|
41
43
|
else:
|
@@ -277,8 +279,6 @@ class Model:
|
|
277
279
|
prediction = {}
|
278
280
|
if self.util.exp_is_classification():
|
279
281
|
# get the class probabilities
|
280
|
-
if not self.get_type() == "xgb":
|
281
|
-
features = [features]
|
282
282
|
predictions = self.clf.predict_proba(features)
|
283
283
|
# pred = self.clf.predict(features)
|
284
284
|
for i in range(len(self.clf.classes_)):
|
@@ -302,7 +302,7 @@ class Model:
|
|
302
302
|
self.clf = pickle.load(handle)
|
303
303
|
except FileNotFoundError as fe:
|
304
304
|
self.util.error(
|
305
|
-
f"
|
305
|
+
f"Did you forget to store your models? needs: \n[MODEL]\nsave=True\n{fe}"
|
306
306
|
)
|
307
307
|
|
308
308
|
def load_path(self, path, run, epoch):
|
nkululeko/models/model_svm.py
CHANGED
nkululeko/multidb.py
CHANGED
@@ -3,23 +3,27 @@
|
|
3
3
|
|
4
4
|
import argparse
|
5
5
|
import ast
|
6
|
-
import
|
7
|
-
import
|
8
|
-
|
6
|
+
import configparser
|
7
|
+
import os
|
8
|
+
|
9
9
|
import matplotlib.cm as cm
|
10
|
+
import matplotlib.pyplot as plt
|
10
11
|
import numpy as np
|
11
|
-
import
|
12
|
+
import pandas as pd
|
13
|
+
import seaborn as sn
|
14
|
+
|
15
|
+
import nkululeko.glob_conf as glob_conf
|
16
|
+
from nkululeko.aug_train import doit as aug_train
|
12
17
|
from nkululeko.experiment import Experiment
|
13
|
-
import configparser
|
14
|
-
from nkululeko.utils.util import Util
|
15
18
|
from nkululeko.nkululeko import doit as nkulu
|
16
|
-
from nkululeko.
|
17
|
-
import nkululeko.glob_conf as glob_conf
|
19
|
+
from nkululeko.utils.util import Util
|
18
20
|
|
19
21
|
|
20
22
|
def main(src_dir):
|
21
|
-
parser = argparse.ArgumentParser(
|
22
|
-
|
23
|
+
parser = argparse.ArgumentParser(
|
24
|
+
description="Call the nkululeko MULTIDB framework.")
|
25
|
+
parser.add_argument("--config", default="exp.ini",
|
26
|
+
help="The base configuration")
|
23
27
|
args = parser.parse_args()
|
24
28
|
if args.config is not None:
|
25
29
|
config_file = args.config
|
@@ -54,7 +58,8 @@ def main(src_dir):
|
|
54
58
|
dataset = datasets[i]
|
55
59
|
print(f"running {dataset}")
|
56
60
|
if extra_trains:
|
57
|
-
extra_trains_1 = extra_trains.removeprefix(
|
61
|
+
extra_trains_1 = extra_trains.removeprefix(
|
62
|
+
"[").removesuffix("]")
|
58
63
|
config["DATA"]["databases"] = f"['{dataset}', {extra_trains_1}]"
|
59
64
|
extra_trains_2 = ast.literal_eval(extra_trains)
|
60
65
|
for extra_train in extra_trains_2:
|
@@ -67,7 +72,8 @@ def main(src_dir):
|
|
67
72
|
test = datasets[j]
|
68
73
|
print(f"running train: {train}, test: {test}")
|
69
74
|
if extra_trains:
|
70
|
-
extra_trains_1 = extra_trains.removeprefix(
|
75
|
+
extra_trains_1 = extra_trains.removeprefix(
|
76
|
+
"[").removesuffix("]")
|
71
77
|
config["DATA"][
|
72
78
|
"databases"
|
73
79
|
] = f"['{train}', '{test}', {extra_trains_1}]"
|
nkululeko/predict.py
CHANGED
@@ -1,17 +1,34 @@
|
|
1
1
|
# predict.py
|
2
|
-
# use some model and add automatically predicted labels to train and test splits
|
2
|
+
# use some model and add automatically predicted labels to train and test splits
|
3
|
+
# then save as a new dataset
|
4
|
+
|
5
|
+
"""This script is used to call the nkululeko PREDICT framework.
|
6
|
+
|
7
|
+
It loads a configuration file, creates a new experiment,
|
8
|
+
and performs automatic prediction on the train and test datasets. The predicted labels are added to the datasets and
|
9
|
+
saved as a new dataset.
|
10
|
+
|
11
|
+
Usage: \n
|
12
|
+
python3 -m nkululeko.predict [--config CONFIG_FILE] \n
|
13
|
+
|
14
|
+
Arguments: \n
|
15
|
+
--config (str): The path to the base configuration file (default: exp.ini)
|
16
|
+
"""
|
3
17
|
|
4
|
-
from nkululeko.experiment import Experiment
|
5
|
-
import configparser
|
6
|
-
from nkululeko.utils.util import Util
|
7
|
-
from nkululeko.constants import VERSION
|
8
18
|
import argparse
|
19
|
+
import configparser
|
9
20
|
import os
|
10
21
|
|
22
|
+
from nkululeko.constants import VERSION
|
23
|
+
from nkululeko.experiment import Experiment
|
24
|
+
from nkululeko.utils.util import Util
|
25
|
+
|
11
26
|
|
12
27
|
def main(src_dir):
|
13
|
-
parser = argparse.ArgumentParser(
|
14
|
-
|
28
|
+
parser = argparse.ArgumentParser(
|
29
|
+
description="Call the nkululeko PREDICT framework.")
|
30
|
+
parser.add_argument("--config", default="exp.ini",
|
31
|
+
help="The base configuration")
|
15
32
|
args = parser.parse_args()
|
16
33
|
if args.config is not None:
|
17
34
|
config_file = args.config
|
@@ -28,7 +45,9 @@ def main(src_dir):
|
|
28
45
|
config.read(config_file)
|
29
46
|
# create a new experiment
|
30
47
|
expr = Experiment(config)
|
31
|
-
|
48
|
+
module = "predict"
|
49
|
+
expr.set_module(module)
|
50
|
+
util = Util(module)
|
32
51
|
util.debug(
|
33
52
|
f"running {expr.name} from config {config_file}, nkululeko version"
|
34
53
|
f" {VERSION}"
|
@@ -39,7 +58,8 @@ def main(src_dir):
|
|
39
58
|
|
40
59
|
# split into train and test
|
41
60
|
expr.fill_train_and_tests()
|
42
|
-
util.debug(
|
61
|
+
util.debug(
|
62
|
+
f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
|
43
63
|
|
44
64
|
# process the data
|
45
65
|
df = expr.autopredict()
|