nkululeko 0.81.4__py3-none-any.whl → 0.81.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/autopredict/estimate_snr.py +17 -6
- nkululeko/constants.py +1 -1
- nkululeko/data/dataset.py +9 -2
- nkululeko/demo.py +20 -5
- nkululeko/demo_predictor.py +6 -3
- nkululeko/experiment.py +1 -1
- nkululeko/explore.py +13 -8
- nkululeko/feat_extract/feats_agender.py +1 -4
- nkululeko/feat_extract/{feats_audmodel_dim.py → feats_auddim.py} +5 -4
- nkululeko/feat_extract/feats_audmodel.py +5 -4
- nkululeko/feat_extract/feats_import.py +1 -1
- nkululeko/feat_extract/feats_mos.py +2 -1
- nkululeko/feat_extract/feats_praat.py +10 -8
- nkululeko/feat_extract/feats_snr.py +17 -9
- nkululeko/feat_extract/feats_squim.py +13 -16
- nkululeko/feature_extractor.py +72 -148
- nkululeko/multidb.py +18 -12
- nkululeko/predict.py +26 -8
- nkululeko/reporter.py +332 -0
- nkululeko/resample.py +12 -7
- nkululeko/runmanager.py +17 -8
- nkululeko/test.py +9 -6
- nkululeko/test_predictor.py +1 -0
- nkululeko/utils/stats.py +12 -5
- {nkululeko-0.81.4.dist-info → nkululeko-0.81.6.dist-info}/METADATA +11 -1
- {nkululeko-0.81.4.dist-info → nkululeko-0.81.6.dist-info}/RECORD +29 -28
- {nkululeko-0.81.4.dist-info → nkululeko-0.81.6.dist-info}/LICENSE +0 -0
- {nkululeko-0.81.4.dist-info → nkululeko-0.81.6.dist-info}/WHEEL +0 -0
- {nkululeko-0.81.4.dist-info → nkululeko-0.81.6.dist-info}/top_level.txt +0 -0
nkululeko/multidb.py
CHANGED
```diff
@@ -3,23 +3,27 @@
 
 import argparse
 import ast
-import
-import
-
+import configparser
+import os
+
 import matplotlib.cm as cm
+import matplotlib.pyplot as plt
 import numpy as np
-import
+import pandas as pd
+import seaborn as sn
+
+import nkululeko.glob_conf as glob_conf
+from nkululeko.aug_train import doit as aug_train
 from nkululeko.experiment import Experiment
-import configparser
-from nkululeko.utils.util import Util
 from nkululeko.nkululeko import doit as nkulu
-from nkululeko.
-import nkululeko.glob_conf as glob_conf
+from nkululeko.utils.util import Util
 
 
 def main(src_dir):
-    parser = argparse.ArgumentParser(
-
+    parser = argparse.ArgumentParser(
+        description="Call the nkululeko MULTIDB framework.")
+    parser.add_argument("--config", default="exp.ini",
+                        help="The base configuration")
     args = parser.parse_args()
     if args.config is not None:
         config_file = args.config
@@ -54,7 +58,8 @@ def main(src_dir):
         dataset = datasets[i]
         print(f"running {dataset}")
         if extra_trains:
-            extra_trains_1 = extra_trains.removeprefix(
+            extra_trains_1 = extra_trains.removeprefix(
+                "[").removesuffix("]")
             config["DATA"]["databases"] = f"['{dataset}', {extra_trains_1}]"
             extra_trains_2 = ast.literal_eval(extra_trains)
             for extra_train in extra_trains_2:
@@ -67,7 +72,8 @@ def main(src_dir):
             test = datasets[j]
             print(f"running train: {train}, test: {test}")
             if extra_trains:
-                extra_trains_1 = extra_trains.removeprefix(
+                extra_trains_1 = extra_trains.removeprefix(
+                    "[").removesuffix("]")
                 config["DATA"][
                     "databases"
                 ] = f"['{train}', '{test}', {extra_trains_1}]"
```
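The argparse change above gives `--config` a default, so the module can now be invoked as `python3 -m nkululeko.multidb --config exp.ini`, or with the flag omitted entirely when the configuration file is named `exp.ini` (the same usage pattern documented in the new predict.py docstring below).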
nkululeko/predict.py
CHANGED
```diff
@@ -1,17 +1,34 @@
 # predict.py
-# use some model and add automatically predicted labels to train and test splits
+# use some model and add automatically predicted labels to train and test splits
+# then save as a new dataset
+
+"""This script is used to call the nkululeko PREDICT framework.
+
+It loads a configuration file, creates a new experiment,
+and performs automatic prediction on the train and test datasets. The predicted labels are added to the datasets and
+saved as a new dataset.
+
+Usage: \n
+python3 -m nkululeko.predict [--config CONFIG_FILE] \n
+
+Arguments: \n
+--config (str): The path to the base configuration file (default: exp.ini)
+"""
 
-from nkululeko.experiment import Experiment
-import configparser
-from nkululeko.utils.util import Util
-from nkululeko.constants import VERSION
 import argparse
+import configparser
 import os
 
+from nkululeko.constants import VERSION
+from nkululeko.experiment import Experiment
+from nkululeko.utils.util import Util
+
 
 def main(src_dir):
-    parser = argparse.ArgumentParser(
-
+    parser = argparse.ArgumentParser(
+        description="Call the nkululeko PREDICT framework.")
+    parser.add_argument("--config", default="exp.ini",
+                        help="The base configuration")
     args = parser.parse_args()
     if args.config is not None:
         config_file = args.config
@@ -41,7 +58,8 @@ def main(src_dir):
 
     # split into train and test
     expr.fill_train_and_tests()
-    util.debug(
+    util.debug(
+        f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
 
     # process the data
     df = expr.autopredict()
```
nkululeko/reporter.py
ADDED
```diff
@@ -0,0 +1,332 @@
+"""Reporter module.
+
+This module contains the Reporter class which is responsible for generating reports.
+"""
+
+
+import ast
+import glob
+import json
+import math
+
+import matplotlib.pyplot as plt
+import numpy as np
+from scipy.stats import pearsonr
+from sklearn.metrics import (
+    ConfusionMatrixDisplay,
+    accuracy_score,
+    classification_report,
+    confusion_matrix,
+    mean_absolute_error,
+    mean_squared_error,
+    r2_score,
+    recall_score,
+)
+from sklearn.utils import resample
+
+import nkululeko.glob_conf as glob_conf
+from nkululeko.reporting.defines import Header
+from nkululeko.reporting.report_item import ReportItem
+from nkululeko.result import Result
+from nkululeko.utils.util import Util
+
+
+class Reporter:
+    def __set_measure(self):
+        if self.util.exp_is_classification():
+            self.MEASURE = "UAR"
+            self.result.measure = self.MEASURE
+            self.is_classification = True
+        else:
+            self.is_classification = False
+            self.measure = self.util.config_val("MODEL", "measure", "mse")
+            if self.measure == "mse":
+                self.MEASURE = "MSE"
+                self.result.measure = self.MEASURE
+            elif self.measure == "mae":
+                self.MEASURE = "MAE"
+                self.result.measure = self.MEASURE
+            elif self.measure == "ccc":
+                self.MEASURE = "CCC"
+                self.result.measure = self.MEASURE
+
+    def __init__(self, truths, preds, run, epoch):
+        """Initialization with ground truth und predictions vector"""
+        self.util = Util("reporter")
+        self.format = self.util.config_val("PLOT", "format", "png")
+        self.truths = truths
+        self.preds = preds
+        self.result = Result(0, 0, 0, 0, "unknown")
+        self.run = run
+        self.epoch = epoch
+        self.__set_measure()
+        self.cont_to_cat = False
+        if len(self.truths) > 0 and len(self.preds) > 0:
+            if self.util.exp_is_classification():
+                self.result.test = recall_score(
+                    self.truths, self.preds, average="macro"
+                )
+                self.result.loss = 1 - accuracy_score(self.truths, self.preds)
+            else:
+                # regression experiment
+                if self.measure == "mse":
+                    self.result.test = mean_squared_error(
+                        self.truths, self.preds)
+                elif self.measure == "mae":
+                    self.result.test = mean_absolute_error(
+                        self.truths, self.preds)
+                elif self.measure == "ccc":
+                    self.result.test = self.ccc(self.truths, self.preds)
+                    if math.isnan(self.result.test):
+                        self.util.debug(f"Truth: {self.truths}")
+                        self.util.debug(f"Predict.: {self.preds}")
+                        self.util.debug(f"Result is NAN: setting to -1")
+                        self.result.test = -1
+                else:
+                    self.util.error(f"unknown measure: {self.measure}")
+
+        # train and loss are being set by the model
+
+    def set_id(self, run, epoch):
+        """Make the report identifiable with run and epoch index"""
+        self.run = run
+        self.epoch = epoch
+
+    def continuous_to_categorical(self):
+        if self.cont_to_cat:
+            return
+        self.cont_to_cat = True
+        bins = ast.literal_eval(glob_conf.config["DATA"]["bins"])
+        self.truths = np.digitize(self.truths, bins) - 1
+        self.preds = np.digitize(self.preds, bins) - 1
+
+    def plot_confmatrix(self, plot_name, epoch):
+        if not self.util.exp_is_classification():
+            self.continuous_to_categorical()
+        self._plot_confmat(self.truths, self.preds, plot_name, epoch)
+
+
+    def plot_per_speaker(self, result_df, plot_name, function):
+        """Plot a confusion matrix with the mode category per speakers.
+
+        This function creates a confusion matrix for each speaker in the result_df.
+        The result_df should contain the columns: preds, truths and speaker.
+
+        Args:
+            * result_df: a pandas dataframe with columns: preds, truths and speaker
+            * plot_name: a string with the name of the plot
+            * function: a string with the function to use for each speaker,
+              can be 'mode' or 'mean'
+
+        Returns:
+            * None
+        """
+        # Initialize empty arrays for predictions and truths
+        pred = np.zeros(0)
+        truth = np.zeros(0)
+
+        # Iterate over each speaker
+        for s in result_df.speaker.unique():
+            # Filter the dataframe for the current speaker
+            s_df = result_df[result_df.speaker == s]
+
+            # Get the mode or mean prediction for the current speaker
+            mode = s_df.pred.mode().iloc[-1]
+            mean = s_df.pred.mean()
+            if function == "mode":
+                s_df.pred = mode
+            elif function == "mean":
+                s_df.pred = mean
+            else:
+                self.util.error(f"unknown function {function}")
+
+            # Append the current speaker's predictions and truths to the arrays
+            pred = np.append(pred, s_df.pred.values)
+            truth = np.append(truth, s_df["truth"].values)
+
+        # If the experiment is not a classification or continuous to categorical conversion was performed,
+        # convert the truths and predictions to categorical
+        if not (self.is_classification or self.cont_to_cat):
+            bins = ast.literal_eval(glob_conf.config["DATA"]["bins"])
+            truth = np.digitize(truth, bins) - 1
+            pred = np.digitize(pred, bins) - 1
+
+        # Plot the confusion matrix for the speakers
+        self._plot_confmat(truth, pred.astype("int"), plot_name, 0)
+
+    def _plot_confmat(self, truths, preds, plot_name, epoch):
+        # print(truths)
+        # print(preds)
+        fig_dir = self.util.get_path("fig_dir")
+        labels = glob_conf.labels
+        fig = plt.figure()  # figsize=[5, 5]
+        uar = recall_score(truths, preds, average="macro")
+        acc = accuracy_score(truths, preds)
+        cm = confusion_matrix(
+            truths, preds, normalize=None
+        )  # normalize must be one of {'true', 'pred', 'all', None}
+        if cm.shape[0] != len(labels):
+            self.util.error(
+                f"mismatch between confmatrix dim ({cm.shape[0]}) and labels"
+                f" length ({len(labels)}: {labels})"
+            )
+        try:
+            disp = ConfusionMatrixDisplay(
+                confusion_matrix=cm, display_labels=labels
+            ).plot(cmap="Blues")
+        except ValueError:
+            disp = ConfusionMatrixDisplay(
+                confusion_matrix=cm,
+                display_labels=list(labels).remove("neutral"),
+            ).plot(cmap="Blues")
+
+        reg_res = ""
+        if not self.is_classification:
+            reg_res = f", {self.MEASURE}: {self.result.test:.3f}"
+
+        if epoch != 0:
+            plt.title(
+                f"Confusion Matrix, UAR: {uar:.3f}{reg_res}, Epoch: {epoch}")
+        else:
+            plt.title(f"Confusion Matrix, UAR: {uar:.3f}{reg_res}")
+        img_path = f"{fig_dir}{plot_name}.{self.format}"
+        plt.savefig(img_path)
+        fig.clear()
+        plt.close(fig)
+        plt.savefig(img_path)
+        plt.close(fig)
+        glob_conf.report.add_item(
+            ReportItem(
+                Header.HEADER_RESULTS,
+                self.util.get_model_description(),
+                "Confusion matrix",
+                img_path,
+            )
+        )
+
+        res_dir = self.util.get_path("res_dir")
+        uar = int(uar * 1000) / 1000.0
+        acc = int(acc * 1000) / 1000.0
+        rpt = f"epoch: {epoch}, UAR: {uar}, ACC: {acc}"
+        # print(rpt)
+        self.util.debug(rpt)
+        file_name = f"{res_dir}{self.util.get_exp_name()}_conf.txt"
+        with open(file_name, "w") as text_file:
+            text_file.write(rpt)
+
+    def print_results(self, epoch):
+        """Print all evaluation values to text file"""
+        res_dir = self.util.get_path("res_dir")
+        file_name = f"{res_dir}{self.util.get_exp_name()}_{epoch}.txt"
+        if self.util.exp_is_classification():
+            labels = glob_conf.labels
+            try:
+                rpt = classification_report(
+                    self.truths,
+                    self.preds,
+                    target_names=labels,
+                    output_dict=True,
+                )
+            except ValueError as e:
+                self.util.debug(
+                    "Reporter: caught a ValueError when trying to get"
+                    " classification_report: " + e
+                )
+                rpt = self.result.to_string()
+            with open(file_name, "w") as text_file:
+                c_ress = list(range(len(labels)))
+                for i, l in enumerate(labels):
+                    c_res = rpt[l]["f1-score"]
+                    c_ress[i] = float(f"{c_res:.3f}")
+                self.util.debug(f"labels: {labels}")
+                f1_per_class = f"result per class (F1 score): {c_ress}"
+                self.util.debug(f1_per_class)
+                rpt_str = f"{json.dumps(rpt)}\n{f1_per_class}"
+                text_file.write(rpt_str)
+                glob_conf.report.add_item(
+                    ReportItem(
+                        Header.HEADER_RESULTS,
+                        f"Classification result {self.util.get_model_description()}",
+                        rpt_str,
+                    )
+                )
+
+        else:  # regression
+            result = self.result.test
+            r2 = r2_score(self.truths, self.preds)
+            pcc = pearsonr(self.truths, self.preds)[0]
+            measure = self.util.config_val("MODEL", "measure", "mse")
+            with open(file_name, "w") as text_file:
+                text_file.write(
+                    f"{measure}: {result:.3f}, r_2: {r2:.3f}, pcc {pcc:.3f}"
+                )
+
+    def make_conf_animation(self, out_name):
+        import imageio
+
+        fig_dir = self.util.get_path("fig_dir")
+        filenames = glob.glob(
+            fig_dir + f"{self.util.get_plot_name()}*_?_???_cnf.png")
+        images = []
+        for filename in filenames:
+            images.append(imageio.imread(filename))
+        fps = self.util.config_val("PLOT", "fps", "1")
+        try:
+            imageio.mimsave(fig_dir + out_name, images, fps=int(fps))
+        except RuntimeError as e:
+            self.util.error("error writing anim gif: " + e)
+
+    def get_result(self):
+        return self.result
+
+    def plot_epoch_progression(self, reports, out_name):
+        fig_dir = self.util.get_path("fig_dir")
+        results, losses, train_results, losses_eval = [], [], [], []
+        for r in reports:
+            results.append(r.get_result().test)
+            losses.append(r.get_result().loss)
+            train_results.append(r.get_result().train)
+            losses_eval.append(r.get_result().loss_eval)
+
+        # do a plot per run
+        # scale the losses so they fit on the picture
+        losses, results, train_results, losses_eval = (
+            np.asarray(losses),
+            np.asarray(results),
+            np.asarray(train_results),
+            np.asarray(losses_eval),
+        )
+
+        if np.all((results > 1)):
+            # scale down values
+            results = results / 100.0
+            train_results = train_results / 100.0
+        # if np.all((losses < 1)):
+        # scale up values
+        plt.figure(dpi=200)
+        plt.plot(train_results, "green", label="train set")
+        plt.plot(results, "red", label="dev set")
+        plt.plot(losses, "black", label="losses")
+        plt.plot(losses_eval, "grey", label="losses_eval")
+        plt.xlabel("epochs")
+        plt.ylabel(f"{self.MEASURE}")
+        plt.legend()
+        plt.savefig(f"{fig_dir}{out_name}.{self.format}")
+        plt.close()
+
+    @staticmethod
+    def ccc(ground_truth, prediction):
+        mean_gt = np.mean(ground_truth, 0)
+        mean_pred = np.mean(prediction, 0)
+        var_gt = np.var(ground_truth, 0)
+        var_pred = np.var(prediction, 0)
+        v_pred = prediction - mean_pred
+        v_gt = ground_truth - mean_gt
+        cor = sum(v_pred * v_gt) / \
+            (np.sqrt(sum(v_pred**2)) * np.sqrt(sum(v_gt**2)))
+        sd_gt = np.std(ground_truth)
+        sd_pred = np.std(prediction)
+        numerator = 2 * cor * sd_gt * sd_pred
+        denominator = var_gt + var_pred + (mean_gt - mean_pred) ** 2
+        ccc = numerator / denominator
+        return ccc
```
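The new `ccc` static method implements Lin's concordance correlation coefficient. Written out, the value it computes for ground truth $x$ and prediction $y$ is

```latex
\mathrm{CCC}(x, y) = \frac{2 \rho \,\sigma_x \sigma_y}{\sigma_x^2 + \sigma_y^2 + (\mu_x - \mu_y)^2}
```

where $\rho$ is the Pearson correlation (the `cor` variable in the code), $\mu$ the means and $\sigma$ the standard deviations. Unlike plain correlation, a perfect score of 1 requires not just a linear relationship but identical means and variances, which is why it is offered as a stricter regression measure alongside MSE and MAE.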
nkululeko/resample.py
CHANGED
```diff
@@ -1,19 +1,23 @@
 # resample.py
 # change the sampling rate for train and test splits
 
-from nkululeko.experiment import Experiment
-import configparser
-from nkululeko.utils.util import Util
-from nkululeko.constants import VERSION
 import argparse
+import configparser
 import os
+
 import pandas as pd
+
 from nkululeko.augmenting.resampler import Resampler
+from nkululeko.constants import VERSION
+from nkululeko.experiment import Experiment
+from nkululeko.utils.util import Util
 
 
 def main(src_dir):
-    parser = argparse.ArgumentParser(
-
+    parser = argparse.ArgumentParser(
+        description="Call the nkululeko RESAMPLE framework.")
+    parser.add_argument("--config", default="exp.ini",
+                        help="The base configuration")
     args = parser.parse_args()
     if args.config is not None:
         config_file = args.config
@@ -48,7 +52,8 @@ def main(src_dir):
 
     # split into train and test
     expr.fill_train_and_tests()
-    util.debug(
+    util.debug(
+        f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
 
     sample_selection = util.config_val("RESAMPLE", "sample_selection", "all")
     if sample_selection == "all":
```
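The `sample_selection` lookup above reads from a `[RESAMPLE]` section of the experiment INI file. A minimal sketch of such a section: only the `sample_selection` key and its `all` default are visible in this diff; any further keys of a real configuration are not shown here.

```ini
[RESAMPLE]
; which samples to resample: the default shown in the diff is "all"
sample_selection = all
```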
nkululeko/runmanager.py
CHANGED
```diff
@@ -1,9 +1,13 @@
-
+"""Runmanager module.
+
+This module contains the Runmanager class which is responsible for managing the
+runs of the experiment.
+"""
 
-from nkululeko.reporting.reporter import Reporter
-from nkululeko.utils.util import Util
 import nkululeko.glob_conf as glob_conf
 from nkululeko.modelrunner import Modelrunner
+from nkululeko.reporting.reporter import Reporter
+from nkululeko.utils.util import Util
 
 
 class Runmanager:
@@ -59,7 +63,8 @@ class Runmanager:
         )
         self.reports, last_epoch = self.modelrunner.do_epochs()
         # wrap up the run
-        plot_anim_progression = self.util.config_val(
+        plot_anim_progression = self.util.config_val(
+            "PLOT", "anim_progression", 0)
         if plot_anim_progression:
             plot_name_suggest = self.util.get_exp_name()
             plot_name = (
@@ -83,7 +88,8 @@ class Runmanager:
                 + "_epoch_progression"
             )
             self.util.debug(f"plotting progression to {plot_name}")
-            self.reports[-1].plot_epoch_progression(
+            self.reports[-1].plot_epoch_progression(
+                self.reports, plot_name)
         # remember the best run
         best_report = self.get_best_result(self.reports)
         plot_best_model = self.util.config_val("PLOT", "best_model", False)
@@ -122,7 +128,8 @@ class Runmanager:
         self.print_model(best_report, plot_name)
 
     def print_given_result(self, run, epoch):
-        """Print a result (confusion matrix) for a given epoch and run
+        """Print a result (confusion matrix) for a given epoch and run.
+
         Args:
             run: for which run
             epoch: for which epoch
@@ -139,7 +146,8 @@ class Runmanager:
         self.print_model(report, plot_name)
 
     def print_model(self, report, plot_name):
-        """Print a confusion matrix for a special report
+        """Print a confusion matrix for a special report.
+
         Args:
             report: for which report (will be computed newly from model)
             plot_name: name of plot file
@@ -152,7 +160,8 @@ class Runmanager:
         report.print_results(epoch)
 
     def load_model(self, report):
-        """Load a model from disk for a specific run and epoch and evaluate
+        """Load a model from disk for a specific run and epoch and evaluate it.
+
         Args:
             report: for which report (will be re-evaluated)
 
```
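Several of the reworked lines in runmanager.py and reporter.py read options from the `[PLOT]` section via `config_val`. A hypothetical section exercising exactly the keys visible in these diffs, with the defaults the code falls back to noted in comments; other `[PLOT]` keys may exist but are not shown here:

```ini
[PLOT]
; image format for saved figures (reporter.py, default: png)
format = png
; render the animated confusion-matrix gif (runmanager.py, default: 0 = off)
anim_progression = 1
; frames per second for that gif (reporter.py, default: 1)
fps = 2
; also plot the best model's confusion matrix (runmanager.py, default: off)
best_model = True
```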
nkululeko/test.py
CHANGED
```diff
@@ -1,17 +1,20 @@
 # test.py
 # Just use a database as test
 
-from nkululeko.experiment import Experiment
-import configparser
-from nkululeko.utils.util import Util
-from nkululeko.constants import VERSION
 import argparse
+import configparser
 import os
 
+from nkululeko.constants import VERSION
+from nkululeko.experiment import Experiment
+from nkululeko.utils.util import Util
+
 
 def main(src_dir):
-    parser = argparse.ArgumentParser(
-
+    parser = argparse.ArgumentParser(
+        description="Call the nkululeko TEST framework.")
+    parser.add_argument("--config", default="exp.ini",
+                        help="The base configuration")
     parser.add_argument(
         "--outfile",
         default="my_results.csv",
```
nkululeko/test_predictor.py
CHANGED
nkululeko/utils/stats.py
CHANGED
```diff
@@ -1,5 +1,6 @@
-from itertools import combinations
 import math
+from itertools import combinations
+
 import numpy as np
 import pandas as pd
 
@@ -16,7 +17,8 @@ def check_na(a):
 
 def cohen_d(d1, d2):
     """
-    Compute Cohen's d from two distributions of real valued arrays
+    Compute Cohen's d from two distributions of real valued arrays.
+
     Args:
         d1: one array
         d2: the other array
@@ -50,8 +52,10 @@ def all_combinations(items_list):
 
 
 def get_effect_size(df, target, variable):
-    """
-
+    """Get the effect size as Cohen's D.
+
+    Effect size is computed from a real numbered variable on a categorical target.
+
     Args:
         df: a pd.Dataframe with at least target and variable as columns
         target: the categorical target, e.g. emotion
@@ -88,7 +92,10 @@ def cohens_D_to_string(val):
 
 
 def normalize(values):
-    """Do a z-transformation of a distribution
+    """Do a z-transformation of a distribution.
+
+    So that mean = 0 and variance = 1
+    """
     from sklearn.preprocessing import StandardScaler
 
     scaler = StandardScaler()
```
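For reference, the two statistics named in the reworked docstrings: Cohen's d for two samples with means $\mu_1, \mu_2$ and pooled standard deviation $s$ (assuming `cohen_d` follows the standard pooled-variance form, which this diff does not show), and the z-transformation that `normalize` performs via `StandardScaler`:

```latex
d = \frac{\mu_1 - \mu_2}{s}, \qquad
s = \sqrt{\frac{(n_1 - 1)\,s_1^2 + (n_2 - 1)\,s_2^2}{n_1 + n_2 - 2}}, \qquad
z = \frac{x - \mu}{\sigma}
```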
{nkululeko-0.81.4.dist-info → nkululeko-0.81.6.dist-info}/METADATA
CHANGED
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.81.4
+Version: 0.81.6
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -323,6 +323,16 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
 
+Version 0.81.6
+--------------
+* updated documentation
+* updated crema-d
+* updated tests
+
+Version 0.81.5
+--------------
+* added sex=gender for speaker mappings
+
 Version 0.81.4
 --------------
 * fixed bug in demo module
```
|