nkululeko 0.88.1__py3-none-any.whl → 0.88.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +1 -1
- nkululeko/experiment.py +24 -37
- nkululeko/reporting/reporter.py +115 -86
- nkululeko/reporting/result.py +13 -5
- nkululeko/utils/util.py +27 -20
- {nkululeko-0.88.1.dist-info → nkululeko-0.88.2.dist-info}/METADATA +5 -1
- {nkululeko-0.88.1.dist-info → nkululeko-0.88.2.dist-info}/RECORD +10 -10
- {nkululeko-0.88.1.dist-info → nkululeko-0.88.2.dist-info}/LICENSE +0 -0
- {nkululeko-0.88.1.dist-info → nkululeko-0.88.2.dist-info}/WHEEL +0 -0
- {nkululeko-0.88.1.dist-info → nkululeko-0.88.2.dist-info}/top_level.txt +0 -0
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
-VERSION="0.88.1"
+VERSION="0.88.2"
 SAMPLING_RATE = 16000
nkululeko/experiment.py
CHANGED
@@ -107,8 +107,7 @@ class Experiment:
         # print keys/column
         dbs = ",".join(list(self.datasets.keys()))
         labels = self.util.config_val("DATA", "labels", False)
-        auto_labels = list(
-            next(iter(self.datasets.values())).df[self.target].unique())
+        auto_labels = list(next(iter(self.datasets.values())).df[self.target].unique())
         if labels:
             self.labels = ast.literal_eval(labels)
             self.util.debug(f"Using target labels (from config): {labels}")
@@ -158,8 +157,7 @@ class Experiment:
                 data.split()
                 data.prepare_labels()
                 self.df_test = pd.concat(
-                    [self.df_test, self.util.make_segmented_index(
-                        data.df_test)]
+                    [self.df_test, self.util.make_segmented_index(data.df_test)]
                 )
                 self.df_test.is_labeled = data.is_labeled
                 self.df_test.got_gender = self.got_gender
@@ -260,8 +258,7 @@ class Experiment:
             test_cats = self.df_test[self.target].unique()
         else:
             # if there is no target, copy a dummy label
-            self.df_test = self._add_random_target(
-                self.df_test).astype("str")
+            self.df_test = self._add_random_target(self.df_test).astype("str")
         train_cats = self.df_train[self.target].unique()
         # print(f"df_train: {pd.DataFrame(self.df_train[self.target])}")
         # print(f"train_cats with target {self.target}: {train_cats}")
@@ -269,8 +266,7 @@ class Experiment:
         if type(test_cats) == np.ndarray:
             self.util.debug(f"Categories test (nd.array): {test_cats}")
         else:
-            self.util.debug(
-                f"Categories test (list): {list(test_cats)}")
+            self.util.debug(f"Categories test (list): {list(test_cats)}")
         if type(train_cats) == np.ndarray:
             self.util.debug(f"Categories train (nd.array): {train_cats}")
         else:
@@ -293,8 +289,7 @@ class Experiment:
 
         target_factor = self.util.config_val("DATA", "target_divide_by", False)
         if target_factor:
-            self.df_test[self.target] = self.df_test[self.target] / \
-                float(target_factor)
+            self.df_test[self.target] = self.df_test[self.target] / float(target_factor)
             self.df_train[self.target] = self.df_train[self.target] / float(
                 target_factor
             )
@@ -317,16 +312,14 @@ class Experiment:
     def plot_distribution(self, df_labels):
         """Plot the distribution of samples and speaker per target class and biological sex"""
         plot = Plots()
-        sample_selection = self.util.config_val(
-            "EXPL", "sample_selection", "all")
+        sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
         plot.plot_distributions(df_labels)
         if self.got_speaker:
             plot.plot_distributions_speaker(df_labels)
 
     def extract_test_feats(self):
         self.feats_test = pd.DataFrame()
-        feats_name = "_".join(ast.literal_eval(
-            glob_conf.config["DATA"]["tests"]))
+        feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["tests"]))
         feats_types = self.util.config_val_list("FEATS", "type", ["os"])
         self.feature_extractor = FeatureExtractor(
             self.df_test, feats_types, feats_name, "test"
@@ -343,8 +336,7 @@ class Experiment:
 
         """
         df_train, df_test = self.df_train, self.df_test
-        feats_name = "_".join(ast.literal_eval(
-            glob_conf.config["DATA"]["databases"]))
+        feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
         self.feats_test, self.feats_train = pd.DataFrame(), pd.DataFrame()
         feats_types = self.util.config_val("FEATS", "type", "os")
         # Ensure feats_types is always a list of strings
@@ -385,8 +377,7 @@ class Experiment:
                 f"test feats ({self.feats_test.shape[0]}) != test labels"
                 f" ({self.df_test.shape[0]})"
             )
-            self.df_test = self.df_test[self.df_test.index.isin(
-                self.feats_test.index)]
+            self.df_test = self.df_test[self.df_test.index.isin(self.feats_test.index)]
             self.util.warn(f"new test labels shape: {self.df_test.shape[0]}")
 
         self._check_scale()
@@ -401,8 +392,7 @@ class Experiment:
         """Augment the selected samples."""
         from nkululeko.augmenting.augmenter import Augmenter
 
-        sample_selection = self.util.config_val(
-            "AUGMENT", "sample_selection", "all")
+        sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
         if sample_selection == "all":
             df = pd.concat([self.df_train, self.df_test])
         elif sample_selection == "train":
@@ -497,8 +487,7 @@ class Experiment:
         """
         from nkululeko.augmenting.randomsplicer import Randomsplicer
 
-        sample_selection = self.util.config_val(
-            "AUGMENT", "sample_selection", "all")
+        sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
         if sample_selection == "all":
             df = pd.concat([self.df_train, self.df_test])
         elif sample_selection == "train":
@@ -519,8 +508,7 @@ class Experiment:
         plot_feats = eval(
             self.util.config_val("EXPL", "feature_distributions", "False")
         )
-        sample_selection = self.util.config_val(
-            "EXPL", "sample_selection", "all")
+        sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
         # get the data labels
         if sample_selection == "all":
             df_labels = pd.concat([self.df_train, self.df_test])
@@ -583,8 +571,7 @@ class Experiment:
         for scat_target in scat_targets:
             if self.util.is_categorical(df_labels[scat_target]):
                 for scatter in scatters:
-                    plots.scatter_plot(
-                        df_feats, df_labels, scat_target, scatter)
+                    plots.scatter_plot(df_feats, df_labels, scat_target, scatter)
             else:
                 self.util.debug(
                     f"{self.name}: binning continuous variable to categories"
@@ -669,15 +656,15 @@ class Experiment:
             )
             return
         best = self.get_best_report(self.reports)
-        # if not best.is_classification:
-        #     best.continuous_to_categorical()
-        truths = best.truths
-        preds = best.preds
+        if best.is_classification:
+            truths = best.truths
+            preds = best.preds
+        else:
+            truths = best.truths_cont
+            preds = best.preds_cont
         speakers = self.df_test.speaker.values
-        print(f"{len(truths)} {len(preds)} {len(speakers)}")
-        df = pd.DataFrame(
-            data={"truth": truths, "pred": preds, "speaker": speakers})
-        plot_name = "result_combined_per_speaker"
+        df = pd.DataFrame(data={"truths": truths, "preds": preds, "speakers": speakers})
+        plot_name = f"{self.util.get_exp_name()}_speakercombined_{function}"
         self.util.debug(
             f"plotting speaker combination ({function}) confusion matrix to"
             f" {plot_name}"
@@ -692,13 +679,13 @@ class Experiment:
 
     def demo(self, file, is_list, outfile):
         model = self.runmgr.get_best_model()
-        labelEncoder = None
+        lab_enc = None
         try:
-            labelEncoder = self.label_encoder
+            lab_enc = self.label_encoder
         except AttributeError:
             pass
         demo = Demo_predictor(
-            model, file, is_list, self.feature_extractor, labelEncoder, outfile
+            model, file, is_list, self.feature_extractor, lab_enc, outfile
         )
         demo.run_demo()
 
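The two functional hunks above implement the 0.88.2 changelog entry ("changed combine speaker results to show speakers not samples"): regression experiments now keep the continuous truths/predictions, and each test speaker contributes exactly one truth/prediction pair instead of all of their samples. A toy sketch of that aggregation (not part of the diff; invented data, using the new `truths`/`preds`/`speakers` column names):

```python
# Toy sketch (not part of the diff): combine sample-level predictions into
# one prediction per speaker, as the speaker-combination plot now does.
import numpy as np
import pandas as pd

df = pd.DataFrame(
    data={
        "truths": [0, 0, 1, 1, 1, 0],
        "preds": [0, 1, 1, 1, 0, 0],
        "speakers": ["s1", "s1", "s2", "s2", "s2", "s3"],
    }
)

preds_speakers = np.zeros(0)
truths_speakers = np.zeros(0)
for s in df.speakers.unique():
    s_df = df[df.speakers == s]
    s_truth = s_df.truths.iloc[0]        # each speaker has one true label
    s_pred = s_df.preds.mode().iloc[-1]  # majority vote ("mode"); use .mean() for regression
    preds_speakers = np.append(preds_speakers, s_pred)
    truths_speakers = np.append(truths_speakers, s_truth)

print(len(df), "samples ->", len(truths_speakers), "speakers")  # 6 samples -> 3 speakers
```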
nkululeko/reporting/reporter.py
CHANGED
@@ -34,23 +34,24 @@ from nkululeko.utils.util import Util
 
 
 class Reporter:
-    def __set_measure(self):
+    def _set_metric(self):
         if self.util.exp_is_classification():
-            self.MEASURE = "UAR"
-            self.result.measure = self.MEASURE
+            self.metric = "uar"
+            self.METRIC = "UAR"
+            self.result.metric = self.METRIC
             self.is_classification = True
         else:
             self.is_classification = False
-            self.measure = self.util.config_val("MODEL", "measure", "mse")
-            if self.measure == "mse":
-                self.MEASURE = "MSE"
-                self.result.measure = self.MEASURE
-            elif self.measure == "mae":
-                self.MEASURE = "MAE"
-                self.result.measure = self.MEASURE
-            elif self.measure == "ccc":
-                self.MEASURE = "CCC"
-                self.result.measure = self.MEASURE
+            self.metric = self.util.config_val("MODEL", "measure", "mse")
+            if self.metric == "mse":
+                self.METRIC = "MSE"
+                self.result.metric = self.METRIC
+            elif self.metric == "mae":
+                self.METRIC = "MAE"
+                self.result.metric = self.METRIC
+            elif self.metric == "ccc":
+                self.METRIC = "CCC"
+                self.result.metric = self.METRIC
 
     def __init__(self, truths, preds, run, epoch, probas=None):
         """Initialization with ground truth und predictions vector.
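Note that the renamed `_set_metric` still reads the historical `measure` key from the `[MODEL]` section, so existing ini files keep working; only the internal attribute names change from `measure`/`MEASURE` to `metric`/`METRIC`. A stand-alone sketch of that lookup (plain `configparser` instead of nkululeko's `Util.config_val`; the ini snippet is hypothetical):

```python
# Stand-alone sketch of the metric lookup; mirrors _set_metric above but
# uses plain configparser instead of nkululeko's config machinery.
import configparser

config = configparser.ConfigParser()
config.read_string("[MODEL]\nmeasure = ccc\n")  # hypothetical ini snippet

metric = config["MODEL"].get("measure", "mse")  # ini key is still "measure"
METRIC = {"mse": "MSE", "mae": "MAE", "ccc": "CCC"}[metric]
print(metric, METRIC)  # -> ccc CCC
```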
@@ -70,60 +71,70 @@ class Reporter:
         self.result = Result(0, 0, 0, 0, "unknown")
         self.run = run
         self.epoch = epoch
-        self.__set_measure()
+        self._set_metric()
         self.filenameadd = ""
         self.cont_to_cat = False
         if len(self.truths) > 0 and len(self.preds) > 0:
             if self.util.exp_is_classification():
-                uar, (upper, lower) = evaluate_with_conf_int(
-                    self.preds,
-                    unweighted_average_recall,
-                    self.truths,
-                    num_bootstraps=1000,
-                    alpha=5,
+                uar, upper, lower = self._get_test_result(
+                    self.truths, self.preds, "uar"
                 )
                 self.result.test = uar
                 self.result.set_upper_lower(upper, lower)
                 self.result.loss = 1 - accuracy(self.truths, self.preds)
             else:
                 # regression experiment
-                if self.measure == "mse":
-                    test_result, (upper, lower) = evaluate_with_conf_int(
-                        self.preds,
-                        mean_squared_error,
-                        self.truths,
-                        num_bootstraps=1000,
-                        alpha=5,
-                    )
-                elif self.measure == "mae":
-                    test_result, (upper, lower) = evaluate_with_conf_int(
-                        self.preds,
-                        mean_absolute_error,
-                        self.truths,
-                        num_bootstraps=1000,
-                        alpha=5,
-                    )
-                elif self.measure == "ccc":
-                    test_result, (upper, lower) = evaluate_with_conf_int(
-                        self.preds,
-                        concordance_cc,
-                        self.truths,
-                        num_bootstraps=1000,
-                        alpha=5,
-                    )
-
-                    if math.isnan(self.result.test):
-                        self.util.debug(f"Truth: {self.truths}")
-                        self.util.debug(f"Predict.: {self.preds}")
-                        self.util.debug("Result is NAN: setting to -1")
-                        self.result.test = -1
-                else:
-                    self.util.error(f"unknown measure: {self.measure}")
-
+                # keep the original values for further use, they will be binned later
+                self.truths_cont = self.truths
+                self.preds_cont = self.preds
+                test_result, upper, lower = self._get_test_result(
+                    self.truths, self.preds, self.metric
+                )
                 self.result.test = test_result
                 self.result.set_upper_lower(upper, lower)
                 # train and loss are being set by the model
-
+
+    def _get_test_result(self, truths, preds, metric):
+        if metric == "uar":
+            test_result, (upper, lower) = evaluate_with_conf_int(
+                preds,
+                unweighted_average_recall,
+                truths,
+                num_bootstraps=1000,
+                alpha=5,
+            )
+        elif metric == "mse":
+            test_result, (upper, lower) = evaluate_with_conf_int(
+                preds,
+                mean_squared_error,
+                truths,
+                num_bootstraps=1000,
+                alpha=5,
+            )
+        elif metric == "mae":
+            test_result, (upper, lower) = evaluate_with_conf_int(
+                preds,
+                mean_absolute_error,
+                truths,
+                num_bootstraps=1000,
+                alpha=5,
+            )
+        elif metric == "ccc":
+            test_result, (upper, lower) = evaluate_with_conf_int(
+                preds,
+                concordance_cc,
+                truths,
+                num_bootstraps=1000,
+                alpha=5,
+            )
+            if math.isnan(test_result):
+                self.util.debug(f"Truth: {self.truths}")
+                self.util.debug(f"Predict.: {self.preds}")
+                self.util.debug("Result is NAN: setting to -1")
+                test_result = -1
+        else:
+            self.util.error(f"unknown metric: {self.metric}")
+        return test_result, upper, lower
 
     def print_probabilities(self):
         """Print the probabilities per class to a file in the store."""
@@ -195,31 +206,49 @@ class Reporter:
     def plot_per_speaker(self, result_df, plot_name, function):
         """Plot a confusion matrix with the mode category per speakers.
 
+        If the function is mode and the values continuous, bin first
+
         Args:
             result_df: a pandas dataframe with columns: preds, truths and speaker.
             plot_name: name for the figure.
             function: either mode or mean.
         """
-        speakers = result_df.speaker.unique()
-        pred = np.zeros(0)
-        truth = np.zeros(0)
+        if function == "mode" and not self.is_classification:
+            truths, preds = result_df["truths"].values, result_df["preds"].values
+            truths, preds = self.util._bin_distributions(truths, preds)
+            result_df["truths"], result_df["preds"] = truths, preds
+        speakers = result_df.speakers.unique()
+        preds_speakers = np.zeros(0)
+        truths_speakers = np.zeros(0)
         for s in speakers:
-            s_df = result_df[result_df.speaker == s]
-            mode = s_df.pred.mode().iloc[-1]
-            mean = s_df.pred.mean()
+            s_df = result_df[result_df.speakers == s]
+            s_truth = s_df.truths.iloc[0]
+            s_pred = None
             if function == "mode":
-                s_df.pred = mode
+                s_pred = s_df.preds.mode().iloc[-1]
             elif function == "mean":
-                s_df.pred = mean
+                s_pred = s_df.preds.mean()
             else:
-                self.util.error(f"unknown function {function}")
-            pred = np.append(pred, s_df.pred.values)
-            truth = np.append(truth, s_df["truth"].values)
-        if not (self.is_classification or self.cont_to_cat):
-            bins = ast.literal_eval(glob_conf.config["DATA"]["bins"])
-            truth = np.digitize(truth, bins) - 1
-            pred = np.digitize(pred, bins) - 1
-        self._plot_confmat(truth, pred.astype("int"), plot_name)
+                self.util.error(f"unknown function {function}")
+            preds_speakers = np.append(preds_speakers, s_pred)
+            truths_speakers = np.append(truths_speakers, s_truth)
+        test_result, upper, lower = self._get_test_result(
+            result_df.truths.values, result_df.preds.values, self.metric
+        )
+        test_result = Result(test_result, None, None, None, self.METRIC)
+        test_result.set_upper_lower(upper, lower)
+        result_msg = f"Speaker combination result: {test_result.test_result_str()}"
+        self.util.debug(result_msg)
+        if function == "mean":
+            truths_speakers, preds_speakers = self.util._bin_distributions(
+                truths_speakers, preds_speakers
+            )
+        self._plot_confmat(
+            truths_speakers,
+            preds_speakers.astype("int"),
+            plot_name,
+            test_result=test_result,
+        )
 
     def _plot_scatter(self, truths, preds, plot_name, epoch=None):
         # print(truths)
|
|
227
256
|
if epoch is None:
|
228
257
|
epoch = self.epoch
|
229
258
|
fig_dir = self.util.get_path("fig_dir")
|
230
|
-
fig = plt.figure() # figsize=[5, 5]
|
231
|
-
|
232
259
|
pcc = pearsonr(self.truths, self.preds)[0]
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
plt.scatter(truths, preds, cmap="Blues")
|
260
|
+
reg_res = self.result.test_result_str()
|
261
|
+
fig = plt.figure()
|
262
|
+
plt.scatter(truths, preds)
|
237
263
|
plt.xlabel("truth")
|
238
264
|
plt.ylabel("prediction")
|
239
265
|
|
@@ -258,11 +284,11 @@ class Reporter:
             )
         )
 
-    def _plot_confmat(self, truths, preds, plot_name, epoch=None):
-        # print(truths)
-        # print(preds)
+    def _plot_confmat(self, truths, preds, plot_name, epoch=None, test_result=None):
         if epoch is None:
             epoch = self.epoch
+        if test_result is None:
+            test_result = self.result
         fig_dir = self.util.get_path("fig_dir")
         labels = glob_conf.labels
         fig = plt.figure()  # figsize=[5, 5]
@@ -295,12 +321,15 @@ class Reporter:
 
         reg_res = ""
         if not self.is_classification:
-            reg_res = f"{self.result.test:.3f} {self.MEASURE}"
+            reg_res = f"{test_result.test_result_str()}"
+            self.util.debug(
+                f"Best result at epoch {epoch}: {test_result.test_result_str()}"
+            )
 
-        uar_str = str(int(uar * 1000) / 1000.0)[1:]
-        acc_str = str(int(acc * 1000) / 1000.0)[1:]
-        up_str = str(int(upper * 1000) / 1000.0)[1:]
-        low_str = str(int(lower * 1000) / 1000.0)[1:]
+        uar_str = self.util.to_3_digits_str(uar)
+        acc_str = self.util.to_3_digits_str(acc)
+        up_str = self.util.to_3_digits_str(upper)
+        low_str = self.util.to_3_digits_str(lower)
 
         if epoch != 0:
             plt.title(
@@ -427,7 +456,7 @@ class Reporter:
         ax = df.plot()
         fig = ax.figure
         plt.xlabel("epochs")
-        plt.ylabel(f"{self.MEASURE}")
+        plt.ylabel(f"{self.METRIC}")
         plot_path = f"{fig_dir}{plot_name}.{self.format}"
         plt.savefig(plot_path)
         self.util.debug(f"plotted epoch progression to {plot_path}")
@@ -464,7 +493,7 @@ class Reporter:
         plt.plot(losses, "black", label="losses")
         plt.plot(losses_eval, "grey", label="losses_eval")
         plt.xlabel("epochs")
-        plt.ylabel(f"{self.MEASURE}")
+        plt.ylabel(f"{self.METRIC}")
         plt.legend()
         plt.savefig(f"{fig_dir}{out_name}.{self.format}")
         plt.close()
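The new `_get_test_result` helper funnels all four metrics through `evaluate_with_conf_int`, which wraps the point metric in a bootstrap confidence interval. A rough sketch of what such a bootstrap CI computes (a simplified stand-in, not the actual library code; sklearn's macro-averaged `recall_score` stands in for `unweighted_average_recall` as an assumption):

```python
# Simplified stand-in for evaluate_with_conf_int (not the library code):
# bootstrap a metric over resampled (truth, prediction) pairs and report
# the (100 - alpha)% confidence interval around the point estimate.
import numpy as np
from sklearn.metrics import recall_score  # macro recall ~ UAR

def bootstrap_conf_int(preds, metric, truths, num_bootstraps=1000, alpha=5, seed=42):
    preds, truths = np.asarray(preds), np.asarray(truths)
    rng = np.random.default_rng(seed)
    point = metric(truths, preds)
    scores = []
    for _ in range(num_bootstraps):
        idx = rng.integers(0, len(preds), len(preds))  # resample with replacement
        scores.append(metric(truths[idx], preds[idx]))
    upper = np.percentile(scores, 100 - alpha / 2)
    lower = np.percentile(scores, alpha / 2)
    return point, (upper, lower)

uar, (upper, lower) = bootstrap_conf_int(
    preds=[0, 1, 1, 0, 1, 0, 0, 1],
    metric=lambda t, p: recall_score(t, p, average="macro"),
    truths=[0, 1, 0, 0, 1, 1, 0, 1],
)
print(f"UAR {uar:.3f} (upper {upper:.3f} / lower {lower:.3f})")
```

Centralizing this in one helper is what lets `plot_per_speaker` above report a confidence interval for the speaker-combined result with the same code path as the sample-level result.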
nkululeko/reporting/result.py
CHANGED
@@ -1,13 +1,15 @@
 # result.py
+from nkululeko.utils.util import Util
 
 
 class Result:
-    def __init__(self, test, train, loss, loss_eval, measure):
+    def __init__(self, test, train, loss, loss_eval, metric):
         self.test = test
         self.train = train
         self.loss = loss
         self.loss_eval = loss_eval
-        self.measure = measure
+        self.metric = metric
+        self.util = Util("Result")
 
     def get_result(self):
         return self.test
@@ -18,10 +20,16 @@ class Result:
         self.lower = lower
 
     def get_test_result(self):
-        return f"test: {self.test:.3f} {self.measure}"
+        return f"test: {self.test:.3f} {self.metric}"
 
     def to_string(self):
         return (
-            f"test: {self.test} {self.measure}, train:"
-            f" {self.train} {self.measure}, loss: {self.loss}, eval-loss: {self.loss_eval}"
+            f"test: {self.test} {self.metric}, train:"
+            f" {self.train} {self.metric}, loss: {self.loss}, eval-loss: {self.loss_eval}"
         )
+
+    def test_result_str(self):
+        result_s = self.util.to_3_digits_str(self.test)
+        up_str = self.util.to_3_digits_str(self.upper)
+        low_str = self.util.to_3_digits_str(self.lower)
+        return f"{self.metric}: {result_s} ({up_str}/{low_str})"
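`test_result_str` is now the single place where results are rendered as `metric: value (upper/lower)`; reporter.py calls it for the scatter plot, the confusion matrix title, and the speaker-combination debug message. A hypothetical usage (numbers invented; note that `Result` now instantiates `Util`, which expects an initialized nkululeko configuration):

```python
# Hypothetical usage of the extended Result API (numbers invented for
# illustration; assumes a loaded nkululeko config, since Result builds a Util).
from nkululeko.reporting.result import Result

res = Result(0.743, None, None, None, "UAR")
res.set_upper_lower(0.761, 0.725)
print(res.test_result_str())  # -> "UAR: .743 (.761/.725)"
```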
nkululeko/utils/util.py
CHANGED
@@ -50,9 +50,7 @@ class Util:
         self.got_data_roots = False
 
     def get_path(self, entry):
-        """
-        This method allows the user to get the directory path for the given argument.
-        """
+        """This method allows the user to get the directory path for the given argument."""
         if self.config is None:
             # If no configuration file is provided, use default paths
             if entry == "fig_dir":
@@ -139,15 +137,11 @@ class Util:
         )
 
     def get_name(self):
-        """
-        Get the name of the experiment
-        """
+        """Get the name of the experiment."""
         return self.config["EXP"]["name"]
 
     def get_exp_dir(self):
-        """
-        Get the experiment directory
-        """
+        """Get the experiment directory."""
         root = os.path.join(self.config["EXP"]["root"], "")
         name = self.config["EXP"]["name"]
         dir_name = f"{root}{name}"
@@ -176,15 +170,11 @@ class Util:
         return ""
 
     def get_data_name(self):
-        """
-        Get a string as name from all databases that are useed
-        """
+        """Get a string as name from all databases that are useed."""
        return "_".join(ast.literal_eval(self.config["DATA"]["databases"]))
 
     def get_feattype_name(self):
-        """
-        Get a string as name from all feature sets that are used
-        """
+        """Get a string as name from all feature sets that are used."""
         return "_".join(ast.literal_eval(self.config["FEATS"]["type"]))
 
     def get_exp_name(self, only_train=False, only_data=False):
@@ -303,9 +293,9 @@ class Util:
         return ast.literal_eval(self.config["DATA"]["labels"])
 
     def continuous_to_categorical(self, series):
-        """
-        Discretize a categorical variable.
-        Uses the labels and bins from the ini if present
+        """Discretize a categorical variable.
+
+        Uses the labels and bins from the ini if present
 
         :param series: a pandas series
         :return a pandas series with discretized values as categories
@@ -321,11 +311,23 @@ class Util:
             labels = ["0_low", "1_middle", "2_high"]
         result = np.digitize(series, bins) - 1
         result = pd.Series(result)
-        for i, l in enumerate(labels):
-            result = result.replace(i, str(l))
+        for i, lab in enumerate(labels):
+            result = result.replace(i, str(lab))
         result = result.astype("category")
         return result
 
+    def _bin_distributions(self, truths, preds):
+        try:
+            bins = ast.literal_eval(self.config["DATA"]["bins"])
+        except KeyError:
+            # if no binning is given, simply take three bins, based on truth
+            b1 = np.quantile(truths, 0.33)
+            b2 = np.quantile(truths, 0.66)
+            bins = [-1000000, b1, b2, 1000000]
+        truths = np.digitize(truths, bins) - 1
+        preds = np.digitize(preds, bins) - 1
+        return truths, preds
+
     def print_best_results(self, best_reports):
         res_dir = self.get_res_dir()
         # go one level up above the "run" level
@@ -416,5 +418,10 @@ class Util:
         self.error(f"unknown measure: {measure}")
 
     def to_3_digits(self, x):
+        """Given a float, return this to 3 digits."""
         x = float(x)
         return (int(x * 1000)) / 1000.0
+
+    def to_3_digits_str(self, x):
+        """Given a float, return this to 3 digits as string without integer number."""
+        return str(self.to_3_digits(x))[1:]
{nkululeko-0.88.1.dist-info → nkululeko-0.88.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.88.1
+Version: 0.88.2
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -356,6 +356,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
 
+Version 0.88.2
+--------------
+* changed combine speaker results to show speakers not samples
+
 Version 0.88.1
 --------------
 * added obligatory scatter plot for regression
{nkululeko-0.88.1.dist-info → nkululeko-0.88.2.dist-info}/RECORD
CHANGED
@@ -2,12 +2,12 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
 nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
 nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
-nkululeko/constants.py,sha256=
+nkululeko/constants.py,sha256=92td3PSYccIF_YkZhW6EMRo70neUjL_2Wj7JXyHzoq4,39
 nkululeko/demo.py,sha256=bLuHkeEl5rOfm7ecGHCcWATiPK7-njNbtrGljxzNzFs,5088
 nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
 nkululeko/demo_predictor.py,sha256=es56xbT8ifkS_vnrlb5NTZT54gNmeUtNlA4zVA_gnN8,4757
 nkululeko/ensemble.py,sha256=huRbXUuabm6QYxGBHjkwEU95e-0qxtO0Z6UdXFgtaMY,4947
-nkululeko/experiment.py,sha256=
+nkululeko/experiment.py,sha256=wXZnb_cfOqF8b0Zqzu2bbrEgCCpG_zPkDbD-Usw5sRs,31283
 nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
 nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
 nkululeko/feature_extractor.py,sha256=UnspIWz3XrNhKnBBhWZkH2bHvD-sROtrQVqB1JvkUyw,4088
@@ -98,17 +98,17 @@ nkululeko/reporting/defines.py,sha256=IsY1YgKRMaABpylVKjBJgJ5bNCEbGCVA_E6pivraqS
 nkululeko/reporting/latex_writer.py,sha256=qiCRSmB4KOD_za4oHu5x-PhwjZohzfo8wecMOwlXZwc,1886
 nkululeko/reporting/report.py,sha256=W0rcigDdjBvxZQ3pZja_gvToILYvaZ1BFtnN2qFRfYI,1060
 nkululeko/reporting/report_item.py,sha256=siWeGNgo4bAE46YBMNcsdf3jTMTy76BO9Fi6DTvDig4,533
-nkululeko/reporting/reporter.py,sha256=
-nkululeko/reporting/result.py,sha256=
+nkululeko/reporting/reporter.py,sha256=77u9t3v_ilqOEToISPPcRffCQuawhgGO3xKnVFGs_pg,19237
+nkululeko/reporting/result.py,sha256=G63a2tHCwHhM6NBJgYzsWKWJm4Yu3r4hsCHA2Km7eHU,1073
 nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=pmLHuXsaqvcdYxB4PSW9l1mbQWZZBJFhi_CGabqydas,1947
 nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5EvWlcWQ,3301
 nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
 nkululeko/utils/stats.py,sha256=eC9dMO-by6CDnGLHDBQu-2B4-BudZNJ0nnWGhKYdUMA,2968
-nkululeko/utils/util.py,sha256=
-nkululeko-0.88.1.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
-nkululeko-0.88.1.dist-info/METADATA,sha256=
-nkululeko-0.88.1.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
-nkululeko-0.88.1.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
-nkululeko-0.88.1.dist-info/RECORD,,
+nkululeko/utils/util.py,sha256=BNd9JpoVakPbyysKBsJSCnqlbPlUKHUrcWYcwEnOdVA,15128
+nkululeko-0.88.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.88.2.dist-info/METADATA,sha256=VL3DswyjLpnRvaQkV8jDGw7OszOv-pfQC_i9j57lyLs,39119
+nkululeko-0.88.2.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
+nkululeko-0.88.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.88.2.dist-info/RECORD,,
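For reference, the `sha256=` values in RECORD are the urlsafe-base64-encoded SHA-256 digests of each packaged file, with padding stripped, followed by the file size in bytes. A sketch of how the new constants.py entry (39 bytes) could be reproduced, assuming the file contains exactly the two lines shown in the constants.py hunk above:

```python
# Sketch: recompute a wheel RECORD entry (urlsafe base64 SHA-256, padding
# stripped, plus byte size). The file content is assumed from the
# constants.py hunk; if it matches byte-for-byte, the hash matches too.
import base64
import hashlib

data = b'VERSION="0.88.2"\nSAMPLING_RATE = 16000\n'  # assumed 39-byte content
digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
print(f"nkululeko/constants.py,sha256={digest.decode()},{len(data)}")
```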
{nkululeko-0.88.1.dist-info → nkululeko-0.88.2.dist-info}/LICENSE
File without changes
{nkululeko-0.88.1.dist-info → nkululeko-0.88.2.dist-info}/WHEEL
File without changes
{nkululeko-0.88.1.dist-info → nkululeko-0.88.2.dist-info}/top_level.txt
File without changes