nkululeko 0.86.7__py3-none-any.whl → 0.87.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +1 -1
- nkululeko/data/dataset_csv.py +12 -14
- nkululeko/demo.py +4 -8
- nkululeko/experiment.py +39 -21
- nkululeko/feature_extractor.py +10 -4
- nkululeko/modelrunner.py +5 -5
- nkululeko/models/model.py +23 -3
- nkululeko/models/model_cnn.py +41 -22
- nkululeko/models/model_mlp.py +37 -17
- nkululeko/models/model_mlp_regression.py +3 -1
- nkululeko/plots.py +25 -37
- nkululeko/reporting/reporter.py +69 -6
- nkululeko/runmanager.py +8 -11
- nkululeko/test_predictor.py +1 -6
- nkululeko/utils/stats.py +11 -7
- nkululeko/utils/util.py +22 -16
- {nkululeko-0.86.7.dist-info → nkululeko-0.87.0.dist-info}/METADATA +17 -1
- {nkululeko-0.86.7.dist-info → nkululeko-0.87.0.dist-info}/RECORD +21 -21
- {nkululeko-0.86.7.dist-info → nkululeko-0.87.0.dist-info}/WHEEL +1 -1
- {nkululeko-0.86.7.dist-info → nkululeko-0.87.0.dist-info}/LICENSE +0 -0
- {nkululeko-0.86.7.dist-info → nkululeko-0.87.0.dist-info}/top_level.txt +0 -0
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
-VERSION="0.86.7"
+VERSION="0.87.0"
 SAMPLING_RATE = 16000
nkululeko/data/dataset_csv.py
CHANGED
@@ -23,6 +23,9 @@ class Dataset_CSV(Dataset):
 root = os.path.dirname(data_file)
 audio_path = self.util.config_val_data(self.name, "audio_path", "./")
 df = pd.read_csv(data_file)
+# trim all string values
+df_obj = df.select_dtypes("object")
+df[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())
 # special treatment for segmented dataframes with only one column:
 if "start" in df.columns and len(df.columns) == 4:
 index = audformat.segmented_index(
@@ -49,8 +52,7 @@ class Dataset_CSV(Dataset):
 .map(lambda x: root + "/" + audio_path + "/" + x)
 .values
 )
-df = df.set_index(df.index.set_levels(
-file_index, level="file"))
+df = df.set_index(df.index.set_levels(file_index, level="file"))
 else:
 if not isinstance(df, pd.DataFrame):
 df = pd.DataFrame(df)
@@ -59,27 +61,24 @@ class Dataset_CSV(Dataset):
 lambda x: root + "/" + audio_path + "/" + x
 )
 )
-else:
+else: # absolute path is True
 if audformat.index_type(df.index) == "segmented":
 file_index = (
-df.index.levels[0]
-.map(lambda x: audio_path + "/" + x)
-.values
+df.index.levels[0].map(lambda x: audio_path + "/" + x).values
 )
-df = df.set_index(df.index.set_levels(
-file_index, level="file"))
+df = df.set_index(df.index.set_levels(file_index, level="file"))
 else:
 if not isinstance(df, pd.DataFrame):
 df = pd.DataFrame(df)
-df = df.set_index(
-lambda x: audio_path + "/" + x
+df = df.set_index(
+df.index.to_series().apply(lambda x: audio_path + "/" + x)
+)

 self.df = df
 self.db = None
 self.got_target = True
 self.is_labeled = self.got_target
-self.start_fresh = eval(
-self.util.config_val("DATA", "no_reuse", "False"))
+self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
 is_index = False
 try:
 if self.is_labeled and not "class_label" in self.df.columns:
@@ -106,8 +105,7 @@ class Dataset_CSV(Dataset):
 f" {self.got_gender}, got age: {self.got_age}"
 )
 self.util.debug(r_string)
-glob_conf.report.add_item(ReportItem(
-"Data", "Loaded report", r_string))
+glob_conf.report.add_item(ReportItem("Data", "Loaded report", r_string))

 def prepare(self):
 super().prepare()
nkululeko/demo.py
CHANGED
@@ -30,10 +30,8 @@ from transformers import pipeline


 def main(src_dir):
-parser = argparse.ArgumentParser(
-
-parser.add_argument("--config", default="exp.ini",
-help="The base configuration")
+parser = argparse.ArgumentParser(description="Call the nkululeko DEMO framework.")
+parser.add_argument("--config", default="exp.ini", help="The base configuration")
 parser.add_argument(
 "--file", help="A file that should be processed (16kHz mono wav)"
 )
@@ -84,8 +82,7 @@ def main(src_dir):
 )

 def print_pipe(files, outfile):
-"""
-Prints the pipeline output for a list of files, and optionally writes the results to an output file.
+"""Prints the pipeline output for a list of files, and optionally writes the results to an output file.

 Args:
 files (list): A list of file paths to process through the pipeline.
@@ -108,8 +105,7 @@ def main(src_dir):
 f.write("\n".join(results))

 if util.get_model_type() == "finetune":
-model_path = os.path.join(
-util.get_exp_dir(), "models", "run_0", "torch")
+model_path = os.path.join(util.get_exp_dir(), "models", "run_0", "torch")
 pipe = pipeline("audio-classification", model=model_path)
 if args.file is not None:
 print_pipe([args.file], args.outfile)
nkululeko/experiment.py
CHANGED
@@ -5,13 +5,13 @@ import pickle
 import random
 import time

+import audeer
+import audformat
 import numpy as np
 import pandas as pd
 from sklearn.preprocessing import LabelEncoder

-import
-import audformat
-
+import nkululeko.glob_conf as glob_conf
 from nkululeko.data.dataset import Dataset
 from nkululeko.data.dataset_csv import Dataset_CSV
 from nkululeko.demo_predictor import Demo_predictor
@@ -19,8 +19,6 @@ from nkululeko.feat_extract.feats_analyser import FeatureAnalyser
 from nkululeko.feature_extractor import FeatureExtractor
 from nkululeko.file_checker import FileChecker
 from nkululeko.filter_data import DataFilter
-from nkululeko.filter_data import filter_min_dur
-import nkululeko.glob_conf as glob_conf
 from nkululeko.plots import Plots
 from nkululeko.reporting.report import Report
 from nkululeko.runmanager import Runmanager
@@ -109,7 +107,8 @@ class Experiment:
 # print keys/column
 dbs = ",".join(list(self.datasets.keys()))
 labels = self.util.config_val("DATA", "labels", False)
-auto_labels = list(
+auto_labels = list(
+next(iter(self.datasets.values())).df[self.target].unique())
 if labels:
 self.labels = ast.literal_eval(labels)
 self.util.debug(f"Using target labels (from config): {labels}")
@@ -159,7 +158,8 @@
 data.split()
 data.prepare_labels()
 self.df_test = pd.concat(
-[self.df_test, self.util.make_segmented_index(
+[self.df_test, self.util.make_segmented_index(
+data.df_test)]
 )
 self.df_test.is_labeled = data.is_labeled
 self.df_test.got_gender = self.got_gender
@@ -260,7 +260,8 @@
 test_cats = self.df_test[self.target].unique()
 else:
 # if there is no target, copy a dummy label
-self.df_test = self._add_random_target(
+self.df_test = self._add_random_target(
+self.df_test).astype("str")
 train_cats = self.df_train[self.target].unique()
 # print(f"df_train: {pd.DataFrame(self.df_train[self.target])}")
 # print(f"train_cats with target {self.target}: {train_cats}")
@@ -268,7 +269,8 @@
 if type(test_cats) == np.ndarray:
 self.util.debug(f"Categories test (nd.array): {test_cats}")
 else:
-self.util.debug(
+self.util.debug(
+f"Categories test (list): {list(test_cats)}")
 if type(train_cats) == np.ndarray:
 self.util.debug(f"Categories train (nd.array): {train_cats}")
 else:
@@ -291,7 +293,8 @@

 target_factor = self.util.config_val("DATA", "target_divide_by", False)
 if target_factor:
-self.df_test[self.target] = self.df_test[self.target] /
+self.df_test[self.target] = self.df_test[self.target] / \
+float(target_factor)
 self.df_train[self.target] = self.df_train[self.target] / float(
 target_factor
 )
@@ -314,14 +317,16 @@
 def plot_distribution(self, df_labels):
 """Plot the distribution of samples and speaker per target class and biological sex"""
 plot = Plots()
-sample_selection = self.util.config_val(
+sample_selection = self.util.config_val(
+"EXPL", "sample_selection", "all")
 plot.plot_distributions(df_labels)
 if self.got_speaker:
 plot.plot_distributions_speaker(df_labels)

 def extract_test_feats(self):
 self.feats_test = pd.DataFrame()
-feats_name = "_".join(ast.literal_eval(
+feats_name = "_".join(ast.literal_eval(
+glob_conf.config["DATA"]["tests"]))
 feats_types = self.util.config_val_list("FEATS", "type", ["os"])
 self.feature_extractor = FeatureExtractor(
 self.df_test, feats_types, feats_name, "test"
@@ -338,9 +343,17 @@

 """
 df_train, df_test = self.df_train, self.df_test
-feats_name = "_".join(ast.literal_eval(
+feats_name = "_".join(ast.literal_eval(
+glob_conf.config["DATA"]["databases"]))
 self.feats_test, self.feats_train = pd.DataFrame(), pd.DataFrame()
-feats_types = self.util.
+feats_types = self.util.config_val("FEATS", "type", "os")
+# Ensure feats_types is always a list of strings
+if isinstance(feats_types, str):
+if feats_types.startswith("[") and feats_types.endswith("]"):
+feats_types = ast.literal_eval(feats_types)
+else:
+feats_types = [feats_types]
+# print(f"feats_types: {feats_types}")
 # for some models no features are needed
 if len(feats_types) == 0:
 self.util.debug("no feature extractor specified.")
@@ -372,7 +385,8 @@
 f"test feats ({self.feats_test.shape[0]}) != test labels"
 f" ({self.df_test.shape[0]})"
 )
-self.df_test = self.df_test[self.df_test.index.isin(
+self.df_test = self.df_test[self.df_test.index.isin(
+self.feats_test.index)]
 self.util.warn(f"new test labels shape: {self.df_test.shape[0]}")

 self._check_scale()
@@ -387,7 +401,8 @@
 """Augment the selected samples."""
 from nkululeko.augmenting.augmenter import Augmenter

-sample_selection = self.util.config_val(
+sample_selection = self.util.config_val(
+"AUGMENT", "sample_selection", "all")
 if sample_selection == "all":
 df = pd.concat([self.df_train, self.df_test])
 elif sample_selection == "train":
@@ -482,7 +497,8 @@
 """
 from nkululeko.augmenting.randomsplicer import Randomsplicer

-sample_selection = self.util.config_val(
+sample_selection = self.util.config_val(
+"AUGMENT", "sample_selection", "all")
 if sample_selection == "all":
 df = pd.concat([self.df_train, self.df_test])
 elif sample_selection == "train":
@@ -503,7 +519,8 @@
 plot_feats = eval(
 self.util.config_val("EXPL", "feature_distributions", "False")
 )
-sample_selection = self.util.config_val(
+sample_selection = self.util.config_val(
+"EXPL", "sample_selection", "all")
 # get the data labels
 if sample_selection == "all":
 df_labels = pd.concat([self.df_train, self.df_test])
@@ -566,7 +583,8 @@
 for scat_target in scat_targets:
 if self.util.is_categorical(df_labels[scat_target]):
 for scatter in scatters:
-plots.scatter_plot(
+plots.scatter_plot(
+df_feats, df_labels, scat_target, scatter)
 else:
 self.util.debug(
 f"{self.name}: binning continuous variable to categories"
@@ -657,7 +675,8 @@
 preds = best.preds
 speakers = self.df_test.speaker.values
 print(f"{len(truths)} {len(preds)} {len(speakers) }")
-df = pd.DataFrame(
+df = pd.DataFrame(
+data={"truth": truths, "pred": preds, "speaker": speakers})
 plot_name = "result_combined_per_speaker"
 self.util.debug(
 f"plotting speaker combination ({function}) confusion matrix to"
@@ -733,7 +752,6 @@
 if model.is_ann():
 print("converting to onnx from torch")
 else:
-from skl2onnx import to_onnx

 print("converting to onnx from sklearn")
 # save the rest
nkululeko/feature_extractor.py
CHANGED
@@ -39,16 +39,20 @@ class FeatureExtractor:
 self.feats = pd.DataFrame()
 for feats_type in self.feats_types:
 store_name = f"{self.data_name}_{feats_type}"
-self.feat_extractor = self._get_feat_extractor(
+self.feat_extractor = self._get_feat_extractor(
+store_name, feats_type)
 self.feat_extractor.extract()
 self.feat_extractor.filter()
-self.feats = pd.concat(
+self.feats = pd.concat(
+[self.feats, self.feat_extractor.df], axis=1)
 return self.feats

 def extract_sample(self, signal, sr):
 return self.feat_extractor.extract_sample(signal, sr)

 def _get_feat_extractor(self, store_name, feats_type):
+if isinstance(feats_type, list) and len(feats_type) == 1:
+feats_type = feats_type[0]
 feat_extractor_class = self._get_feat_extractor_class(feats_type)
 if feat_extractor_class is None:
 self.util.error(f"unknown feats_type: {feats_type}")
@@ -103,13 +107,15 @@
 prefix, _, ext = feats_type.partition("-")
 from importlib import import_module

-module = import_module(
+module = import_module(
+f"nkululeko.feat_extract.feats_{prefix.lower()}")
 class_name = f"{prefix.capitalize()}"
 return getattr(module, class_name)

 def _get_feat_extractor_by_name(self, feats_type):
 from importlib import import_module

-module = import_module(
+module = import_module(
+f"nkululeko.feat_extract.feats_{feats_type.lower()}")
 class_name = f"{feats_type.capitalize()}Set"
 return getattr(module, class_name)
nkululeko/modelrunner.py
CHANGED
@@ -85,7 +85,7 @@ class Modelrunner:
 f"run: {self.run} epoch: {epoch}: result: {test_score_metric}"
 )
 # print(f"performance: {performance.split(' ')[1]}")
-performance = float(test_score_metric.split(
+performance = float(test_score_metric.split(" ")[1])
 if performance > self.best_performance:
 self.best_performance = performance
 self.best_epoch = epoch
@@ -204,15 +204,15 @@
 self.df_train, self.df_test, self.feats_train, self.feats_test
 )
 elif model_type == "cnn":
-from nkululeko.models.model_cnn import
+from nkululeko.models.model_cnn import CNNModel

-self.model =
+self.model = CNNModel(
 self.df_train, self.df_test, self.feats_train, self.feats_test
 )
 elif model_type == "mlp":
-from nkululeko.models.model_mlp import
+from nkululeko.models.model_mlp import MLPModel

-self.model =
+self.model = MLPModel(
 self.df_train, self.df_test, self.feats_train, self.feats_test
 )
 elif model_type == "mlp_reg":
nkululeko/models/model.py
CHANGED
@@ -247,8 +247,25 @@ class Model:
 self.clf.fit(feats, labels)

 def get_predictions(self):
-predictions = self.clf.predict(self.feats_test.to_numpy())
-
+# predictions = self.clf.predict(self.feats_test.to_numpy())
+if self.util.exp_is_classification():
+# make a dataframe for the class probabilities
+proba_d = {}
+for c in self.clf.classes_:
+proba_d[c] = []
+# get the class probabilities
+predictions = self.clf.predict_proba(self.feats_test.to_numpy())
+# pred = self.clf.predict(features)
+for i, c in enumerate(self.clf.classes_):
+proba_d[c] = list(predictions.T[i])
+probas = pd.DataFrame(proba_d)
+probas = probas.set_index(self.feats_test.index)
+predictions = probas.idxmax(axis=1).values
+else:
+predictions = self.clf.predict(self.feats_test.to_numpy())
+probas = None
+
+return predictions, probas

 def predict(self):
 if self.feats_test.isna().to_numpy().any():
@@ -263,13 +280,16 @@
 )
 return report
 """Predict the whole eval feature set"""
-predictions = self.get_predictions()
+predictions, probas = self.get_predictions()
+
 report = Reporter(
 self.df_test[self.target].to_numpy().astype(float),
 predictions,
 self.run,
 self.epoch,
+probas=probas,
 )
+report.print_probabilities()
 return report

 def get_type(self):
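Note on the Model.get_predictions() change above: for classifiers it now returns a (predictions, probas) pair, where probas holds one column per class filled from predict_proba and the hard label is the column-wise argmax; for regressors, probas stays None. A small self-contained sketch of the same pattern with plain scikit-learn and toy data (not the nkululeko Model class itself):

import pandas as pd
from sklearn.linear_model import LogisticRegression

# toy data: two features, three string classes
X_train = [[0, 0], [1, 1], [2, 2], [0, 1], [1, 2], [2, 0]]
y_train = ["a", "b", "c", "a", "b", "c"]
X_test = pd.DataFrame([[0.1, 0.2], [1.9, 1.8]], index=["s1", "s2"])

clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# one probability column per class, indexed like the test samples
probas = pd.DataFrame(
    clf.predict_proba(X_test.to_numpy()),
    columns=clf.classes_,
    index=X_test.index,
)
# the hard prediction is the most probable class per row
predictions = probas.idxmax(axis=1).values
print(probas.round(3))
print(predictions)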
nkululeko/models/model_cnn.py
CHANGED
@@ -5,33 +5,40 @@ Inspired by code from Su Lei

 """

+import ast
+from collections import OrderedDict
+
+import numpy as np
+import pandas as pd
+from PIL import Image
+from sklearn.metrics import recall_score
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-import torchvision
-import torchvision.transforms as transforms
 from torch.utils.data import Dataset
-import
-import numpy as np
-from sklearn.metrics import recall_score
-from collections import OrderedDict
-from PIL import Image
-from traitlets import default
+import torchvision.transforms as transforms

-from nkululeko.utils.util import Util
 import nkululeko.glob_conf as glob_conf
+from nkululeko.losses.loss_softf1loss import SoftF1Loss
 from nkululeko.models.model import Model
 from nkululeko.reporting.reporter import Reporter
-from nkululeko.
+from nkululeko.utils.util import Util


-class
-"""CNN = convolutional neural net"""
+class CNNModel(Model):
+"""CNN = convolutional neural net."""

 is_classifier = True

 def __init__(self, df_train, df_test, feats_train, feats_test):
-"""Constructor taking
+"""Constructor, taking all dataframes.
+
+Args:
+df_train (pd.DataFrame): The train labels.
+df_test (pd.DataFrame): The test labels.
+feats_train (pd.DataFrame): The train features.
+feats_test (pd.DataFrame): The test features.
+"""
 super().__init__(df_train, df_test, feats_train, feats_test)
 super().set_model_type("ann")
 self.name = "cnn"
@@ -147,7 +154,20 @@ class CNN_model(Model):
 self.optimizer.step()
 self.loss = (np.asarray(losses)).mean()

-def
+def get_probas(self, logits):
+# make a dataframe for probabilites (logits)
+proba_d = {}
+classes = self.df_test[self.target].unique()
+classes.sort()
+for c in classes:
+proba_d[c] = []
+for i, c in enumerate(classes):
+proba_d[c] = list(logits.numpy().T[i])
+probas = pd.DataFrame(proba_d)
+probas = probas.set_index(self.df_test.index)
+return probas
+
+def evaluate(self, model, loader, device):
 logits = torch.zeros(len(loader.dataset), self.class_num)
 targets = torch.zeros(len(loader.dataset))
 model.eval()
@@ -169,14 +189,15 @@ class CNN_model(Model):
 self.loss_eval = (np.asarray(losses)).mean()
 predictions = logits.argmax(dim=1)
 uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
-return uar, targets, predictions
+return uar, targets, predictions, logits

 def predict(self):
-_, truths, predictions = self.
+_, truths, predictions, logits = self.evaluate(
 self.model, self.testloader, self.device
 )
-uar, _, _ = self.
-
+uar, _, _, _ = self.evaluate(self.model, self.trainloader, self.device)
+probas = self.get_probas(logits)
+report = Reporter(truths, predictions, self.run, self.epoch, probas=probas)
 try:
 report.result.loss = self.loss
 except AttributeError: # if the model was loaded from disk the loss is unknown
@@ -189,13 +210,11 @@ class CNN_model(Model):
 return report

 def get_predictions(self):
-_,
-self.model, self.testloader, self.device
-)
+_, _, predictions, _ = self.evaluate(self.model, self.testloader, self.device)
 return predictions.numpy()

 def predict_sample(self, features):
-"""Predict one sample"""
+"""Predict one sample."""
 with torch.no_grad():
 logits = self.model(torch.from_numpy(features).to(self.device))
 a = logits.numpy()
nkululeko/models/model_mlp.py
CHANGED
@@ -1,25 +1,33 @@
 # model_mlp.py
+import ast
+from collections import OrderedDict
+
+import numpy as np
 import pandas as pd
+from sklearn.metrics import recall_score
+import torch

-from nkululeko.utils.util import Util
 import nkululeko.glob_conf as glob_conf
+from nkululeko.losses.loss_softf1loss import SoftF1Loss
 from nkululeko.models.model import Model
 from nkululeko.reporting.reporter import Reporter
-import
-import ast
-import numpy as np
-from sklearn.metrics import recall_score
-from collections import OrderedDict
-from nkululeko.losses.loss_softf1loss import SoftF1Loss
+from nkululeko.utils.util import Util


-class
+class MLPModel(Model):
 """MLP = multi layer perceptron."""

 is_classifier = True

 def __init__(self, df_train, df_test, feats_train, feats_test):
-"""Constructor taking
+"""Constructor, taking all dataframes.
+
+Args:
+df_train (pd.DataFrame): The train labels.
+df_test (pd.DataFrame): The test labels.
+feats_train (pd.DataFrame): The train features.
+feats_test (pd.DataFrame): The test features.
+"""
 super().__init__(df_train, df_test, feats_train, feats_test)
 super().set_model_type("ann")
 self.name = "mlp"
@@ -97,7 +105,7 @@ class MLP_model(Model):
 self.optimizer.step()
 self.loss = (np.asarray(losses)).mean()

-def
+def evaluate(self, model, loader, device):
 logits = torch.zeros(len(loader.dataset), self.class_num)
 targets = torch.zeros(len(loader.dataset))
 model.eval()
@@ -119,14 +127,28 @@ class MLP_model(Model):
 self.loss_eval = (np.asarray(losses)).mean()
 predictions = logits.argmax(dim=1)
 uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
-return uar, targets, predictions
+return uar, targets, predictions, logits
+
+def get_probas(self, logits):
+# make a dataframe for probabilites (logits)
+proba_d = {}
+classes = self.df_test[self.target].unique()
+classes.sort()
+for c in classes:
+proba_d[c] = []
+for i, c in enumerate(classes):
+proba_d[c] = list(logits.numpy().T[i])
+probas = pd.DataFrame(proba_d)
+probas = probas.set_index(self.df_test.index)
+return probas

 def predict(self):
-_, truths, predictions = self.
+_, truths, predictions, logits = self.evaluate(
 self.model, self.testloader, self.device
 )
-uar, _, _ = self.
-
+uar, _, _, _ = self.evaluate(self.model, self.trainloader, self.device)
+probas = self.get_probas(logits)
+report = Reporter(truths, predictions, self.run, self.epoch, probas=probas)
 try:
 report.result.loss = self.loss
 except AttributeError: # if the model was loaded from disk the loss is unknown
@@ -139,9 +161,7 @@ class MLP_model(Model):
 return report

 def get_predictions(self):
-_,
-self.model, self.testloader, self.device
-)
+_, _, predictions, _ = self.evaluate(self.model, self.testloader, self.device)
 return predictions.numpy()

 def get_loader(self, df_x, df_y, shuffle):

nkululeko/models/model_mlp_regression.py
CHANGED
@@ -97,7 +97,9 @@ class MLP_Reg_model(Model):
 self.model, self.testloader, self.device
 )
 result, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
-report = Reporter(
+report = Reporter(
+truths.numpy(), predictions.numpy(), None, self.run, self.epoch
+)
 try:
 report.result.loss = self.loss
 except AttributeError: # if the model was loaded from disk the loss is unknown
nkululeko/plots.py
CHANGED
@@ -48,7 +48,7 @@ class Plots:
 )
 ax.set_ylabel(f"number of speakers")
 ax.set_xlabel("number of samples")
-self.
+self.save_plot(
 ax,
 "Samples per speaker",
 f"Samples per speaker ({df_speakers.shape[0]})",
@@ -70,9 +70,9 @@
 rot=0,
 )
 )
-ax.set_ylabel(
+ax.set_ylabel("number of speakers")
 ax.set_xlabel("number of samples")
-self.
+self.save_plot(
 ax,
 "Sample value counts",
 f"Samples per speaker ({df_speakers.shape[0]})",
@@ -96,7 +96,7 @@
 binned_data = self.util.continuous_to_categorical(df[class_label])
 ax = binned_data.value_counts().plot(kind="bar")
 filename_binned = f"{class_label}_discreet"
-self.
+self.save_plot(
 ax,
 "Sample value counts",
 filename_binned,
@@ -106,7 +106,7 @@
 dist_type = self.util.config_val("EXPL", "dist_type", "hist")
 ax = df[class_label].plot(kind=dist_type)

-self.
+self.save_plot(
 ax,
 "Sample value counts",
 filename,
@@ -131,17 +131,17 @@
 df, class_label, att1, self.target, type_s
 )
 else:
-ax, caption = self.
+ax, caption = self.plotcatcont(
 df, class_label, att1, att1, type_s
 )
 else:
 if self.util.is_categorical(df[att1]):
-ax, caption = self.
+ax, caption = self.plotcatcont(
 df, att1, class_label, att1, type_s
 )
 else:
 ax, caption = self._plot2cont(df, class_label, att1, type_s)
-self.
+self.save_plot(
 ax,
 caption,
 f"Correlation of {self.target} and {att[0]}",
@@ -171,15 +171,11 @@
 ax, caption = self._plot2cat(df, att1, att2, att1, type_s)
 else:
 # class_label = cat, att1 = cat, att2 = cont
-ax, caption = self.
-df, att1, att2, att1, type_s
-)
+ax, caption = self.plotcatcont(df, att1, att2, att1, type_s)
 else:
 if self.util.is_categorical(df[att2]):
 # class_label = cat, att1 = cont, att2 = cat
-ax, caption = self.
-df, att2, att1, att2, type_s
-)
+ax, caption = self.plotcatcont(df, att2, att1, att2, type_s)
 else:
 # class_label = cat, att1 = cont, att2 = cont
 ax, caption = self._plot2cont_cat(
@@ -205,7 +201,7 @@
 # class_label = cont, att1 = cont, att2 = cont
 ax, caption = self._plot2cont(df, att1, att2, type_s)

-self.
+self.save_plot(
 ax, caption, f"Correlation of {att1} and {att2}", filename, type_s
 )

@@ -215,16 +211,16 @@
 f" {att} has more than 2 values. Perhaps you forgot to state a list of lists?"
 )

-def
+def save_plot(self, ax, caption, header, filename, type_s):
 # one up because of the runs
 fig_dir = self.util.get_path("fig_dir") + "../"
-
+fig_plots = ax.figure
 # avoid warning
 # plt.tight_layout()
 img_path = f"{fig_dir}{filename}_{type_s}.{self.format}"
 plt.savefig(img_path)
-plt.close(
-
+plt.close(fig_plots)
+self.util.debug(f"Saved plot to {img_path}")
 glob_conf.report.add_item(
 ReportItem(
 Header.HEADER_EXPLORE,
@@ -244,35 +240,29 @@
 return att, df

 def _plot2cont_cat(self, df, cont1, cont2, cat, ylab):
-"""
-plot relation of two continuous distributions with one categorical
-"""
+"""Plot relation of two continuous distributions with one categorical."""
 pearson = stats.pearsonr(df[cont1], df[cont2])
 # trunc to three digits
 pearson = int(pearson[0] * 1000) / 1000
 pearson_string = f"PCC: {pearson}"
 ax = sns.lmplot(data=df, x=cont1, y=cont2, hue=cat)
 caption = f"{ylab} {df.shape[0]}. {pearson_string}"
-ax.
+ax.figure.suptitle(caption)
 return ax, caption

 def _plot2cont(self, df, col1, col2, ylab):
-"""
-plot relation of two continuous distributions
-"""
+"""Plot relation of two continuous distributions."""
 pearson = stats.pearsonr(df[col1], df[col2])
 # trunc to three digits
 pearson = int(pearson[0] * 1000) / 1000
 pearson_string = f"PCC: {pearson}"
 ax = sns.lmplot(data=df, x=col1, y=col2)
 caption = f"{ylab} {df.shape[0]}. {pearson_string}"
-ax.
+ax.figure.suptitle(caption)
 return ax, caption

-def
-"""
-plot relation of categorical distribution with continuous
-"""
+def plotcatcont(self, df, cat_col, cont_col, xlab, ylab):
+"""Plot relation of categorical distribution with continuous."""
 dist_type = self.util.config_val("EXPL", "dist_type", "hist")
 cats, cat_str, es = su.get_effect_size(df, cat_col, cont_col)
 if dist_type == "hist":
@@ -287,13 +277,11 @@
 )
 ax.set(xlabel=f"{cont_col}")
 caption = f"{ylab} {df.shape[0]}. {cat_str} ({cats}):" f" {es}"
-ax.
+ax.figure.suptitle(caption)
 return ax, caption

 def _plot2cat(self, df, col1, col2, xlab, ylab):
-"""
-plot relation of 2 categorical distributions
-"""
+"""Plot relation of 2 categorical distributions."""
 crosstab = pd.crosstab(index=df[col1], columns=df[col2])
 res_pval = stats.chi2_contingency(crosstab)
 res_pval = int(res_pval[1] * 1000) / 1000
@@ -320,8 +308,8 @@
 max = self.util.to_3_digits(df.duration.max())
 title = f"Duration distr. for {sample_selection} {df.shape[0]}. min={min}, max={max}"
 ax.set_title(title)
-ax.set_xlabel(
-ax.set_ylabel(
+ax.set_xlabel("duration")
+ax.set_ylabel("number of samples")
 fig = ax.figure
 # plt.tight_layout()
 img_path = f"{fig_dir}{filename}_{sample_selection}.{self.format}"
nkululeko/reporting/reporter.py
CHANGED
@@ -2,16 +2,21 @@ import ast
 import glob
 import json
 import math
+import os

 from confidence_intervals import evaluate_with_conf_int
 import matplotlib.pyplot as plt
 import numpy as np
+from scipy.special import softmax
+from scipy.stats import entropy
 from scipy.stats import pearsonr
-from sklearn.metrics import ConfusionMatrixDisplay
+from sklearn.metrics import ConfusionMatrixDisplay
+from sklearn.metrics import auc
 from sklearn.metrics import classification_report
 from sklearn.metrics import confusion_matrix
 from sklearn.metrics import r2_score
-from sklearn.metrics import
+from sklearn.metrics import roc_auc_score
+from sklearn.metrics import roc_curve
 from torch import is_tensor

 from audmetric import accuracy
@@ -21,6 +26,7 @@ from audmetric import mean_squared_error
 from audmetric import unweighted_average_recall

 import nkululeko.glob_conf as glob_conf
+from nkululeko.plots import Plots
 from nkululeko.reporting.defines import Header
 from nkululeko.reporting.report_item import ReportItem
 from nkululeko.reporting.result import Result
@@ -46,9 +52,18 @@ class Reporter:
 self.MEASURE = "CCC"
 self.result.measure = self.MEASURE

-def __init__(self, truths, preds, run, epoch):
-"""Initialization with ground truth und predictions vector.
+def __init__(self, truths, preds, run, epoch, probas=None):
+"""Initialization with ground truth und predictions vector.
+
+Args:
+truths (list): the ground truth
+preds (list): the predictions
+run (int): number of run
+epoch (int): number of epoch
+probas (pd.Dataframe, optional): probabilities per class. Defaults to None.
+"""
 self.util = Util("reporter")
+self.probas = probas
 self.format = self.util.config_val("PLOT", "format", "png")
 self.truths = np.asarray(truths)
 self.preds = np.asarray(preds)
@@ -108,6 +123,47 @@
 self.result.test = test_result
 self.result.set_upper_lower(upper, lower)
 # train and loss are being set by the model
+# print out the class probilities
+
+def print_probabilities(self):
+"""Print the probabilities per class to a file in the store."""
+if (
+self.util.exp_is_classification()
+and self.probas is not None
+and "uncertainty" not in self.probas
+):
+probas = self.probas
+probas["predicted"] = self.preds
+probas["truth"] = self.truths
+# softmax the probabilities or logits
+uncertainty = probas.apply(softmax, axis=1)
+try:
+le = glob_conf.label_encoder
+mapping = dict(zip(le.classes_, range(len(le.classes_))))
+mapping_reverse = {value: key for key, value in mapping.items()}
+probas = probas.rename(columns=mapping_reverse)
+probas["predicted"] = probas["predicted"].map(mapping_reverse)
+probas["truth"] = probas["truth"].map(mapping_reverse)
+except AttributeError as ae:
+self.util.debug(f"Can't label categories: {ae}")
+# compute entropy per sample
+uncertainty = uncertainty.apply(entropy)
+# scale it to 0-1
+max_ent = math.log(len(glob_conf.labels))
+uncertainty = (uncertainty - uncertainty.min()) / (
+max_ent - uncertainty.min()
+)
+probas["uncertainty"] = uncertainty
+probas["correct"] = probas.predicted == probas.truth
+sp = os.path.join(self.util.get_path("store"), "pred_df.csv")
+self.probas = probas
+probas.to_csv(sp)
+self.util.debug(f"Saved probabilities to {sp}")
+plots = Plots()
+ax, caption = plots.plotcatcont(
+probas, "correct", "uncertainty", "uncertainty", "correct"
+)
+plots.save_plot(ax, caption, "Uncertainty", "uncertainty", "samples")

 def set_id(self, run, epoch):
 """Make the report identifiable with run and epoch index."""
@@ -123,6 +179,12 @@
 self.preds = np.digitize(self.preds, bins) - 1

 def plot_confmatrix(self, plot_name, epoch=None):
+"""Plot a confusionmatrix to the store.
+
+Args:
+plot_name (str): name for the image file.
+epoch (int, optional): Number of epoch. Defaults to None.
+"""
 if not self.util.exp_is_classification():
 self.continuous_to_categorical()
 self._plot_confmat(self.truths, self.preds, plot_name, epoch)
@@ -212,10 +274,11 @@
 )
 img_path = f"{fig_dir}{plot_name}{self.filenameadd}.{self.format}"
 plt.savefig(img_path)
+self.util.debug(f"Saved confusion plot to {img_path}")
 fig.clear()
 plt.close(fig)
-plt.
-plt.
+plt.close()
+plt.clf()
 glob_conf.report.add_item(
 ReportItem(
 Header.HEADER_RESULTS,
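Note on the new print_probabilities() above: the per-sample uncertainty is the Shannon entropy of the softmaxed class scores, scaled against the maximum possible entropy log(n_classes) (the method additionally subtracts the batch minimum before scaling). A toy sketch of that measure, independent of the Reporter API; the logit values are invented:

import math
import numpy as np
from scipy.special import softmax
from scipy.stats import entropy

# invented logits for four samples and three classes
logits = np.array([
    [4.0, 0.1, 0.2],   # confident -> low uncertainty
    [1.0, 0.9, 1.1],   # undecided -> high uncertainty
    [0.2, 3.5, 0.1],
    [2.0, 1.9, 2.1],
])

probs = softmax(logits, axis=1)        # logits -> class probabilities
unc = entropy(probs, axis=1)           # Shannon entropy per sample
unc = unc / math.log(logits.shape[1])  # scale so a uniform distribution gives 1.0
print(np.round(unc, 3))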
nkululeko/runmanager.py
CHANGED
@@ -11,7 +11,7 @@ from nkululeko.utils.util import Util


 class Runmanager:
-"""Class to manage the runs of the experiment (e.g. when results differ caused by random initialization)"""
+"""Class to manage the runs of the experiment (e.g. when results differ caused by random initialization)."""

 model = None # The underlying model
 df_train, df_test, feats_train, feats_test = (
@@ -23,15 +23,14 @@ class Runmanager:
 reports = []

 def __init__(self, df_train, df_test, feats_train, feats_test):
-"""Constructor setting up the dataframes
+"""Constructor setting up the dataframes.
+
 Args:
 df_train: train dataframe
 df_test: test dataframe
 feats_train: train features
 feats_train: test features

-Returns:
-
 """
 self.df_train, self.df_test, self.feats_train, self.feats_test = (
 df_train,
@@ -46,7 +45,7 @@
 # self._select_model(model_type)

 def do_runs(self):
-"""Start the runs"""
+"""Start the runs."""
 self.best_results = [] # keep the best result per run
 self.last_epochs = [] # keep the epoch of best result per run
 # for all runs
@@ -105,15 +104,13 @@
 )
 self.print_model(best_report, plot_name)
 # finally, print out the numbers for this run
-# self.reports[-1].print_results(
-# int(self.util.config_val("EXP", "epochs", 1))
-# )
 best_report.print_results(best_report.epoch)
+best_report.print_probabilities()
 self.best_results.append(best_report)
 self.last_epochs.append(last_epoch)

 def print_best_result_runs(self):
-"""Print the best result for all runs"""
+"""Print the best result for all runs."""
 best_report = self.get_best_result(self.best_results)
 self.util.debug(
 f"best result all runs with run {best_report.run} and"
@@ -177,7 +174,7 @@
 return self.load_model(best_report)

 def get_best_result(self, reports):
-best_r = Reporter([], [], 0, 0)
+best_r = Reporter([], [], None, 0, 0)
 if self.util.high_is_good():
 best_r = self.search_best_result(reports, "ascending")
 else:
@@ -185,7 +182,7 @@
 return best_r

 def search_best_result(self, reports, order):
-best_r = Reporter([], [], 0, 0)
+best_r = Reporter([], [], None, 0, 0)
 if order == "ascending":
 best_result = 0
 for r in reports:
nkululeko/test_predictor.py
CHANGED
@@ -56,18 +56,13 @@ class TestPredictor:
 else:
 test_dbs = ast.literal_eval(glob_conf.config["DATA"]["tests"])
 test_dbs_string = "_".join(test_dbs)
-predictions = self.model.get_predictions()
+predictions, _ = self.model.get_predictions()
 report = self.model.predict()
 result = report.result.get_result()
 report.set_filename_add(f"test-{test_dbs_string}")
 self.util.print_best_results([report])
 report.plot_confmatrix(self.util.get_plot_name(), 0)
 report.print_results(0)
-# print(predictions)
-# df = pd.DataFrame(index=self.orig_df.index)
-# df["speaker"] = self.orig_df["speaker"]
-# df["gender"] = self.orig_df["gender"]
-# df[self.target] = self.orig_df[self.target]
 df = self.orig_df.copy()
 df["predictions"] = self.label_encoder.inverse_transform(predictions)
 target = self.util.config_val("DATA", "target", "emotion")
nkululeko/utils/stats.py
CHANGED
@@ -70,12 +70,16 @@ def get_effect_size(df, target, variable):
 cats[c] = df[df[target] == c][variable].values
 combos = all_combinations(categories)
 results = {}
-for combo in combos:
-one = combo[0]
-other = combo[1]
-results[f"{one}-{other}"] = cohen_d(cats[one], cats[other])
-max_cat = max(results, key=results.get)
-cat_s = cohens_D_to_string(float(results[max_cat]))
+if len(categories) == 1:
+cat_s = cohens_D_to_string(0)
+return categories[0], cat_s, 0
+else:
+for combo in combos:
+one = combo[0]
+other = combo[1]
+results[f"{one}-{other}"] = cohen_d(cats[one], cats[other])
+max_cat = max(results, key=results.get)
+cat_s = cohens_D_to_string(float(results[max_cat]))
 return max_cat, cat_s, results[max_cat]


@@ -92,7 +96,7 @@ def cohens_D_to_string(val):


 def normalize(values):
-"""Do a z-transformation of a distribution.
+"""Do a z-transformation of a distribution.

 So that mean = 0 and variance = 1
 """
nkululeko/utils/util.py
CHANGED
@@ -5,15 +5,15 @@ import os.path
 import pickle
 import sys

-import numpy as np
-import pandas as pd
-
 import audeer
 import audformat
+import numpy as np
+import pandas as pd


 class Util:
-# a list of words that need not to be warned upon if default values are
+# a list of words that need not to be warned upon if default values are
+# used
 stopvals = [
 "all",
 False,
@@ -40,7 +40,8 @@ class Util:
 self.got_data_roots = self.config_val(
 "DATA", "root_folders", False)
 if self.got_data_roots:
-# if there is a global data rootfolder file, read from
+# if there is a global data rootfolder file, read from
+# there
 if not os.path.isfile(self.got_data_roots):
 self.error(f"no such file: {self.got_data_roots}")
 self.data_roots = configparser.ConfigParser()
@@ -107,16 +108,17 @@
 if self.got_data_roots:
 try:
 if len(key) > 0:
-return self.data_roots["DATA"][dataset +
+return self.data_roots["DATA"][dataset +
+"." + key].strip("'\"")
 else:
 return self.data_roots["DATA"][dataset].strip("'\"")
 except KeyError:
-if not
+if default not in self.stopvals:
 self.debug(
-f"value for {key} not found, using default:"
-
+f"value for {key} not found, using default:"
+f" {default}")
 return default
-if not
+if default not in self.stopvals:
 self.debug(
 f"value for {key} not found, using default: {default}")
 return default
@@ -182,7 +184,7 @@

 def get_feattype_name(self):
 """
-Get a string as name from all feature sets that are
+Get a string as name from all feature sets that are used
 """
 return "_".join(ast.literal_eval(self.config["FEATS"]["type"]))

@@ -205,7 +207,12 @@
 def get_model_description(self):
 mt = ""
 mt = f'{self.config["MODEL"]["type"]}'
-ft = "_".join(ast.literal_eval(self.config["FEATS"]["type"]))
+# ft = "_".join(ast.literal_eval(self.config["FEATS"]["type"]))
+ft_value = self.config["FEATS"]["type"]
+if isinstance(ft_value, str) and ft_value.startswith("[") and ft_value.endswith("]"):
+ft = "_".join(ast.literal_eval(ft_value))
+else:
+ft = ft_value
 ft += "_"
 layer_string = ""
 layer_s = self.config_val("MODEL", "layers", False)
@@ -230,9 +237,8 @@
 ["FEATS", "wav2vec2.layer"],
 ]
 for option in options:
-return_string += self._get_value_descript(
-".", "-"
-)
+return_string += self._get_value_descript(
+option[0], option[1]).replace(".", "-")
 return return_string

 def get_plot_name(self):
@@ -286,7 +292,7 @@
 try:
 return ast.literal_eval(self.config[section][key])
 except KeyError:
-if not
+if default not in self.stopvals:
 self.debug(
 f"value for {key} not found, using default: {default}")
 return default
{nkululeko-0.86.7.dist-info → nkululeko-0.87.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.86.7
+Version: 0.87.0
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -51,6 +51,7 @@ Requires-Dist: pylatex
 - [t-SNE plots](#t-sne-plots)
 - [Data distribution](#data-distribution)
 - [Bias checking](#bias-checking)
+- [Uncertainty](#uncertainty)
 - [Documentation](#documentation)
 - [Installation](#installation)
 - [Usage](#usage)
@@ -113,6 +114,13 @@ In cases you might wonder if there's bias in your data. You can try to detect th

 <img src="meta/images/emotion-pesq.png" width="500px"/>

+### Uncertainty
+Nkululeko estimates uncertainty of model decision (only for classifiers) with entropy over the class-probabilities or logits per sample.
+
+<img src="meta/images/uncertainty.png" width="500px"/>
+
+
+
 ## Documentation
 The documentation, along with extensions of installation, usage, INI file format, and examples, can be found [nkululeko.readthedocs.io](https://nkululeko.readthedocs.io).

@@ -343,6 +351,14 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========

+Version 0.87.0
+--------------
+* added class probability output and uncertainty analysis
+
+Version 0.86.8
+--------------
+* handle single feature sets as strings in the config
+
 Version 0.86.7
 --------------
 * handles now audformat tables where the target is in a file index
{nkululeko-0.86.7.dist-info → nkululeko-0.87.0.dist-info}/RECORD
CHANGED
@@ -2,30 +2,30 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
 nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
 nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
-nkululeko/constants.py,sha256=
-nkululeko/demo.py,sha256=
+nkululeko/constants.py,sha256=qVowcvAZL-g-Bsp_4yBCOQDkCoW-S-1wrRG5XgnjnX0,39
+nkululeko/demo.py,sha256=Sqbu3o6Pzdr_UlYxWM8Mn3l5uCXsw429yJbtkVDUYHU,5087
 nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
 nkululeko/demo_predictor.py,sha256=es56xbT8ifkS_vnrlb5NTZT54gNmeUtNlA4zVA_gnN8,4757
-nkululeko/experiment.py,sha256=
+nkululeko/experiment.py,sha256=s9PIjm45dR9yzmHu_69JpBjX9qMVzi5wIgPfMR3F44A,31530
 nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
 nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
-nkululeko/feature_extractor.py,sha256=
+nkululeko/feature_extractor.py,sha256=rL-TybLmjZz5uxT9LNTORaDat9FKp_1qloxbyMrinyE,4141
 nkululeko/file_checker.py,sha256=LoLnL8aHpW-axMQ46qbqrManTs5otG9ShpEZuz9iRSk,3474
 nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
 nkululeko/glob_conf.py,sha256=KL9YJQTHvTztxo1vr25qRRgaPnx4NTg0XrdbovKGMmw,525
-nkululeko/modelrunner.py,sha256=
+nkululeko/modelrunner.py,sha256=rpWQRXERiDZ-i_7CwsqynI87vawtsaPihsonDMPe9PU,11151
 nkululeko/multidb.py,sha256=fG3VukEWP1vreVN4gB1IRXxwwg4jLftsSEYtu0o1f78,5634
 nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
 nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
-nkululeko/plots.py,sha256=
+nkululeko/plots.py,sha256=WsI_dtPKfrYPsKymHRmIhqj33aZzTcE8fF_EwLkm_5A,22899
 nkululeko/predict.py,sha256=sF091sSSLnEWcISx9ZcULLie3tY5XeFsQJd6b3vrxFg,2409
 nkululeko/resample.py,sha256=2d9eao_0sLrGZ_KSl8OVKsPor3BkFrlmMhrpB9WelIs,4267
-nkululeko/runmanager.py,sha256=
+nkululeko/runmanager.py,sha256=eRMJidkoJhkU5NdIKoozv3vovU-8tqfn-7zqr2JZcnE,7533
 nkululeko/scaler.py,sha256=4nkIqoajkIkuTPK0Z02ifMN_awl6fP_i-GBYdoGYgGM,4101
 nkululeko/segment.py,sha256=YLKckX44tbvTb3LrdgYw9X4guzuF27sutl92z9DkpZU,4835
 nkululeko/syllable_nuclei.py,sha256=Sky-C__MeUDaxqHnDl2TGLLYOYvsahD35TUjWGeG31k,10047
 nkululeko/test.py,sha256=1w624vo5KTzmFC8BUStGlLDmIEAFuJUz7J0W-gp7AxI,1677
-nkululeko/test_predictor.py,sha256=
+nkululeko/test_predictor.py,sha256=KaGef_r4mXW89f0aUiYDw8IiBe2ciGt14HNkR-S14lU,2985
 nkululeko/test_pretrain.py,sha256=ZWl-bR6nmeSmXkGAIE6zyfQEjN8Zg0rIxfaS-O6Zbas,8465
 nkululeko/augmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/augmenting/augmenter.py,sha256=XAt0dpmlnKxqyysqCgV3rcz-pRIvOz7rU7dmGDCVAzs,2905
@@ -46,7 +46,7 @@ nkululeko/autopredict/ap_valence.py,sha256=n-hctRKySzhmJtowuMOTUu0T_ld3uK5pnfOzW
 nkululeko/autopredict/estimate_snr.py,sha256=S-bpS0xFkwWc4Ch75UrjbS8y538lQ0U3g_iLRFXureY,5048
 nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/data/dataset.py,sha256=hUD0NqWCfRaSHG8JNs1MsPb0zjUZAf8FJkg_c0ebq0Q,28046
-nkululeko/data/dataset_csv.py,sha256=
+nkululeko/data/dataset_csv.py,sha256=UGEpi__eT2KFS6Fop6N4HkMrzO-u5VP71gt44kwZavo,4588
 nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
 nkululeko/feat_extract/feats_agender_agender.py,sha256=tgH2BnwcxpvuLmOkrMbVdBSX0Onfz2MG12FsddalRKI,3424
@@ -75,15 +75,15 @@ nkululeko/losses/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
 nkululeko/losses/loss_ccc.py,sha256=NOK0y0fxKUnU161B5geap6Fmn8QzoPl2MqtPiV8IuJE,976
 nkululeko/losses/loss_softf1loss.py,sha256=5gW-PuiqeAZcRgfwjueIOQtMokOjZWgQnVIv59HKTCo,1309
 nkululeko/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nkululeko/models/model.py,sha256=
+nkululeko/models/model.py,sha256=JXrd0fbU0JhTxUDrs0kOEHF9rtPJBxBeO6zcrHAzk8k,12475
 nkululeko/models/model_bayes.py,sha256=WJFZ8wFKwWATz6MhmjeZIi1Pal1viU549WL_PjXDSy8,406
-nkululeko/models/model_cnn.py,sha256=
+nkululeko/models/model_cnn.py,sha256=NreR2LrKMyBYHyIJEL6wm3UQ4mA5HleZfpUyA5wNYpA,10629
 nkululeko/models/model_gmm.py,sha256=hZ9UO36KNf48qa3J-xkWIicIj9-TApmt21zNES2vEOs,649
 nkululeko/models/model_knn.py,sha256=KlnrJfwiVnmXZrAaYGFrKA2f5sznvTzSJQ8-5etOP0k,599
 nkululeko/models/model_knn_reg.py,sha256=j7YFfVm6xOR2d9yBYdQiwwqYfqkX0JynX_qLCvkr1fk,610
 nkululeko/models/model_lin_reg.py,sha256=0D7mSnSwK82lNWDMwHYRyq3FmGa6y-DHDGg4qUe85q4,422
-nkululeko/models/model_mlp.py,sha256=
-nkululeko/models/model_mlp_regression.py,sha256=
+nkululeko/models/model_mlp.py,sha256=VE0CI19qMyRbI-THDkMeJ7JbWf4z7CmZ4MMs1FIQgtM,10557
+nkululeko/models/model_mlp_regression.py,sha256=7oK2zQhhCegSqiBUe6eU7Av8MJ_DPLA9skixJcHaVfg,10232
 nkululeko/models/model_svm.py,sha256=rsME3KvKvNG7bdE5lbvYUu85WZhaASZxxmdNDIVJRZ4,940
 nkululeko/models/model_svr.py,sha256=_YZeksqB3eBENGlg3g9RwYFlk9rQQ-XCeNBKLlGGVoE,725
 nkululeko/models/model_tree.py,sha256=rf16faUm4o2LJgkoYpeY998b8DQIvXZ73_m1IS3TnnE,417
@@ -96,17 +96,17 @@ nkululeko/reporting/defines.py,sha256=IsY1YgKRMaABpylVKjBJgJ5bNCEbGCVA_E6pivraqS
 nkululeko/reporting/latex_writer.py,sha256=qiCRSmB4KOD_za4oHu5x-PhwjZohzfo8wecMOwlXZwc,1886
 nkululeko/reporting/report.py,sha256=W0rcigDdjBvxZQ3pZja_gvToILYvaZ1BFtnN2qFRfYI,1060
 nkululeko/reporting/report_item.py,sha256=siWeGNgo4bAE46YBMNcsdf3jTMTy76BO9Fi6DTvDig4,533
-nkululeko/reporting/reporter.py,sha256=
+nkululeko/reporting/reporter.py,sha256=6zW3PmQrwVJO5orBVA-fiaIhnzGrFymC861DSd8nSjc,16806
 nkululeko/reporting/result.py,sha256=nSN5or-Py2GPRWHkWpGRh7UCi1W0er7WLEHz8fYLk-A,742
 nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=pmLHuXsaqvcdYxB4PSW9l1mbQWZZBJFhi_CGabqydas,1947
 nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5EvWlcWQ,3301
 nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
-nkululeko/utils/stats.py,sha256=
-nkululeko/utils/util.py,sha256=
-nkululeko-0.
-nkululeko-0.
-nkululeko-0.
-nkululeko-0.
-nkululeko-0.
+nkululeko/utils/stats.py,sha256=eC9dMO-by6CDnGLHDBQu-2B4-BudZNJ0nnWGhKYdUMA,2968
+nkululeko/utils/util.py,sha256=ZCS02mE2c3_h9_q4hpsSm4XAooCranqRF_5pY-6055E,14432
+nkululeko-0.87.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.87.0.dist-info/METADATA,sha256=DPO61pORcuEhRsDwB5S5VJ8CK_piJeh-I5kKJc8eNJE,38442
+nkululeko-0.87.0.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
+nkululeko-0.87.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.87.0.dist-info/RECORD,,
{nkululeko-0.86.7.dist-info → nkululeko-0.87.0.dist-info}/LICENSE
File without changes
{nkululeko-0.86.7.dist-info → nkululeko-0.87.0.dist-info}/top_level.txt
File without changes