nkululeko 0.83.0__py3-none-any.whl → 0.83.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +1 -1
- nkululeko/experiment.py +8 -4
- nkululeko/feat_extract/feats_agender_agender.py +4 -2
- nkululeko/feat_extract/feats_squim.py +8 -3
- nkululeko/feat_extract/feats_wav2vec2.py +8 -7
- nkululeko/feat_extract/feats_whisper.py +6 -3
- nkululeko/models/model_cnn.py +14 -6
- nkululeko/models/model_mlp.py +16 -7
- nkululeko/models/model_mlp_regression.py +15 -7
- nkululeko/nkuluflag.py +19 -6
- nkululeko/plots.py +30 -15
- nkululeko/test.py +20 -15
- nkululeko/test_predictor.py +3 -0
- {nkululeko-0.83.0.dist-info → nkululeko-0.83.2.dist-info}/METADATA +9 -1
- {nkululeko-0.83.0.dist-info → nkululeko-0.83.2.dist-info}/RECORD +18 -19
- nkululeko/reporter.py +0 -324
- {nkululeko-0.83.0.dist-info → nkululeko-0.83.2.dist-info}/LICENSE +0 -0
- {nkululeko-0.83.0.dist-info → nkululeko-0.83.2.dist-info}/WHEEL +0 -0
- {nkululeko-0.83.0.dist-info → nkululeko-0.83.2.dist-info}/top_level.txt +0 -0
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.83.0"
|
1
|
+
VERSION="0.83.2"
|
2
2
|
SAMPLING_RATE = 16000
|
nkululeko/experiment.py
CHANGED
@@ -675,12 +675,16 @@ class Experiment:
|
|
675
675
|
test_predictor = TestPredictor(
|
676
676
|
model, self.df_test, self.label_encoder, result_name
|
677
677
|
)
|
678
|
-
test_predictor.predict_and_store()
|
678
|
+
result = test_predictor.predict_and_store()
|
679
|
+
return result
|
679
680
|
|
680
681
|
def load(self, filename):
|
681
|
-
|
682
|
-
|
683
|
-
|
682
|
+
try:
|
683
|
+
f = open(filename, "rb")
|
684
|
+
tmp_dict = pickle.load(f)
|
685
|
+
f.close()
|
686
|
+
except EOFError as eof:
|
687
|
+
self.util.error(f"can't open file {filename}: {eof}")
|
684
688
|
self.__dict__.update(tmp_dict)
|
685
689
|
glob_conf.set_labels(self.labels)
|
686
690
|
|
@@ -28,9 +28,11 @@ class AgenderAgenderSet(Featureset):
|
|
28
28
|
if not os.path.isdir(model_root):
|
29
29
|
cache_root = audeer.mkdir("cache")
|
30
30
|
model_root = audeer.mkdir(model_root)
|
31
|
-
archive_path = audeer.download_url(
|
31
|
+
archive_path = audeer.download_url(
|
32
|
+
model_url, cache_root, verbose=True)
|
32
33
|
audeer.extract_archive(archive_path, model_root)
|
33
|
-
|
34
|
+
cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
35
|
+
device = self.util.config_val("MODEL", "device", cuda)
|
34
36
|
self.model = audonnx.load(model_root, device=device)
|
35
37
|
# pytorch_total_params = sum(p.numel() for p in self.model.parameters())
|
36
38
|
# self.util.debug(
|
@@ -28,12 +28,17 @@ from nkululeko.utils.util import Util
|
|
28
28
|
|
29
29
|
|
30
30
|
class SquimSet(Featureset):
|
31
|
-
"""Class to predict SQUIM features"""
|
31
|
+
"""Class to predict SQUIM features."""
|
32
32
|
|
33
33
|
def __init__(self, name, data_df, feats_type):
|
34
|
-
"""Constructor.
|
34
|
+
"""Constructor.
|
35
|
+
|
36
|
+
Is_train is needed to distinguish from test/dev sets,
|
37
|
+
because they use the codebook from the training.
|
38
|
+
"""
|
35
39
|
super().__init__(name, data_df, feats_type)
|
36
|
-
|
40
|
+
cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
41
|
+
self.device = self.util.config_val("MODEL", "device", cuda)
|
37
42
|
self.model_initialized = False
|
38
43
|
|
39
44
|
def init_model(self):
|
@@ -21,7 +21,11 @@ class Wav2vec2(Featureset):
|
|
21
21
|
"""Class to extract wav2vec2 embeddings"""
|
22
22
|
|
23
23
|
def __init__(self, name, data_df, feat_type):
|
24
|
-
"""Constructor.
|
24
|
+
"""Constructor.
|
25
|
+
|
26
|
+
If_train is needed to distinguish from test/dev sets,
|
27
|
+
because they use the codebook from the training
|
28
|
+
"""
|
25
29
|
super().__init__(name, data_df, feat_type)
|
26
30
|
cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
27
31
|
self.device = self.util.config_val("MODEL", "device", cuda)
|
@@ -39,8 +43,7 @@ class Wav2vec2(Featureset):
|
|
39
43
|
)
|
40
44
|
config = transformers.AutoConfig.from_pretrained(model_path)
|
41
45
|
layer_num = config.num_hidden_layers
|
42
|
-
hidden_layer = int(self.util.config_val(
|
43
|
-
"FEATS", "wav2vec2.layer", "0"))
|
46
|
+
hidden_layer = int(self.util.config_val("FEATS", "wav2vec2.layer", "0"))
|
44
47
|
config.num_hidden_layers = layer_num - hidden_layer
|
45
48
|
self.util.debug(f"using hidden layer #{config.num_hidden_layers}")
|
46
49
|
self.processor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
|
@@ -55,8 +58,7 @@ class Wav2vec2(Featureset):
|
|
55
58
|
"""Extract the features or load them from disk if present."""
|
56
59
|
store = self.util.get_path("store")
|
57
60
|
storage = f"{store}{self.name}.pkl"
|
58
|
-
extract = self.util.config_val(
|
59
|
-
"FEATS", "needs_feature_extraction", False)
|
61
|
+
extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
|
60
62
|
no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
|
61
63
|
if extract or no_reuse or not os.path.isfile(storage):
|
62
64
|
if not self.model_initialized:
|
@@ -77,8 +79,7 @@ class Wav2vec2(Featureset):
|
|
77
79
|
emb = self.get_embeddings(signal, sampling_rate, file)
|
78
80
|
emb_series[idx] = emb
|
79
81
|
# print(f"emb_series shape: {emb_series.shape}")
|
80
|
-
self.df = pd.DataFrame(
|
81
|
-
emb_series.values.tolist(), index=self.data_df.index)
|
82
|
+
self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
|
82
83
|
# print(f"df shape: {self.df.shape}")
|
83
84
|
self.df.to_pickle(storage)
|
84
85
|
try:
|
@@ -32,19 +32,22 @@ class Whisper(Featureset):
|
|
32
32
|
model_name = f"openai/{self.feat_type}"
|
33
33
|
self.model = WhisperModel.from_pretrained(model_name).to(self.device)
|
34
34
|
print(f"intialized Whisper model on {self.device}")
|
35
|
-
self.feature_extractor = AutoFeatureExtractor.from_pretrained(
|
35
|
+
self.feature_extractor = AutoFeatureExtractor.from_pretrained(
|
36
|
+
model_name)
|
36
37
|
self.model_initialized = True
|
37
38
|
|
38
39
|
def extract(self):
|
39
40
|
"""Extract the features or load them from disk if present."""
|
40
41
|
store = self.util.get_path("store")
|
41
42
|
storage = f"{store}{self.name}.pkl"
|
42
|
-
extract = self.util.config_val(
|
43
|
+
extract = self.util.config_val(
|
44
|
+
"FEATS", "needs_feature_extraction", False)
|
43
45
|
no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
|
44
46
|
if extract or no_reuse or not os.path.isfile(storage):
|
45
47
|
if not self.model_initialized:
|
46
48
|
self.init_model()
|
47
|
-
self.util.debug(
|
49
|
+
self.util.debug(
|
50
|
+
"extracting whisper embeddings, this might take a while...")
|
48
51
|
emb_series = []
|
49
52
|
for (file, start, end), _ in audeer.progress_bar(
|
50
53
|
self.data_df.iterrows(),
|
nkululeko/models/model_cnn.py
CHANGED
@@ -16,6 +16,7 @@ import numpy as np
|
|
16
16
|
from sklearn.metrics import recall_score
|
17
17
|
from collections import OrderedDict
|
18
18
|
from PIL import Image
|
19
|
+
from traitlets import default
|
19
20
|
|
20
21
|
from nkululeko.utils.util import Util
|
21
22
|
import nkululeko.glob_conf as glob_conf
|
@@ -48,6 +49,7 @@ class CNN_model(Model):
|
|
48
49
|
self.util.error(f"unknown loss function: {criterion}")
|
49
50
|
self.util.debug(f"using model with cross entropy loss function")
|
50
51
|
# set up the model
|
52
|
+
# cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
51
53
|
self.device = self.util.config_val("MODEL", "device", "cpu")
|
52
54
|
try:
|
53
55
|
layers_string = glob_conf.config["MODEL"]["layers"]
|
@@ -84,7 +86,8 @@ class CNN_model(Model):
|
|
84
86
|
train_set = self.Dataset_image(
|
85
87
|
feats_train, df_train, self.target, transformations
|
86
88
|
)
|
87
|
-
test_set = self.Dataset_image(
|
89
|
+
test_set = self.Dataset_image(
|
90
|
+
feats_test, df_test, self.target, transformations)
|
88
91
|
# Define data loaders
|
89
92
|
self.trainloader = torch.utils.data.DataLoader(
|
90
93
|
train_set,
|
@@ -137,7 +140,8 @@ class CNN_model(Model):
|
|
137
140
|
losses = []
|
138
141
|
for images, labels in self.trainloader:
|
139
142
|
logits = self.model(images.to(self.device))
|
140
|
-
loss = self.criterion(logits, labels.to(
|
143
|
+
loss = self.criterion(logits, labels.to(
|
144
|
+
self.device, dtype=torch.int64))
|
141
145
|
losses.append(loss.item())
|
142
146
|
self.optimizer.zero_grad()
|
143
147
|
loss.backward()
|
@@ -165,14 +169,16 @@ class CNN_model(Model):
|
|
165
169
|
|
166
170
|
self.loss_eval = (np.asarray(losses)).mean()
|
167
171
|
predictions = logits.argmax(dim=1)
|
168
|
-
uar = recall_score(
|
172
|
+
uar = recall_score(
|
173
|
+
targets.numpy(), predictions.numpy(), average="macro")
|
169
174
|
return uar, targets, predictions
|
170
175
|
|
171
176
|
def predict(self):
|
172
177
|
_, truths, predictions = self.evaluate_model(
|
173
178
|
self.model, self.testloader, self.device
|
174
179
|
)
|
175
|
-
uar, _, _ = self.evaluate_model(
|
180
|
+
uar, _, _ = self.evaluate_model(
|
181
|
+
self.model, self.trainloader, self.device)
|
176
182
|
report = Reporter(truths, predictions, self.run, self.epoch)
|
177
183
|
try:
|
178
184
|
report.result.loss = self.loss
|
@@ -209,7 +215,8 @@ class CNN_model(Model):
|
|
209
215
|
dir = self.util.get_path("model_dir")
|
210
216
|
# name = f'{self.util.get_exp_name()}_{run}_{epoch:03d}.model'
|
211
217
|
name = f"{self.util.get_exp_name(only_train=True)}_{self.run}_{self.epoch:03d}.model"
|
212
|
-
|
218
|
+
cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
219
|
+
self.device = self.util.config_val("MODEL", "device", cuda)
|
213
220
|
layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
|
214
221
|
self.store_path = dir + name
|
215
222
|
drop = self.util.config_val("MODEL", "drop", False)
|
@@ -222,7 +229,8 @@ class CNN_model(Model):
|
|
222
229
|
def load_path(self, path, run, epoch):
|
223
230
|
self.set_id(run, epoch)
|
224
231
|
with open(path, "rb") as handle:
|
225
|
-
|
232
|
+
cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
233
|
+
self.device = self.util.config_val("MODEL", "device", cuda)
|
226
234
|
layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
|
227
235
|
self.store_path = path
|
228
236
|
drop = self.util.config_val("MODEL", "drop", False)
|
nkululeko/models/model_mlp.py
CHANGED
@@ -34,8 +34,9 @@ class MLP_model(Model):
|
|
34
34
|
else:
|
35
35
|
self.util.error(f"unknown loss function: {criterion}")
|
36
36
|
self.util.debug(f"using model with cross entropy loss function")
|
37
|
-
# set up the model
|
38
|
-
|
37
|
+
# set up the model, use GPU if availabe
|
38
|
+
cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
39
|
+
self.device = self.util.config_val("MODEL", "device", cuda)
|
39
40
|
try:
|
40
41
|
layers_string = glob_conf.config["MODEL"]["layers"]
|
41
42
|
except KeyError as ke:
|
@@ -86,7 +87,8 @@ class MLP_model(Model):
|
|
86
87
|
losses = []
|
87
88
|
for features, labels in self.trainloader:
|
88
89
|
logits = self.model(features.to(self.device))
|
89
|
-
loss = self.criterion(logits, labels.to(
|
90
|
+
loss = self.criterion(logits, labels.to(
|
91
|
+
self.device, dtype=torch.int64))
|
90
92
|
losses.append(loss.item())
|
91
93
|
self.optimizer.zero_grad()
|
92
94
|
loss.backward()
|
@@ -114,14 +116,16 @@ class MLP_model(Model):
|
|
114
116
|
|
115
117
|
self.loss_eval = (np.asarray(losses)).mean()
|
116
118
|
predictions = logits.argmax(dim=1)
|
117
|
-
uar = recall_score(
|
119
|
+
uar = recall_score(
|
120
|
+
targets.numpy(), predictions.numpy(), average="macro")
|
118
121
|
return uar, targets, predictions
|
119
122
|
|
120
123
|
def predict(self):
|
121
124
|
_, truths, predictions = self.evaluate_model(
|
122
125
|
self.model, self.testloader, self.device
|
123
126
|
)
|
124
|
-
uar, _, _ = self.evaluate_model(
|
127
|
+
uar, _, _ = self.evaluate_model(
|
128
|
+
self.model, self.trainloader, self.device)
|
125
129
|
report = Reporter(truths, predictions, self.run, self.epoch)
|
126
130
|
try:
|
127
131
|
report.result.loss = self.loss
|
@@ -179,6 +183,9 @@ class MLP_model(Model):
|
|
179
183
|
features = np.reshape(features, (-1, 1)).T
|
180
184
|
logits = self.model(features.to(self.device))
|
181
185
|
# logits = self.model(features)
|
186
|
+
# if tensor conver to cpu
|
187
|
+
if isinstance(logits, torch.Tensor):
|
188
|
+
logits = logits.cpu()
|
182
189
|
a = logits.numpy()
|
183
190
|
res = {}
|
184
191
|
for i in range(len(a[0])):
|
@@ -196,7 +203,8 @@ class MLP_model(Model):
|
|
196
203
|
dir = self.util.get_path("model_dir")
|
197
204
|
# name = f'{self.util.get_exp_name()}_{run}_{epoch:03d}.model'
|
198
205
|
name = f"{self.util.get_exp_name(only_train=True)}_{self.run}_{self.epoch:03d}.model"
|
199
|
-
|
206
|
+
cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
207
|
+
self.device = self.util.config_val("MODEL", "device", cuda)
|
200
208
|
layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
|
201
209
|
self.store_path = dir + name
|
202
210
|
drop = self.util.config_val("MODEL", "drop", False)
|
@@ -211,7 +219,8 @@ class MLP_model(Model):
|
|
211
219
|
def load_path(self, path, run, epoch):
|
212
220
|
self.set_id(run, epoch)
|
213
221
|
with open(path, "rb") as handle:
|
214
|
-
|
222
|
+
cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
223
|
+
self.device = self.util.config_val("MODEL", "device", cuda)
|
215
224
|
layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
|
216
225
|
self.store_path = path
|
217
226
|
drop = self.util.config_val("MODEL", "drop", False)
|
@@ -9,6 +9,7 @@ import torch
|
|
9
9
|
from audmetric import concordance_cc
|
10
10
|
from audmetric import mean_absolute_error
|
11
11
|
from audmetric import mean_squared_error
|
12
|
+
from traitlets import default
|
12
13
|
|
13
14
|
import nkululeko.glob_conf as glob_conf
|
14
15
|
from nkululeko.losses.loss_ccc import ConcordanceCorCoeff
|
@@ -40,7 +41,8 @@ class MLP_Reg_model(Model):
|
|
40
41
|
self.util.error(f"unknown loss function: {criterion}")
|
41
42
|
self.util.debug(f"training model with {criterion} loss function")
|
42
43
|
# set up the model
|
43
|
-
|
44
|
+
cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
45
|
+
self.device = self.util.config_val("MODEL", "device", cuda)
|
44
46
|
layers_string = glob_conf.config["MODEL"]["layers"]
|
45
47
|
self.util.debug(f"using layers {layers_string}")
|
46
48
|
try:
|
@@ -50,7 +52,8 @@ class MLP_Reg_model(Model):
|
|
50
52
|
drop = self.util.config_val("MODEL", "drop", False)
|
51
53
|
if drop:
|
52
54
|
self.util.debug(f"training with dropout: {drop}")
|
53
|
-
self.model = self.MLP(
|
55
|
+
self.model = self.MLP(
|
56
|
+
feats_train.shape[1], layers, 1, drop).to(self.device)
|
54
57
|
self.learning_rate = float(
|
55
58
|
self.util.config_val("MODEL", "learning_rate", 0.0001)
|
56
59
|
)
|
@@ -93,8 +96,10 @@ class MLP_Reg_model(Model):
|
|
93
96
|
_, truths, predictions = self.evaluate_model(
|
94
97
|
self.model, self.testloader, self.device
|
95
98
|
)
|
96
|
-
result, _, _ = self.evaluate_model(
|
97
|
-
|
99
|
+
result, _, _ = self.evaluate_model(
|
100
|
+
self.model, self.trainloader, self.device)
|
101
|
+
report = Reporter(truths.numpy(), predictions.numpy(),
|
102
|
+
self.run, self.epoch)
|
98
103
|
try:
|
99
104
|
report.result.loss = self.loss
|
100
105
|
except AttributeError: # if the model was loaded from disk the loss is unknown
|
@@ -128,9 +133,11 @@ class MLP_Reg_model(Model):
|
|
128
133
|
|
129
134
|
def __getitem__(self, item):
|
130
135
|
index = self.df.index[item]
|
131
|
-
features = self.df_features.loc[index, :].values.astype(
|
136
|
+
features = self.df_features.loc[index, :].values.astype(
|
137
|
+
"float32").squeeze()
|
132
138
|
labels = (
|
133
|
-
np.array([self.df.loc[index, self.label]]
|
139
|
+
np.array([self.df.loc[index, self.label]]
|
140
|
+
).astype("float32").squeeze()
|
134
141
|
)
|
135
142
|
return features, labels
|
136
143
|
|
@@ -187,7 +194,8 @@ class MLP_Reg_model(Model):
|
|
187
194
|
end_index = (index + 1) * loader.batch_size
|
188
195
|
if end_index > len(loader.dataset):
|
189
196
|
end_index = len(loader.dataset)
|
190
|
-
logits[start_index:end_index] = model(
|
197
|
+
logits[start_index:end_index] = model(
|
198
|
+
features.to(device)).reshape(-1)
|
191
199
|
targets[start_index:end_index] = labels
|
192
200
|
loss = self.criterion(
|
193
201
|
logits[start_index:end_index].to(
|
nkululeko/nkuluflag.py
CHANGED
@@ -2,13 +2,16 @@ import argparse
|
|
2
2
|
import configparser
|
3
3
|
import os
|
4
4
|
import os.path
|
5
|
+
import sys
|
5
6
|
|
6
7
|
from nkululeko.nkululeko import doit as nkulu
|
8
|
+
from nkululeko.test import do_it as test_mod
|
7
9
|
|
8
10
|
|
9
|
-
def do_it(src_dir):
|
11
|
+
def doit(cla):
|
10
12
|
parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
|
11
13
|
parser.add_argument("--config", help="The base configuration")
|
14
|
+
parser.add_argument("--mod", default="nkulu", help="Which nkululeko module to call")
|
12
15
|
parser.add_argument("--data", help="The databases", nargs="*", action="append")
|
13
16
|
parser.add_argument(
|
14
17
|
"--label", nargs="*", help="The labels for the target", action="append"
|
@@ -25,20 +28,23 @@ def do_it(src_dir):
|
|
25
28
|
parser.add_argument("--model", default="xgb", help="The model type")
|
26
29
|
parser.add_argument("--feat", default="['os']", help="The feature type")
|
27
30
|
parser.add_argument("--set", help="The opensmile set")
|
28
|
-
parser.add_argument("--with_os", help="To add os features")
|
29
31
|
parser.add_argument("--target", help="The target designation")
|
30
32
|
parser.add_argument("--epochs", help="The number of epochs")
|
31
33
|
parser.add_argument("--runs", help="The number of runs")
|
32
34
|
parser.add_argument("--learning_rate", help="The learning rate")
|
33
35
|
parser.add_argument("--drop", help="The dropout rate [0:1]")
|
34
36
|
|
35
|
-
args = parser.parse_args()
|
37
|
+
args = parser.parse_args(cla)
|
36
38
|
|
37
39
|
if args.config is not None:
|
38
40
|
config_file = args.config
|
39
41
|
else:
|
40
42
|
print("ERROR: need config file")
|
41
43
|
quit(-1)
|
44
|
+
|
45
|
+
if args.mod is not None:
|
46
|
+
nkulu_mod = args.mod
|
47
|
+
|
42
48
|
# test if config is there
|
43
49
|
if not os.path.isfile(config_file):
|
44
50
|
print(f"ERROR: no such file {config_file}")
|
@@ -86,10 +92,17 @@ def do_it(src_dir):
|
|
86
92
|
with open(tmp_config, "w") as tmp_file:
|
87
93
|
config.write(tmp_file)
|
88
94
|
|
89
|
-
result, last_epoch = nkulu(tmp_config)
|
95
|
+
result, last_epoch = 0, 0
|
96
|
+
if nkulu_mod == "nkulu":
|
97
|
+
result, last_epoch = nkulu(tmp_config)
|
98
|
+
elif nkulu_mod == "test":
|
99
|
+
result, last_epoch = test_mod(tmp_config, "test_results.csv")
|
100
|
+
else:
|
101
|
+
print(f"ERROR: unknown module: {nkulu_mod}, should be [nkulu | test]")
|
90
102
|
return result, last_epoch
|
91
103
|
|
92
104
|
|
93
105
|
if __name__ == "__main__":
|
94
|
-
|
95
|
-
|
106
|
+
cla = sys.argv
|
107
|
+
cla.pop(0)
|
108
|
+
doit(cla) # sys.argv[1])
|
nkululeko/plots.py
CHANGED
@@ -28,7 +28,8 @@ class Plots:
|
|
28
28
|
df_speaker["samplenum"] = df_speaker.shape[0]
|
29
29
|
df_speakers = pd.concat([df_speakers, df_speaker.head(1)])
|
30
30
|
# plot the distribution of samples per speaker
|
31
|
-
|
31
|
+
# one up because of the runs
|
32
|
+
fig_dir = self.util.get_path("fig_dir") + "../"
|
32
33
|
self.util.debug(f"plotting samples per speaker")
|
33
34
|
if "gender" in df_speakers:
|
34
35
|
filename = f"samples_value_counts"
|
@@ -137,7 +138,8 @@ class Plots:
|
|
137
138
|
df, att1, class_label, att1, type_s
|
138
139
|
)
|
139
140
|
else:
|
140
|
-
ax, caption = self._plot2cont(
|
141
|
+
ax, caption = self._plot2cont(
|
142
|
+
df, class_label, att1, type_s)
|
141
143
|
self._save_plot(
|
142
144
|
ax,
|
143
145
|
caption,
|
@@ -150,7 +152,8 @@ class Plots:
|
|
150
152
|
att1 = att[0]
|
151
153
|
att2 = att[1]
|
152
154
|
if att1 == self.target or att2 == self.target:
|
153
|
-
self.util.debug(
|
155
|
+
self.util.debug(
|
156
|
+
f"no need to correlate {self.target} with itself")
|
154
157
|
return
|
155
158
|
if att1 not in df:
|
156
159
|
self.util.error(f"unknown feature: {att1}")
|
@@ -165,7 +168,8 @@ class Plots:
|
|
165
168
|
if self.util.is_categorical(df[att1]):
|
166
169
|
if self.util.is_categorical(df[att2]):
|
167
170
|
# class_label = cat, att1 = cat, att2 = cat
|
168
|
-
ax, caption = self._plot2cat(
|
171
|
+
ax, caption = self._plot2cat(
|
172
|
+
df, att1, att2, att1, type_s)
|
169
173
|
else:
|
170
174
|
# class_label = cat, att1 = cat, att2 = cont
|
171
175
|
ax, caption = self._plotcatcont(
|
@@ -186,7 +190,8 @@ class Plots:
|
|
186
190
|
if self.util.is_categorical(df[att1]):
|
187
191
|
if self.util.is_categorical(df[att2]):
|
188
192
|
# class_label = cont, att1 = cat, att2 = cat
|
189
|
-
ax, caption = self._plot2cat(
|
193
|
+
ax, caption = self._plot2cat(
|
194
|
+
df, att1, att2, att1, type_s)
|
190
195
|
else:
|
191
196
|
# class_label = cont, att1 = cat, att2 = cont
|
192
197
|
ax, caption = self._plot2cont_cat(
|
@@ -200,7 +205,8 @@ class Plots:
|
|
200
205
|
)
|
201
206
|
else:
|
202
207
|
# class_label = cont, att1 = cont, att2 = cont
|
203
|
-
ax, caption = self._plot2cont(
|
208
|
+
ax, caption = self._plot2cont(
|
209
|
+
df, att1, att2, type_s)
|
204
210
|
|
205
211
|
self._save_plot(
|
206
212
|
ax, caption, f"Correlation of {att1} and {att2}", filename, type_s
|
@@ -213,7 +219,8 @@ class Plots:
|
|
213
219
|
)
|
214
220
|
|
215
221
|
def _save_plot(self, ax, caption, header, filename, type_s):
|
216
|
-
|
222
|
+
# one up because of the runs
|
223
|
+
fig_dir = self.util.get_path("fig_dir") + "../"
|
217
224
|
fig = ax.figure
|
218
225
|
# avoid warning
|
219
226
|
# plt.tight_layout()
|
@@ -231,7 +238,8 @@ class Plots:
|
|
231
238
|
)
|
232
239
|
|
233
240
|
def _check_binning(self, att, df):
|
234
|
-
bin_reals_att = eval(self.util.config_val(
|
241
|
+
bin_reals_att = eval(self.util.config_val(
|
242
|
+
"EXPL", f"{att}.bin_reals", "False"))
|
235
243
|
if bin_reals_att:
|
236
244
|
self.util.debug(f"binning continuous variable {att} to categories")
|
237
245
|
att_new = f"{att}_binned"
|
@@ -305,7 +313,8 @@ class Plots:
|
|
305
313
|
return ax, caption
|
306
314
|
|
307
315
|
def plot_durations(self, df, filename, sample_selection, caption=""):
|
308
|
-
|
316
|
+
# one up because of the runs
|
317
|
+
fig_dir = self.util.get_path("fig_dir") + "../"
|
309
318
|
try:
|
310
319
|
ax = sns.histplot(df, x="duration", hue="class_label", kde=True)
|
311
320
|
except AttributeError as ae:
|
@@ -333,7 +342,8 @@ class Plots:
|
|
333
342
|
|
334
343
|
def describe_df(self, name, df, target, filename):
|
335
344
|
"""Make a stacked barplot of samples and speakers per sex and target values. speaker, gender and target columns must be present"""
|
336
|
-
fig_dir = self.util.get_path(
|
345
|
+
fig_dir = self.util.get_path(
|
346
|
+
"fig_dir") + "../" # one up because of the runs
|
337
347
|
sampl_num = df.shape[0]
|
338
348
|
sex_col = "gender"
|
339
349
|
if target == "gender":
|
@@ -380,8 +390,10 @@ class Plots:
|
|
380
390
|
|
381
391
|
def scatter_plot(self, feats, label_df, label, dimred_type):
|
382
392
|
dim_num = int(self.util.config_val("EXPL", "scatter.dim", 2))
|
383
|
-
|
384
|
-
|
393
|
+
# one up because of the runs
|
394
|
+
fig_dir = self.util.get_path("fig_dir") + "../"
|
395
|
+
sample_selection = self.util.config_val(
|
396
|
+
"EXPL", "sample_selection", "all")
|
385
397
|
filename = f"{label}_{self.util.get_feattype_name()}_{sample_selection}_{dimred_type}_{str(dim_num)}d"
|
386
398
|
filename = f"{fig_dir}{filename}.{self.format}"
|
387
399
|
self.util.debug(f"computing {dimred_type}, this might take a while...")
|
@@ -423,7 +435,8 @@ class Plots:
|
|
423
435
|
|
424
436
|
if dim_num == 2:
|
425
437
|
plot_data = np.vstack((data.T, labels)).T
|
426
|
-
plot_df = pd.DataFrame(
|
438
|
+
plot_df = pd.DataFrame(
|
439
|
+
data=plot_data, columns=("Dim_1", "Dim_2", "label"))
|
427
440
|
# plt.tight_layout()
|
428
441
|
ax = (
|
429
442
|
sns.FacetGrid(plot_df, hue="label", height=6)
|
@@ -515,7 +528,8 @@ class Plots:
|
|
515
528
|
def plot_feature(self, title, feature, label, df_labels, df_features):
|
516
529
|
# remove fullstops in the name
|
517
530
|
feature_name = feature.replace(".", "-")
|
518
|
-
|
531
|
+
# one up because of the runs
|
532
|
+
fig_dir = self.util.get_path("fig_dir") + "../"
|
519
533
|
filename = f"{fig_dir}feat_dist_{title}_{feature_name}.{self.format}"
|
520
534
|
if self.util.is_categorical(df_labels[label]):
|
521
535
|
df_plot = pd.DataFrame(
|
@@ -554,7 +568,8 @@ class Plots:
|
|
554
568
|
tree.plot_tree(model, feature_names=list(features.columns), ax=ax)
|
555
569
|
# plt.tight_layout()
|
556
570
|
# print(ax)
|
557
|
-
|
571
|
+
# one up because of the runs
|
572
|
+
fig_dir = self.util.get_path("fig_dir") + "../"
|
558
573
|
exp_name = self.util.get_exp_name(only_data=True)
|
559
574
|
format = self.util.config_val("PLOT", "format", "png")
|
560
575
|
filename = f"{fig_dir}{exp_name}EXPL_tree-plot.{format}"
|
nkululeko/test.py
CHANGED
@@ -10,20 +10,7 @@ from nkululeko.experiment import Experiment
|
|
10
10
|
from nkululeko.utils.util import Util
|
11
11
|
|
12
12
|
|
13
|
-
def main(src_dir):
|
14
|
-
parser = argparse.ArgumentParser(
|
15
|
-
description="Call the nkululeko TEST framework.")
|
16
|
-
parser.add_argument("--config", default="exp.ini",
|
17
|
-
help="The base configuration")
|
18
|
-
parser.add_argument(
|
19
|
-
"--outfile",
|
20
|
-
default="my_results.csv",
|
21
|
-
help="File name to store the predictions",
|
22
|
-
)
|
23
|
-
|
24
|
-
args = parser.parse_args()
|
25
|
-
|
26
|
-
config_file = args.config
|
13
|
+
def do_it(config_file, outfile):
|
27
14
|
|
28
15
|
# test if the configuration file exists
|
29
16
|
if not os.path.isfile(config_file):
|
@@ -48,10 +35,28 @@ def main(src_dir):
|
|
48
35
|
expr.load(f"{util.get_save_name()}")
|
49
36
|
expr.fill_tests()
|
50
37
|
expr.extract_test_feats()
|
51
|
-
expr.predict_test_and_save(args.outfile)
|
38
|
+
result = expr.predict_test_and_save(outfile)
|
52
39
|
|
53
40
|
print("DONE")
|
54
41
|
|
42
|
+
return result, 0
|
43
|
+
|
44
|
+
|
45
|
+
def main(src_dir):
|
46
|
+
parser = argparse.ArgumentParser(description="Call the nkululeko TEST framework.")
|
47
|
+
parser.add_argument("--config", default="exp.ini", help="The base configuration")
|
48
|
+
parser.add_argument(
|
49
|
+
"--outfile",
|
50
|
+
default="my_results.csv",
|
51
|
+
help="File name to store the predictions",
|
52
|
+
)
|
53
|
+
args = parser.parse_args()
|
54
|
+
if args.config is not None:
|
55
|
+
config_file = args.config
|
56
|
+
else:
|
57
|
+
config_file = f"{src_dir}/exp.ini"
|
58
|
+
do_it(config_file, args.outfile)
|
59
|
+
|
55
60
|
|
56
61
|
if __name__ == "__main__":
|
57
62
|
cwd = os.path.dirname(os.path.abspath(__file__))
|
nkululeko/test_predictor.py
CHANGED
@@ -29,6 +29,7 @@ class TestPredictor:
|
|
29
29
|
|
30
30
|
def predict_and_store(self):
|
31
31
|
label_data = self.util.config_val("DATA", "label_data", False)
|
32
|
+
result = 0
|
32
33
|
if label_data:
|
33
34
|
data = Dataset(label_data)
|
34
35
|
data.load()
|
@@ -57,6 +58,7 @@ class TestPredictor:
|
|
57
58
|
test_dbs_string = "_".join(test_dbs)
|
58
59
|
predictions = self.model.get_predictions()
|
59
60
|
report = self.model.predict()
|
61
|
+
result = report.result.get_result()
|
60
62
|
report.set_filename_add(f"test-{test_dbs_string}")
|
61
63
|
self.util.print_best_results([report])
|
62
64
|
report.plot_confmatrix(self.util.get_plot_name(), 0)
|
@@ -74,3 +76,4 @@ class TestPredictor:
|
|
74
76
|
df = df.rename(columns={"class_label": target})
|
75
77
|
df.to_csv(self.name)
|
76
78
|
self.util.debug(f"results stored in {self.name}")
|
79
|
+
return result
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.83.0
|
3
|
+
Version: 0.83.2
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -333,6 +333,14 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
|
|
333
333
|
Changelog
|
334
334
|
=========
|
335
335
|
|
336
|
+
Version 0.83.2
|
337
|
+
--------------
|
338
|
+
* added default cuda if present and not stated
|
339
|
+
|
340
|
+
Version 0.83.1
|
341
|
+
--------------
|
342
|
+
* add test module to nkuluflag
|
343
|
+
|
336
344
|
Version 0.83.0
|
337
345
|
--------------
|
338
346
|
* test module now prints out reports
|
@@ -2,11 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
|
2
2
|
nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
|
3
3
|
nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
|
4
4
|
nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
|
5
|
-
nkululeko/constants.py,sha256=
|
5
|
+
nkululeko/constants.py,sha256=VE94aCLZ8N-hTKIgb4OLo1s9l_Fxncl9iTNis0eotFw,39
|
6
6
|
nkululeko/demo.py,sha256=55kNFA2helMhOxD4yZuKg1JWDtlUUpxm-6uAnroIydI,3264
|
7
7
|
nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
|
8
8
|
nkululeko/demo_predictor.py,sha256=-ggSHc3DXxRzjzcGB4qFBOMvKsfUdTkkde50BDrS9dA,4755
|
9
|
-
nkululeko/experiment.py,sha256=
|
9
|
+
nkululeko/experiment.py,sha256=WyLiOJ_VxlaXoS1cwXruzYV9OESMjjedcFNreKE1Z8I,29728
|
10
10
|
nkululeko/explore.py,sha256=2wdoGRqldvsN1zCiWk0quSDgHHHUoF2UZOWQ1r-2OLM,2310
|
11
11
|
nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
|
12
12
|
nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzwM,3976
|
@@ -15,18 +15,17 @@ nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
|
|
15
15
|
nkululeko/glob_conf.py,sha256=iHiVSxDYgmYwdx6z0HuGUMSWrfZfufPHxHb60q2dLRY,453
|
16
16
|
nkululeko/modelrunner.py,sha256=GwDXcE2gDQXat4W0-HhHQ1BcUNCRBXMBQ4QycfHp_5c,9288
|
17
17
|
nkululeko/multidb.py,sha256=fG3VukEWP1vreVN4gB1IRXxwwg4jLftsSEYtu0o1f78,5634
|
18
|
-
nkululeko/nkuluflag.py,sha256=
|
18
|
+
nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
|
19
19
|
nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
|
20
|
-
nkululeko/plots.py,sha256=
|
20
|
+
nkululeko/plots.py,sha256=nd9tF_61DyAx7oGZF8gTrHXazkgFjFe4eClxu1nQ_XU,23276
|
21
21
|
nkululeko/predict.py,sha256=sF091sSSLnEWcISx9ZcULLie3tY5XeFsQJd6b3vrxFg,2409
|
22
|
-
nkululeko/reporter.py,sha256=8mlIaKep4hM-tdRv8t98tK80rx3zOmVGXSORhiPc3as,12483
|
23
22
|
nkululeko/resample.py,sha256=3WbxkwgyTe_fW38046Rjxk3knOkFdhqn2C4nfhbUurQ,2287
|
24
23
|
nkululeko/runmanager.py,sha256=eTM1DNQKt1lxYhzt4vZyZluPXW9sWlIJHNQzex4lkJU,7624
|
25
24
|
nkululeko/scaler.py,sha256=4nkIqoajkIkuTPK0Z02ifMN_awl6fP_i-GBYdoGYgGM,4101
|
26
25
|
nkululeko/segment.py,sha256=YLKckX44tbvTb3LrdgYw9X4guzuF27sutl92z9DkpZU,4835
|
27
26
|
nkululeko/syllable_nuclei.py,sha256=Sky-C__MeUDaxqHnDl2TGLLYOYvsahD35TUjWGeG31k,10047
|
28
|
-
nkululeko/test.py,sha256=
|
29
|
-
nkululeko/test_predictor.py,sha256=
|
27
|
+
nkululeko/test.py,sha256=1w624vo5KTzmFC8BUStGlLDmIEAFuJUz7J0W-gp7AxI,1677
|
28
|
+
nkululeko/test_predictor.py,sha256=_w5J8CxH6hmW3mLTKbdfmywl5QpdNAnW1Y8TE5GtlfE,3237
|
30
29
|
nkululeko/augmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
31
30
|
nkululeko/augmenting/augmenter.py,sha256=XAt0dpmlnKxqyysqCgV3rcz-pRIvOz7rU7dmGDCVAzs,2905
|
32
31
|
nkululeko/augmenting/randomsplicer.py,sha256=Z5rxdKKUpuncLWuTS6xVfVKUeVbeiYU_dLRHQ5fcg4Y,2669
|
@@ -49,7 +48,7 @@ nkululeko/data/dataset.py,sha256=JGzMD6HIvkFkYBekmbmslIKc5ADaCj06T-8gpqH_kFo,276
|
|
49
48
|
nkululeko/data/dataset_csv.py,sha256=uLa7jW4w2ft299NkpXZMD361kPHF8oSYoIZ_ucxhuOM,3884
|
50
49
|
nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
51
50
|
nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
|
52
|
-
nkululeko/feat_extract/feats_agender_agender.py,sha256=
|
51
|
+
nkululeko/feat_extract/feats_agender_agender.py,sha256=tgH2BnwcxpvuLmOkrMbVdBSX0Onfz2MG12FsddalRKI,3424
|
53
52
|
nkululeko/feat_extract/feats_analyser.py,sha256=_5oz4y-NZCEBgfNP2GZ9WNqQR50Hbykm0TvDVomWP0U,11399
|
54
53
|
nkululeko/feat_extract/feats_auddim.py,sha256=VlzKKXTXa5kjLgQBWyEFy-daIyU1SkOwCCOIhKsWCvE,3162
|
55
54
|
nkululeko/feat_extract/feats_audmodel.py,sha256=VjBNgAoxsHJhwr6Kwt9CxX6SaCM4RK_OV-GU2W5-bhU,3187
|
@@ -64,11 +63,11 @@ nkululeko/feat_extract/feats_praat.py,sha256=kZrS6srzH7WoWEd2prp1Dxw6g9JklFQGTNq
|
|
64
63
|
nkululeko/feat_extract/feats_snr.py,sha256=9dqZ-4RpK98iJEssM3ttozNd18LWlZYM_QVXvp5xDcs,2829
|
65
64
|
nkululeko/feat_extract/feats_spectra.py,sha256=5Pex8awIQC3cjQRHSu4NQFmg4quamG0RL3V3Yd0pJHs,3670
|
66
65
|
nkululeko/feat_extract/feats_spkrec.py,sha256=VK4ma3uWzM0YZStsgRTirfkbzjWIfRWSgsYI038QlRY,4803
|
67
|
-
nkululeko/feat_extract/feats_squim.py,sha256=
|
66
|
+
nkululeko/feat_extract/feats_squim.py,sha256=Y31YmDmscuG0YozvxyBZIutO3id8t7IZJWCfKucw-6M,4617
|
68
67
|
nkululeko/feat_extract/feats_trill.py,sha256=HXQBaPWTX0iNEjBY7RD8uyFeYjDieHqv8ZilE0Jb-Pg,3319
|
69
|
-
nkululeko/feat_extract/feats_wav2vec2.py,sha256=
|
68
|
+
nkululeko/feat_extract/feats_wav2vec2.py,sha256=9WUMfyddB_3nx79g7mZoQrRynhM1uEBWuOotRq8bxoU,5268
|
70
69
|
nkululeko/feat_extract/feats_wavlm.py,sha256=ulxpGjifUFx2ZgGmY32SmBJGIuvkYHoLb2n1LZ8KMwA,4703
|
71
|
-
nkululeko/feat_extract/feats_whisper.py,sha256=
|
70
|
+
nkululeko/feat_extract/feats_whisper.py,sha256=BFspQBI53HAgw22vBEeFskGwFZA-94Rpl17xM458HRo,4576
|
72
71
|
nkululeko/feat_extract/featureset.py,sha256=HtgW2389rmlRAgFP3F1sSFzq2_iUVr2NhOfIXG9omt0,1448
|
73
72
|
nkululeko/feat_extract/feinberg_praat.py,sha256=EP9pMALjlKdiYInLQdrZ7MmE499Mq-ISRCgqbqL3Rxc,21304
|
74
73
|
nkululeko/losses/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -77,13 +76,13 @@ nkululeko/losses/loss_softf1loss.py,sha256=5gW-PuiqeAZcRgfwjueIOQtMokOjZWgQnVIv5
|
|
77
76
|
nkululeko/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
78
77
|
nkululeko/models/model.py,sha256=oAdKq2wY5lYKfpZkQwO46ojYRsj_Z-FR56oR1uHAWI0,11569
|
79
78
|
nkululeko/models/model_bayes.py,sha256=wI7-sCwibqXMCHviu349TYjgJXXNXym-Z6ZM83uxlFQ,378
|
80
|
-
nkululeko/models/model_cnn.py,sha256=
|
79
|
+
nkululeko/models/model_cnn.py,sha256=revCxyeX69DU6OA63YTnF28UaAFV7AmUfqODMCE_pbQ,10002
|
81
80
|
nkululeko/models/model_gmm.py,sha256=onovzGBeguwZ-upXtuDLaBw9sd6fDDQslVBOrz1Z8TE,645
|
82
81
|
nkululeko/models/model_knn.py,sha256=5tGqiPo2JTw9VLmD-MXNZKFJ5RTLA6uv_blJDJ9lScA,573
|
83
82
|
nkululeko/models/model_knn_reg.py,sha256=Fbuk6Ku6eyrbbMEk7rB5dwfhvQOMsdZk6HI_0T0gYPw,580
|
84
83
|
nkululeko/models/model_lin_reg.py,sha256=NBTnY2ULuhUBt5ArYQwskZ2Vq4BBDGkqd9SYBFl7Ql4,392
|
85
|
-
nkululeko/models/model_mlp.py,sha256=
|
86
|
-
nkululeko/models/model_mlp_regression.py,sha256
|
84
|
+
nkululeko/models/model_mlp.py,sha256=IuNGrLPx54-ZmpydH2yJdm2ddCm4rgu59Csv5ikbEpI,9471
|
85
|
+
nkululeko/models/model_mlp_regression.py,sha256=-ailThquUXwLkOj5jlJ4qn1vlb3nSHW5s0KS7GLp4qI,10290
|
87
86
|
nkululeko/models/model_svm.py,sha256=QqwRjfG9I5y-57CcJAMUSbvYzV0DOlDcpDK5f4yQ_qw,914
|
88
87
|
nkululeko/models/model_svr.py,sha256=p-Mb4Bn54yOe1upuHQKNpfj4ttOmQnm9pCB7ECkJkJQ,699
|
89
88
|
nkululeko/models/model_tree.py,sha256=soXjV523eRvRZ-jbX7X_3S73Wto1B9bm7ZzzDmgYzTc,390
|
@@ -104,8 +103,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
104
103
|
nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
|
105
104
|
nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
|
106
105
|
nkululeko/utils/util.py,sha256=_Z6OMJ3f-8TdETW9eqJYY5hwNRS5XCt9azzRnqoTTZE,12330
|
107
|
-
nkululeko-0.83.
|
108
|
-
nkululeko-0.83.
|
109
|
-
nkululeko-0.83.
|
110
|
-
nkululeko-0.83.
|
111
|
-
nkululeko-0.83.
|
106
|
+
nkululeko-0.83.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
107
|
+
nkululeko-0.83.2.dist-info/METADATA,sha256=DMkXO8jSm6iR4eETrG2aEK__7MfPhpAvOe6Tf99n_HE,36158
|
108
|
+
nkululeko-0.83.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
109
|
+
nkululeko-0.83.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
110
|
+
nkululeko-0.83.2.dist-info/RECORD,,
|
nkululeko/reporter.py
DELETED
@@ -1,324 +0,0 @@
|
|
1
|
-
"""Reporter module.
|
2
|
-
|
3
|
-
This module contains the Reporter class which is responsible for generating reports.
|
4
|
-
"""
|
5
|
-
|
6
|
-
import ast
|
7
|
-
import glob
|
8
|
-
import json
|
9
|
-
import math
|
10
|
-
|
11
|
-
import matplotlib.pyplot as plt
|
12
|
-
import numpy as np
|
13
|
-
from scipy.stats import pearsonr
|
14
|
-
from sklearn.metrics import ConfusionMatrixDisplay
|
15
|
-
from sklearn.metrics import accuracy_score
|
16
|
-
from sklearn.metrics import classification_report
|
17
|
-
from sklearn.metrics import confusion_matrix
|
18
|
-
from sklearn.metrics import mean_absolute_error
|
19
|
-
from sklearn.metrics import mean_squared_error
|
20
|
-
from sklearn.metrics import r2_score
|
21
|
-
from sklearn.metrics import recall_score
|
22
|
-
from sklearn.utils import resample
|
23
|
-
|
24
|
-
import nkululeko.glob_conf as glob_conf
|
25
|
-
from nkululeko.reporting.defines import Header
|
26
|
-
from nkululeko.reporting.report_item import ReportItem
|
27
|
-
from nkululeko.result import Result
|
28
|
-
from nkululeko.utils.util import Util
|
29
|
-
|
30
|
-
|
31
|
-
class Reporter:
|
32
|
-
def __set_measure(self):
|
33
|
-
if self.util.exp_is_classification():
|
34
|
-
self.MEASURE = "UAR"
|
35
|
-
self.result.measure = self.MEASURE
|
36
|
-
self.is_classification = True
|
37
|
-
else:
|
38
|
-
self.is_classification = False
|
39
|
-
self.measure = self.util.config_val("MODEL", "measure", "mse")
|
40
|
-
if self.measure == "mse":
|
41
|
-
self.MEASURE = "MSE"
|
42
|
-
self.result.measure = self.MEASURE
|
43
|
-
elif self.measure == "mae":
|
44
|
-
self.MEASURE = "MAE"
|
45
|
-
self.result.measure = self.MEASURE
|
46
|
-
elif self.measure == "ccc":
|
47
|
-
self.MEASURE = "CCC"
|
48
|
-
self.result.measure = self.MEASURE
|
49
|
-
|
50
|
-
def __init__(self, truths, preds, run, epoch):
|
51
|
-
"""Initialization with ground truth und predictions vector"""
|
52
|
-
self.util = Util("reporter")
|
53
|
-
self.format = self.util.config_val("PLOT", "format", "png")
|
54
|
-
self.truths = truths
|
55
|
-
self.preds = preds
|
56
|
-
self.result = Result(0, 0, 0, 0, "unknown")
|
57
|
-
self.run = run
|
58
|
-
self.epoch = epoch
|
59
|
-
self.__set_measure()
|
60
|
-
self.cont_to_cat = False
|
61
|
-
if len(self.truths) > 0 and len(self.preds) > 0:
|
62
|
-
if self.util.exp_is_classification():
|
63
|
-
self.result.test = recall_score(
|
64
|
-
self.truths, self.preds, average="macro"
|
65
|
-
)
|
66
|
-
self.result.loss = 1 - accuracy_score(self.truths, self.preds)
|
67
|
-
else:
|
68
|
-
# regression experiment
|
69
|
-
if self.measure == "mse":
|
70
|
-
self.result.test = mean_squared_error(self.truths, self.preds)
|
71
|
-
elif self.measure == "mae":
|
72
|
-
self.result.test = mean_absolute_error(self.truths, self.preds)
|
73
|
-
elif self.measure == "ccc":
|
74
|
-
self.result.test = self.ccc(self.truths, self.preds)
|
75
|
-
if math.isnan(self.result.test):
|
76
|
-
self.util.debug(f"Truth: {self.truths}")
|
77
|
-
self.util.debug(f"Predict.: {self.preds}")
|
78
|
-
self.util.debug(f"Result is NAN: setting to -1")
|
79
|
-
self.result.test = -1
|
80
|
-
else:
|
81
|
-
self.util.error(f"unknown measure: {self.measure}")
|
82
|
-
|
83
|
-
# train and loss are being set by the model
|
84
|
-
|
85
|
-
def set_id(self, run, epoch):
|
86
|
-
"""Make the report identifiable with run and epoch index"""
|
87
|
-
self.run = run
|
88
|
-
self.epoch = epoch
|
89
|
-
|
90
|
-
def continuous_to_categorical(self):
|
91
|
-
if self.cont_to_cat:
|
92
|
-
return
|
93
|
-
self.cont_to_cat = True
|
94
|
-
bins = ast.literal_eval(glob_conf.config["DATA"]["bins"])
|
95
|
-
self.truths = np.digitize(self.truths, bins) - 1
|
96
|
-
self.preds = np.digitize(self.preds, bins) - 1
|
97
|
-
|
98
|
-
def plot_confmatrix(self, plot_name, epoch):
|
99
|
-
if not self.util.exp_is_classification():
|
100
|
-
self.continuous_to_categorical()
|
101
|
-
self._plot_confmat(self.truths, self.preds, plot_name, epoch)
|
102
|
-
|
103
|
-
|
104
|
-
def plot_per_speaker(self, result_df, plot_name, function):
|
105
|
-
"""Plot a confusion matrix with the mode category per speakers.
|
106
|
-
|
107
|
-
This function creates a confusion matrix for each speaker in the result_df.
|
108
|
-
The result_df should contain the columns: preds, truths and speaker.
|
109
|
-
|
110
|
-
Args:
|
111
|
-
* result_df: a pandas dataframe with columns: preds, truths and speaker
|
112
|
-
* plot_name: a string with the name of the plot
|
113
|
-
* function: a string with the function to use for each speaker,
|
114
|
-
can be 'mode' or 'mean'
|
115
|
-
|
116
|
-
Returns:
|
117
|
-
* None
|
118
|
-
"""
|
119
|
-
# Initialize empty arrays for predictions and truths
|
120
|
-
pred = np.zeros(0)
|
121
|
-
truth = np.zeros(0)
|
122
|
-
|
123
|
-
# Iterate over each speaker
|
124
|
-
for s in result_df.speaker.unique():
|
125
|
-
# Filter the dataframe for the current speaker
|
126
|
-
s_df = result_df[result_df.speaker == s]
|
127
|
-
|
128
|
-
# Get the mode or mean prediction for the current speaker
|
129
|
-
mode = s_df.pred.mode().iloc[-1]
|
130
|
-
mean = s_df.pred.mean()
|
131
|
-
if function == "mode":
|
132
|
-
s_df.pred = mode
|
133
|
-
elif function == "mean":
|
134
|
-
s_df.pred = mean
|
135
|
-
else:
|
136
|
-
self.util.error(f"unknown function {function}")
|
137
|
-
|
138
|
-
# Append the current speaker's predictions and truths to the arrays
|
139
|
-
pred = np.append(pred, s_df.pred.values)
|
140
|
-
truth = np.append(truth, s_df["truth"].values)
|
141
|
-
|
142
|
-
# If the experiment is not a classification or continuous to categorical conversion was performed,
|
143
|
-
# convert the truths and predictions to categorical
|
144
|
-
if not (self.is_classification or self.cont_to_cat):
|
145
|
-
bins = ast.literal_eval(glob_conf.config["DATA"]["bins"])
|
146
|
-
truth = np.digitize(truth, bins) - 1
|
147
|
-
pred = np.digitize(pred, bins) - 1
|
148
|
-
|
149
|
-
# Plot the confusion matrix for the speakers
|
150
|
-
self._plot_confmat(truth, pred.astype("int"), plot_name, 0)
|
151
|
-
|
152
|
-
def _plot_confmat(self, truths, preds, plot_name, epoch):
|
153
|
-
# print(truths)
|
154
|
-
# print(preds)
|
155
|
-
fig_dir = self.util.get_path("fig_dir")
|
156
|
-
labels = glob_conf.labels
|
157
|
-
fig = plt.figure() # figsize=[5, 5]
|
158
|
-
uar = recall_score(truths, preds, average="macro")
|
159
|
-
acc = accuracy_score(truths, preds)
|
160
|
-
cm = confusion_matrix(
|
161
|
-
truths, preds, normalize=None
|
162
|
-
) # normalize must be one of {'true', 'pred', 'all', None}
|
163
|
-
if cm.shape[0] != len(labels):
|
164
|
-
self.util.error(
|
165
|
-
f"mismatch between confmatrix dim ({cm.shape[0]}) and labels"
|
166
|
-
f" length ({len(labels)}: {labels})"
|
167
|
-
)
|
168
|
-
try:
|
169
|
-
disp = ConfusionMatrixDisplay(
|
170
|
-
confusion_matrix=cm, display_labels=labels
|
171
|
-
).plot(cmap="Blues")
|
172
|
-
except ValueError:
|
173
|
-
disp = ConfusionMatrixDisplay(
|
174
|
-
confusion_matrix=cm,
|
175
|
-
display_labels=list(labels).remove("neutral"),
|
176
|
-
).plot(cmap="Blues")
|
177
|
-
|
178
|
-
reg_res = ""
|
179
|
-
if not self.is_classification:
|
180
|
-
reg_res = f", {self.MEASURE}: {self.result.test:.3f}"
|
181
|
-
|
182
|
-
if epoch != 0:
|
183
|
-
plt.title(f"Confusion Matrix, UAR: {uar:.3f}{reg_res}, Epoch: {epoch}")
|
184
|
-
else:
|
185
|
-
plt.title(f"Confusion Matrix, UAR: {uar:.3f}{reg_res}")
|
186
|
-
img_path = f"{fig_dir}{plot_name}.{self.format}"
|
187
|
-
plt.savefig(img_path)
|
188
|
-
fig.clear()
|
189
|
-
plt.close(fig)
|
190
|
-
plt.savefig(img_path)
|
191
|
-
plt.close(fig)
|
192
|
-
glob_conf.report.add_item(
|
193
|
-
ReportItem(
|
194
|
-
Header.HEADER_RESULTS,
|
195
|
-
self.util.get_model_description(),
|
196
|
-
"Confusion matrix",
|
197
|
-
img_path,
|
198
|
-
)
|
199
|
-
)
|
200
|
-
|
201
|
-
res_dir = self.util.get_path("res_dir")
|
202
|
-
uar = int(uar * 1000) / 1000.0
|
203
|
-
acc = int(acc * 1000) / 1000.0
|
204
|
-
rpt = f"epoch: {epoch}, UAR: {uar}, ACC: {acc}"
|
205
|
-
# print(rpt)
|
206
|
-
self.util.debug(rpt)
|
207
|
-
file_name = f"{res_dir}{self.util.get_exp_name()}_conf.txt"
|
208
|
-
with open(file_name, "w") as text_file:
|
209
|
-
text_file.write(rpt)
|
210
|
-
|
211
|
-
def print_results(self, epoch):
|
212
|
-
"""Print all evaluation values to text file"""
|
213
|
-
res_dir = self.util.get_path("res_dir")
|
214
|
-
file_name = f"{res_dir}{self.util.get_exp_name()}_{epoch}.txt"
|
215
|
-
if self.util.exp_is_classification():
|
216
|
-
labels = glob_conf.labels
|
217
|
-
try:
|
218
|
-
rpt = classification_report(
|
219
|
-
self.truths,
|
220
|
-
self.preds,
|
221
|
-
target_names=labels,
|
222
|
-
output_dict=True,
|
223
|
-
)
|
224
|
-
except ValueError as e:
|
225
|
-
self.util.debug(
|
226
|
-
"Reporter: caught a ValueError when trying to get"
|
227
|
-
" classification_report: " + e
|
228
|
-
)
|
229
|
-
rpt = self.result.to_string()
|
230
|
-
with open(file_name, "w") as text_file:
|
231
|
-
c_ress = list(range(len(labels)))
|
232
|
-
for i, l in enumerate(labels):
|
233
|
-
c_res = rpt[l]["f1-score"]
|
234
|
-
c_ress[i] = float(f"{c_res:.3f}")
|
235
|
-
self.util.debug(f"labels: {labels}")
|
236
|
-
f1_per_class = f"result per class (F1 score): {c_ress}"
|
237
|
-
self.util.debug(f1_per_class)
|
238
|
-
rpt_str = f"{json.dumps(rpt)}\n{f1_per_class}"
|
239
|
-
text_file.write(rpt_str)
|
240
|
-
glob_conf.report.add_item(
|
241
|
-
ReportItem(
|
242
|
-
Header.HEADER_RESULTS,
|
243
|
-
f"Classification result {self.util.get_model_description()}",
|
244
|
-
rpt_str,
|
245
|
-
)
|
246
|
-
)
|
247
|
-
|
248
|
-
else: # regression
|
249
|
-
result = self.result.test
|
250
|
-
r2 = r2_score(self.truths, self.preds)
|
251
|
-
pcc = pearsonr(self.truths, self.preds)[0]
|
252
|
-
measure = self.util.config_val("MODEL", "measure", "mse")
|
253
|
-
with open(file_name, "w") as text_file:
|
254
|
-
text_file.write(
|
255
|
-
f"{measure}: {result:.3f}, r_2: {r2:.3f}, pcc {pcc:.3f}"
|
256
|
-
)
|
257
|
-
|
258
|
-
def make_conf_animation(self, out_name):
|
259
|
-
import imageio
|
260
|
-
|
261
|
-
fig_dir = self.util.get_path("fig_dir")
|
262
|
-
filenames = glob.glob(fig_dir + f"{self.util.get_plot_name()}*_?_???_cnf.png")
|
263
|
-
images = []
|
264
|
-
for filename in filenames:
|
265
|
-
images.append(imageio.imread(filename))
|
266
|
-
fps = self.util.config_val("PLOT", "fps", "1")
|
267
|
-
try:
|
268
|
-
imageio.mimsave(fig_dir + out_name, images, fps=int(fps))
|
269
|
-
except RuntimeError as e:
|
270
|
-
self.util.error("error writing anim gif: " + e)
|
271
|
-
|
272
|
-
def get_result(self):
|
273
|
-
return self.result
|
274
|
-
|
275
|
-
def plot_epoch_progression(self, reports, out_name):
|
276
|
-
fig_dir = self.util.get_path("fig_dir")
|
277
|
-
results, losses, train_results, losses_eval = [], [], [], []
|
278
|
-
for r in reports:
|
279
|
-
results.append(r.get_result().test)
|
280
|
-
losses.append(r.get_result().loss)
|
281
|
-
train_results.append(r.get_result().train)
|
282
|
-
losses_eval.append(r.get_result().loss_eval)
|
283
|
-
|
284
|
-
# do a plot per run
|
285
|
-
# scale the losses so they fit on the picture
|
286
|
-
losses, results, train_results, losses_eval = (
|
287
|
-
np.asarray(losses),
|
288
|
-
np.asarray(results),
|
289
|
-
np.asarray(train_results),
|
290
|
-
np.asarray(losses_eval),
|
291
|
-
)
|
292
|
-
|
293
|
-
if np.all((results > 1)):
|
294
|
-
# scale down values
|
295
|
-
results = results / 100.0
|
296
|
-
train_results = train_results / 100.0
|
297
|
-
# if np.all((losses < 1)):
|
298
|
-
# scale up values
|
299
|
-
plt.figure(dpi=200)
|
300
|
-
plt.plot(train_results, "green", label="train set")
|
301
|
-
plt.plot(results, "red", label="dev set")
|
302
|
-
plt.plot(losses, "black", label="losses")
|
303
|
-
plt.plot(losses_eval, "grey", label="losses_eval")
|
304
|
-
plt.xlabel("epochs")
|
305
|
-
plt.ylabel(f"{self.MEASURE}")
|
306
|
-
plt.legend()
|
307
|
-
plt.savefig(f"{fig_dir}{out_name}.{self.format}")
|
308
|
-
plt.close()
|
309
|
-
|
310
|
-
@staticmethod
|
311
|
-
def ccc(ground_truth, prediction):
|
312
|
-
mean_gt = np.mean(ground_truth, 0)
|
313
|
-
mean_pred = np.mean(prediction, 0)
|
314
|
-
var_gt = np.var(ground_truth, 0)
|
315
|
-
var_pred = np.var(prediction, 0)
|
316
|
-
v_pred = prediction - mean_pred
|
317
|
-
v_gt = ground_truth - mean_gt
|
318
|
-
cor = sum(v_pred * v_gt) / (np.sqrt(sum(v_pred**2)) * np.sqrt(sum(v_gt**2)))
|
319
|
-
sd_gt = np.std(ground_truth)
|
320
|
-
sd_pred = np.std(prediction)
|
321
|
-
numerator = 2 * cor * sd_gt * sd_pred
|
322
|
-
denominator = var_gt + var_pred + (mean_gt - mean_pred) ** 2
|
323
|
-
ccc = numerator / denominator
|
324
|
-
return ccc
|
File without changes
|
File without changes
|
File without changes
|