nkululeko 0.83.0__py3-none-any.whl → 0.83.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
- VERSION="0.83.0"
+ VERSION="0.83.2"
  SAMPLING_RATE = 16000
nkululeko/experiment.py CHANGED
@@ -675,12 +675,16 @@ class Experiment:
  test_predictor = TestPredictor(
  model, self.df_test, self.label_encoder, result_name
  )
- test_predictor.predict_and_store()
+ result = test_predictor.predict_and_store()
+ return result

  def load(self, filename):
- f = open(filename, "rb")
- tmp_dict = pickle.load(f)
- f.close()
+ try:
+ f = open(filename, "rb")
+ tmp_dict = pickle.load(f)
+ f.close()
+ except EOFError as eof:
+ self.util.error(f"can't open file {filename}: {eof}")
  self.__dict__.update(tmp_dict)
  glob_conf.set_labels(self.labels)
nkululeko/feat_extract/feats_agender_agender.py CHANGED
@@ -28,9 +28,11 @@ class AgenderAgenderSet(Featureset):
  if not os.path.isdir(model_root):
  cache_root = audeer.mkdir("cache")
  model_root = audeer.mkdir(model_root)
- archive_path = audeer.download_url(model_url, cache_root, verbose=True)
+ archive_path = audeer.download_url(
+ model_url, cache_root, verbose=True)
  audeer.extract_archive(archive_path, model_root)
- device = self.util.config_val("MODEL", "device", "cpu")
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
+ device = self.util.config_val("MODEL", "device", cuda)
  self.model = audonnx.load(model_root, device=device)
  # pytorch_total_params = sum(p.numel() for p in self.model.parameters())
  # self.util.debug(
nkululeko/feat_extract/feats_squim.py CHANGED
@@ -28,12 +28,17 @@ from nkululeko.utils.util import Util


  class SquimSet(Featureset):
- """Class to predict SQUIM features"""
+ """Class to predict SQUIM features."""

  def __init__(self, name, data_df, feats_type):
- """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
+ """Constructor.
+
+ Is_train is needed to distinguish from test/dev sets,
+ because they use the codebook from the training.
+ """
  super().__init__(name, data_df, feats_type)
- self.device = self.util.config_val("MODEL", "device", "cpu")
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
+ self.device = self.util.config_val("MODEL", "device", cuda)
  self.model_initialized = False

  def init_model(self):
nkululeko/feat_extract/feats_wav2vec2.py CHANGED
@@ -21,7 +21,11 @@ class Wav2vec2(Featureset):
  """Class to extract wav2vec2 embeddings"""

  def __init__(self, name, data_df, feat_type):
- """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
+ """Constructor.
+
+ If_train is needed to distinguish from test/dev sets,
+ because they use the codebook from the training
+ """
  super().__init__(name, data_df, feat_type)
  cuda = "cuda" if torch.cuda.is_available() else "cpu"
  self.device = self.util.config_val("MODEL", "device", cuda)
@@ -39,8 +43,7 @@ class Wav2vec2(Featureset):
  )
  config = transformers.AutoConfig.from_pretrained(model_path)
  layer_num = config.num_hidden_layers
- hidden_layer = int(self.util.config_val(
- "FEATS", "wav2vec2.layer", "0"))
+ hidden_layer = int(self.util.config_val("FEATS", "wav2vec2.layer", "0"))
  config.num_hidden_layers = layer_num - hidden_layer
  self.util.debug(f"using hidden layer #{config.num_hidden_layers}")
  self.processor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
@@ -55,8 +58,7 @@ class Wav2vec2(Featureset):
  """Extract the features or load them from disk if present."""
  store = self.util.get_path("store")
  storage = f"{store}{self.name}.pkl"
- extract = self.util.config_val(
- "FEATS", "needs_feature_extraction", False)
+ extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
  if extract or no_reuse or not os.path.isfile(storage):
  if not self.model_initialized:
@@ -77,8 +79,7 @@ class Wav2vec2(Featureset):
  emb = self.get_embeddings(signal, sampling_rate, file)
  emb_series[idx] = emb
  # print(f"emb_series shape: {emb_series.shape}")
- self.df = pd.DataFrame(
- emb_series.values.tolist(), index=self.data_df.index)
+ self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
  # print(f"df shape: {self.df.shape}")
  self.df.to_pickle(storage)
  try:
nkululeko/feat_extract/feats_whisper.py CHANGED
@@ -32,19 +32,22 @@ class Whisper(Featureset):
  model_name = f"openai/{self.feat_type}"
  self.model = WhisperModel.from_pretrained(model_name).to(self.device)
  print(f"intialized Whisper model on {self.device}")
- self.feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
+ self.feature_extractor = AutoFeatureExtractor.from_pretrained(
+ model_name)
  self.model_initialized = True

  def extract(self):
  """Extract the features or load them from disk if present."""
  store = self.util.get_path("store")
  storage = f"{store}{self.name}.pkl"
- extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
+ extract = self.util.config_val(
+ "FEATS", "needs_feature_extraction", False)
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
  if extract or no_reuse or not os.path.isfile(storage):
  if not self.model_initialized:
  self.init_model()
- self.util.debug("extracting whisper embeddings, this might take a while...")
+ self.util.debug(
+ "extracting whisper embeddings, this might take a while...")
  emb_series = []
  for (file, start, end), _ in audeer.progress_bar(
  self.data_df.iterrows(),
nkululeko/models/model_cnn.py CHANGED
@@ -16,6 +16,7 @@ import numpy as np
  from sklearn.metrics import recall_score
  from collections import OrderedDict
  from PIL import Image
+ from traitlets import default

  from nkululeko.utils.util import Util
  import nkululeko.glob_conf as glob_conf
@@ -48,6 +49,7 @@ class CNN_model(Model):
  self.util.error(f"unknown loss function: {criterion}")
  self.util.debug(f"using model with cross entropy loss function")
  # set up the model
+ # cuda = "cuda" if torch.cuda.is_available() else "cpu"
  self.device = self.util.config_val("MODEL", "device", "cpu")
  try:
  layers_string = glob_conf.config["MODEL"]["layers"]
@@ -84,7 +86,8 @@ class CNN_model(Model):
  train_set = self.Dataset_image(
  feats_train, df_train, self.target, transformations
  )
- test_set = self.Dataset_image(feats_test, df_test, self.target, transformations)
+ test_set = self.Dataset_image(
+ feats_test, df_test, self.target, transformations)
  # Define data loaders
  self.trainloader = torch.utils.data.DataLoader(
  train_set,
@@ -137,7 +140,8 @@ class CNN_model(Model):
  losses = []
  for images, labels in self.trainloader:
  logits = self.model(images.to(self.device))
- loss = self.criterion(logits, labels.to(self.device, dtype=torch.int64))
+ loss = self.criterion(logits, labels.to(
+ self.device, dtype=torch.int64))
  losses.append(loss.item())
  self.optimizer.zero_grad()
  loss.backward()
@@ -165,14 +169,16 @@ class CNN_model(Model):

  self.loss_eval = (np.asarray(losses)).mean()
  predictions = logits.argmax(dim=1)
- uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
+ uar = recall_score(
+ targets.numpy(), predictions.numpy(), average="macro")
  return uar, targets, predictions

  def predict(self):
  _, truths, predictions = self.evaluate_model(
  self.model, self.testloader, self.device
  )
- uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
+ uar, _, _ = self.evaluate_model(
+ self.model, self.trainloader, self.device)
  report = Reporter(truths, predictions, self.run, self.epoch)
  try:
  report.result.loss = self.loss
@@ -209,7 +215,8 @@ class CNN_model(Model):
  dir = self.util.get_path("model_dir")
  # name = f'{self.util.get_exp_name()}_{run}_{epoch:03d}.model'
  name = f"{self.util.get_exp_name(only_train=True)}_{self.run}_{self.epoch:03d}.model"
- self.device = self.util.config_val("MODEL", "device", "cpu")
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
+ self.device = self.util.config_val("MODEL", "device", cuda)
  layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
  self.store_path = dir + name
  drop = self.util.config_val("MODEL", "drop", False)
@@ -222,7 +229,8 @@ class CNN_model(Model):
  def load_path(self, path, run, epoch):
  self.set_id(run, epoch)
  with open(path, "rb") as handle:
- self.device = self.util.config_val("MODEL", "device", "cpu")
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
+ self.device = self.util.config_val("MODEL", "device", cuda)
  layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
  self.store_path = path
  drop = self.util.config_val("MODEL", "drop", False)
nkululeko/models/model_mlp.py CHANGED
@@ -34,8 +34,9 @@ class MLP_model(Model):
  else:
  self.util.error(f"unknown loss function: {criterion}")
  self.util.debug(f"using model with cross entropy loss function")
- # set up the model
- self.device = self.util.config_val("MODEL", "device", "cpu")
+ # set up the model, use GPU if availabe
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
+ self.device = self.util.config_val("MODEL", "device", cuda)
  try:
  layers_string = glob_conf.config["MODEL"]["layers"]
  except KeyError as ke:
@@ -86,7 +87,8 @@ class MLP_model(Model):
  losses = []
  for features, labels in self.trainloader:
  logits = self.model(features.to(self.device))
- loss = self.criterion(logits, labels.to(self.device, dtype=torch.int64))
+ loss = self.criterion(logits, labels.to(
+ self.device, dtype=torch.int64))
  losses.append(loss.item())
  self.optimizer.zero_grad()
  loss.backward()
@@ -114,14 +116,16 @@ class MLP_model(Model):

  self.loss_eval = (np.asarray(losses)).mean()
  predictions = logits.argmax(dim=1)
- uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
+ uar = recall_score(
+ targets.numpy(), predictions.numpy(), average="macro")
  return uar, targets, predictions

  def predict(self):
  _, truths, predictions = self.evaluate_model(
  self.model, self.testloader, self.device
  )
- uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
+ uar, _, _ = self.evaluate_model(
+ self.model, self.trainloader, self.device)
  report = Reporter(truths, predictions, self.run, self.epoch)
  try:
  report.result.loss = self.loss
@@ -179,6 +183,9 @@ class MLP_model(Model):
  features = np.reshape(features, (-1, 1)).T
  logits = self.model(features.to(self.device))
  # logits = self.model(features)
+ # if tensor conver to cpu
+ if isinstance(logits, torch.Tensor):
+ logits = logits.cpu()
  a = logits.numpy()
  res = {}
  for i in range(len(a[0])):
@@ -196,7 +203,8 @@ class MLP_model(Model):
  dir = self.util.get_path("model_dir")
  # name = f'{self.util.get_exp_name()}_{run}_{epoch:03d}.model'
  name = f"{self.util.get_exp_name(only_train=True)}_{self.run}_{self.epoch:03d}.model"
- self.device = self.util.config_val("MODEL", "device", "cpu")
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
+ self.device = self.util.config_val("MODEL", "device", cuda)
  layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
  self.store_path = dir + name
  drop = self.util.config_val("MODEL", "drop", False)
@@ -211,7 +219,8 @@ class MLP_model(Model):
  def load_path(self, path, run, epoch):
  self.set_id(run, epoch)
  with open(path, "rb") as handle:
- self.device = self.util.config_val("MODEL", "device", "cpu")
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
+ self.device = self.util.config_val("MODEL", "device", cuda)
  layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
  self.store_path = path
  drop = self.util.config_val("MODEL", "drop", False)
nkululeko/models/model_mlp_regression.py CHANGED
@@ -9,6 +9,7 @@ import torch
  from audmetric import concordance_cc
  from audmetric import mean_absolute_error
  from audmetric import mean_squared_error
+ from traitlets import default

  import nkululeko.glob_conf as glob_conf
  from nkululeko.losses.loss_ccc import ConcordanceCorCoeff
@@ -40,7 +41,8 @@ class MLP_Reg_model(Model):
  self.util.error(f"unknown loss function: {criterion}")
  self.util.debug(f"training model with {criterion} loss function")
  # set up the model
- self.device = self.util.config_val("MODEL", "device", "cpu")
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
+ self.device = self.util.config_val("MODEL", "device", cuda)
  layers_string = glob_conf.config["MODEL"]["layers"]
  self.util.debug(f"using layers {layers_string}")
  try:
@@ -50,7 +52,8 @@ class MLP_Reg_model(Model):
  drop = self.util.config_val("MODEL", "drop", False)
  if drop:
  self.util.debug(f"training with dropout: {drop}")
- self.model = self.MLP(feats_train.shape[1], layers, 1, drop).to(self.device)
+ self.model = self.MLP(
+ feats_train.shape[1], layers, 1, drop).to(self.device)
  self.learning_rate = float(
  self.util.config_val("MODEL", "learning_rate", 0.0001)
  )
@@ -93,8 +96,10 @@ class MLP_Reg_model(Model):
  _, truths, predictions = self.evaluate_model(
  self.model, self.testloader, self.device
  )
- result, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
- report = Reporter(truths.numpy(), predictions.numpy(), self.run, self.epoch)
+ result, _, _ = self.evaluate_model(
+ self.model, self.trainloader, self.device)
+ report = Reporter(truths.numpy(), predictions.numpy(),
+ self.run, self.epoch)
  try:
  report.result.loss = self.loss
  except AttributeError: # if the model was loaded from disk the loss is unknown
@@ -128,9 +133,11 @@ class MLP_Reg_model(Model):

  def __getitem__(self, item):
  index = self.df.index[item]
- features = self.df_features.loc[index, :].values.astype("float32").squeeze()
+ features = self.df_features.loc[index, :].values.astype(
+ "float32").squeeze()
  labels = (
- np.array([self.df.loc[index, self.label]]).astype("float32").squeeze()
+ np.array([self.df.loc[index, self.label]]
+ ).astype("float32").squeeze()
  )
  return features, labels

@@ -187,7 +194,8 @@ class MLP_Reg_model(Model):
  end_index = (index + 1) * loader.batch_size
  if end_index > len(loader.dataset):
  end_index = len(loader.dataset)
- logits[start_index:end_index] = model(features.to(device)).reshape(-1)
+ logits[start_index:end_index] = model(
+ features.to(device)).reshape(-1)
  targets[start_index:end_index] = labels
  loss = self.criterion(
  logits[start_index:end_index].to(
nkululeko/nkuluflag.py CHANGED
@@ -2,13 +2,16 @@ import argparse
  import configparser
  import os
  import os.path
+ import sys

  from nkululeko.nkululeko import doit as nkulu
+ from nkululeko.test import do_it as test_mod


- def do_it(src_dir):
+ def doit(cla):
  parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
  parser.add_argument("--config", help="The base configuration")
+ parser.add_argument("--mod", default="nkulu", help="Which nkululeko module to call")
  parser.add_argument("--data", help="The databases", nargs="*", action="append")
  parser.add_argument(
  "--label", nargs="*", help="The labels for the target", action="append"
@@ -25,20 +28,23 @@ def do_it(src_dir):
  parser.add_argument("--model", default="xgb", help="The model type")
  parser.add_argument("--feat", default="['os']", help="The feature type")
  parser.add_argument("--set", help="The opensmile set")
- parser.add_argument("--with_os", help="To add os features")
  parser.add_argument("--target", help="The target designation")
  parser.add_argument("--epochs", help="The number of epochs")
  parser.add_argument("--runs", help="The number of runs")
  parser.add_argument("--learning_rate", help="The learning rate")
  parser.add_argument("--drop", help="The dropout rate [0:1]")

- args = parser.parse_args()
+ args = parser.parse_args(cla)

  if args.config is not None:
  config_file = args.config
  else:
  print("ERROR: need config file")
  quit(-1)
+
+ if args.mod is not None:
+ nkulu_mod = args.mod
+
  # test if config is there
  if not os.path.isfile(config_file):
  print(f"ERROR: no such file {config_file}")
@@ -86,10 +92,17 @@ def do_it(src_dir):
  with open(tmp_config, "w") as tmp_file:
  config.write(tmp_file)

- result, last_epoch = nkulu(tmp_config)
+ result, last_epoch = 0, 0
+ if nkulu_mod == "nkulu":
+ result, last_epoch = nkulu(tmp_config)
+ elif nkulu_mod == "test":
+ result, last_epoch = test_mod(tmp_config, "test_results.csv")
+ else:
+ print(f"ERROR: unknown module: {nkulu_mod}, should be [nkulu | test]")
  return result, last_epoch


  if __name__ == "__main__":
- cwd = os.path.dirname(os.path.abspath(__file__))
- do_it(cwd) # sys.argv[1])
+ cla = sys.argv
+ cla.pop(0)
+ doit(cla) # sys.argv[1])
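
Note on the nkuluflag rework above (version 0.83.1): the entry point is now doit(cla), which parses an explicit argument list and dispatches either to the main nkululeko module or to the new test module via the --mod flag. A minimal usage sketch, not taken from the package documentation ("exp.ini" is a placeholder config path):

    from nkululeko.nkuluflag import doit

    # default module ("nkulu"): run a normal experiment from the config
    result, last_epoch = doit(["--config", "exp.ini"])

    # new in 0.83.1: run the test module on a previously trained experiment;
    # per the code above, predictions go to "test_results.csv"
    result, last_epoch = doit(["--config", "exp.ini", "--mod", "test"])
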
nkululeko/plots.py CHANGED
@@ -28,7 +28,8 @@ class Plots:
  df_speaker["samplenum"] = df_speaker.shape[0]
  df_speakers = pd.concat([df_speakers, df_speaker.head(1)])
  # plot the distribution of samples per speaker
- fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
+ # one up because of the runs
+ fig_dir = self.util.get_path("fig_dir") + "../"
  self.util.debug(f"plotting samples per speaker")
  if "gender" in df_speakers:
  filename = f"samples_value_counts"
@@ -137,7 +138,8 @@ class Plots:
  df, att1, class_label, att1, type_s
  )
  else:
- ax, caption = self._plot2cont(df, class_label, att1, type_s)
+ ax, caption = self._plot2cont(
+ df, class_label, att1, type_s)
  self._save_plot(
  ax,
  caption,
@@ -150,7 +152,8 @@ class Plots:
  att1 = att[0]
  att2 = att[1]
  if att1 == self.target or att2 == self.target:
- self.util.debug(f"no need to correlate {self.target} with itself")
+ self.util.debug(
+ f"no need to correlate {self.target} with itself")
  return
  if att1 not in df:
  self.util.error(f"unknown feature: {att1}")
@@ -165,7 +168,8 @@ class Plots:
  if self.util.is_categorical(df[att1]):
  if self.util.is_categorical(df[att2]):
  # class_label = cat, att1 = cat, att2 = cat
- ax, caption = self._plot2cat(df, att1, att2, att1, type_s)
+ ax, caption = self._plot2cat(
+ df, att1, att2, att1, type_s)
  else:
  # class_label = cat, att1 = cat, att2 = cont
  ax, caption = self._plotcatcont(
@@ -186,7 +190,8 @@ class Plots:
  if self.util.is_categorical(df[att1]):
  if self.util.is_categorical(df[att2]):
  # class_label = cont, att1 = cat, att2 = cat
- ax, caption = self._plot2cat(df, att1, att2, att1, type_s)
+ ax, caption = self._plot2cat(
+ df, att1, att2, att1, type_s)
  else:
  # class_label = cont, att1 = cat, att2 = cont
  ax, caption = self._plot2cont_cat(
@@ -200,7 +205,8 @@ class Plots:
  )
  else:
  # class_label = cont, att1 = cont, att2 = cont
- ax, caption = self._plot2cont(df, att1, att2, type_s)
+ ax, caption = self._plot2cont(
+ df, att1, att2, type_s)

  self._save_plot(
  ax, caption, f"Correlation of {att1} and {att2}", filename, type_s
@@ -213,7 +219,8 @@ class Plots:
  )

  def _save_plot(self, ax, caption, header, filename, type_s):
- fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
+ # one up because of the runs
+ fig_dir = self.util.get_path("fig_dir") + "../"
  fig = ax.figure
  # avoid warning
  # plt.tight_layout()
@@ -231,7 +238,8 @@ class Plots:
  )

  def _check_binning(self, att, df):
- bin_reals_att = eval(self.util.config_val("EXPL", f"{att}.bin_reals", "False"))
+ bin_reals_att = eval(self.util.config_val(
+ "EXPL", f"{att}.bin_reals", "False"))
  if bin_reals_att:
  self.util.debug(f"binning continuous variable {att} to categories")
  att_new = f"{att}_binned"
@@ -305,7 +313,8 @@ class Plots:
  return ax, caption

  def plot_durations(self, df, filename, sample_selection, caption=""):
- fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
+ # one up because of the runs
+ fig_dir = self.util.get_path("fig_dir") + "../"
  try:
  ax = sns.histplot(df, x="duration", hue="class_label", kde=True)
  except AttributeError as ae:
@@ -333,7 +342,8 @@ class Plots:

  def describe_df(self, name, df, target, filename):
  """Make a stacked barplot of samples and speakers per sex and target values. speaker, gender and target columns must be present"""
- fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
+ fig_dir = self.util.get_path(
+ "fig_dir") + "../" # one up because of the runs
  sampl_num = df.shape[0]
  sex_col = "gender"
  if target == "gender":
@@ -380,8 +390,10 @@ class Plots:

  def scatter_plot(self, feats, label_df, label, dimred_type):
  dim_num = int(self.util.config_val("EXPL", "scatter.dim", 2))
- fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
- sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
+ # one up because of the runs
+ fig_dir = self.util.get_path("fig_dir") + "../"
+ sample_selection = self.util.config_val(
+ "EXPL", "sample_selection", "all")
  filename = f"{label}_{self.util.get_feattype_name()}_{sample_selection}_{dimred_type}_{str(dim_num)}d"
  filename = f"{fig_dir}{filename}.{self.format}"
  self.util.debug(f"computing {dimred_type}, this might take a while...")
@@ -423,7 +435,8 @@ class Plots:

  if dim_num == 2:
  plot_data = np.vstack((data.T, labels)).T
- plot_df = pd.DataFrame(data=plot_data, columns=("Dim_1", "Dim_2", "label"))
+ plot_df = pd.DataFrame(
+ data=plot_data, columns=("Dim_1", "Dim_2", "label"))
  # plt.tight_layout()
  ax = (
  sns.FacetGrid(plot_df, hue="label", height=6)
@@ -515,7 +528,8 @@ class Plots:
  def plot_feature(self, title, feature, label, df_labels, df_features):
  # remove fullstops in the name
  feature_name = feature.replace(".", "-")
- fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
+ # one up because of the runs
+ fig_dir = self.util.get_path("fig_dir") + "../"
  filename = f"{fig_dir}feat_dist_{title}_{feature_name}.{self.format}"
  if self.util.is_categorical(df_labels[label]):
  df_plot = pd.DataFrame(
@@ -554,7 +568,8 @@ class Plots:
  tree.plot_tree(model, feature_names=list(features.columns), ax=ax)
  # plt.tight_layout()
  # print(ax)
- fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
+ # one up because of the runs
+ fig_dir = self.util.get_path("fig_dir") + "../"
  exp_name = self.util.get_exp_name(only_data=True)
  format = self.util.config_val("PLOT", "format", "png")
  filename = f"{fig_dir}{exp_name}EXPL_tree-plot.{format}"
nkululeko/test.py CHANGED
@@ -10,20 +10,7 @@ from nkululeko.experiment import Experiment
  from nkululeko.utils.util import Util


- def main(src_dir):
- parser = argparse.ArgumentParser(
- description="Call the nkululeko TEST framework.")
- parser.add_argument("--config", default="exp.ini",
- help="The base configuration")
- parser.add_argument(
- "--outfile",
- default="my_results.csv",
- help="File name to store the predictions",
- )
-
- args = parser.parse_args()
-
- config_file = args.config
+ def do_it(config_file, outfile):

  # test if the configuration file exists
  if not os.path.isfile(config_file):
@@ -48,10 +35,28 @@ def main(src_dir):
  expr.load(f"{util.get_save_name()}")
  expr.fill_tests()
  expr.extract_test_feats()
- expr.predict_test_and_save(args.outfile)
+ result = expr.predict_test_and_save(outfile)

  print("DONE")

+ return result, 0
+
+
+ def main(src_dir):
+ parser = argparse.ArgumentParser(description="Call the nkululeko TEST framework.")
+ parser.add_argument("--config", default="exp.ini", help="The base configuration")
+ parser.add_argument(
+ "--outfile",
+ default="my_results.csv",
+ help="File name to store the predictions",
+ )
+ args = parser.parse_args()
+ if args.config is not None:
+ config_file = args.config
+ else:
+ config_file = f"{src_dir}/exp.ini"
+ do_it(config_file, args.outfile)
+

  if __name__ == "__main__":
  cwd = os.path.dirname(os.path.abspath(__file__))
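
The test.py refactor above separates the CLI wrapper (main) from a programmatic entry point, do_it(config_file, outfile), which returns the test result plus a dummy epoch. A sketch of calling it directly (placeholder file names; the experiment referenced by the config must already have been trained and saved):

    from nkululeko.test import do_it

    result, last_epoch = do_it("exp.ini", "my_results.csv")
    print(f"test set result: {result}")
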
nkululeko/test_predictor.py CHANGED
@@ -29,6 +29,7 @@ class TestPredictor:

  def predict_and_store(self):
  label_data = self.util.config_val("DATA", "label_data", False)
+ result = 0
  if label_data:
  data = Dataset(label_data)
  data.load()
@@ -57,6 +58,7 @@ class TestPredictor:
  test_dbs_string = "_".join(test_dbs)
  predictions = self.model.get_predictions()
  report = self.model.predict()
+ result = report.result.get_result()
  report.set_filename_add(f"test-{test_dbs_string}")
  self.util.print_best_results([report])
  report.plot_confmatrix(self.util.get_plot_name(), 0)
@@ -74,3 +76,4 @@ class TestPredictor:
  df = df.rename(columns={"class_label": target})
  df.to_csv(self.name)
  self.util.debug(f"results stored in {self.name}")
+ return result
nkululeko-0.83.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: nkululeko
- Version: 0.83.0
+ Version: 0.83.2
  Summary: Machine learning audio prediction experiments based on templates
  Home-page: https://github.com/felixbur/nkululeko
  Author: Felix Burkhardt
@@ -333,6 +333,14 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
  Changelog
  =========

+ Version 0.83.2
+ --------------
+ * added default cuda if present and not stated
+
+ Version 0.83.1
+ --------------
+ * add test module to nkuluflag
+
  Version 0.83.0
  --------------
  * test module now prints out reports
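
The 0.83.2 changelog entry ("added default cuda if present and not stated") refers to the device handling seen in the model and feature-extractor diffs above: an explicit [MODEL] device value from the configuration still takes precedence, but the fallback is now derived from torch.cuda.is_available() instead of a hard-coded "cpu". A simplified sketch of the pattern outside the Util/config machinery (hypothetical helper, not part of the package API):

    import torch

    def default_device(configured=None):
        # an explicit [MODEL] device setting from the INI file wins
        if configured:
            return configured
        # otherwise fall back to CUDA when a GPU is visible
        return "cuda" if torch.cuda.is_available() else "cpu"

    print(default_device())        # "cuda" on a GPU machine, else "cpu"
    print(default_device("cpu"))   # explicit setting is respected
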
nkululeko-0.83.2.dist-info/RECORD CHANGED
@@ -2,11 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
  nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
  nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
- nkululeko/constants.py,sha256=NNx53OyRpXv780Ycj6Cdw4bDJfdvEn180CaN2PcmQkY,39
+ nkululeko/constants.py,sha256=VE94aCLZ8N-hTKIgb4OLo1s9l_Fxncl9iTNis0eotFw,39
  nkululeko/demo.py,sha256=55kNFA2helMhOxD4yZuKg1JWDtlUUpxm-6uAnroIydI,3264
  nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
  nkululeko/demo_predictor.py,sha256=-ggSHc3DXxRzjzcGB4qFBOMvKsfUdTkkde50BDrS9dA,4755
- nkululeko/experiment.py,sha256=SRcB0ni0XLK910NSWTyRAe-Eoa6fVSKDCJlDJKyCzMc,29574
+ nkululeko/experiment.py,sha256=WyLiOJ_VxlaXoS1cwXruzYV9OESMjjedcFNreKE1Z8I,29728
  nkululeko/explore.py,sha256=2wdoGRqldvsN1zCiWk0quSDgHHHUoF2UZOWQ1r-2OLM,2310
  nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
  nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzwM,3976
@@ -15,18 +15,17 @@ nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
  nkululeko/glob_conf.py,sha256=iHiVSxDYgmYwdx6z0HuGUMSWrfZfufPHxHb60q2dLRY,453
  nkululeko/modelrunner.py,sha256=GwDXcE2gDQXat4W0-HhHQ1BcUNCRBXMBQ4QycfHp_5c,9288
  nkululeko/multidb.py,sha256=fG3VukEWP1vreVN4gB1IRXxwwg4jLftsSEYtu0o1f78,5634
- nkululeko/nkuluflag.py,sha256=FCetTfgH69u4AwENgeCKVi3vBIR10Di67SfbupGQqfc,3354
+ nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
  nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
- nkululeko/plots.py,sha256=K88ZRPFGX_r03BT742H06Dde20xZYdltv7dxjgUiAFA,23025
+ nkululeko/plots.py,sha256=nd9tF_61DyAx7oGZF8gTrHXazkgFjFe4eClxu1nQ_XU,23276
  nkululeko/predict.py,sha256=sF091sSSLnEWcISx9ZcULLie3tY5XeFsQJd6b3vrxFg,2409
- nkululeko/reporter.py,sha256=8mlIaKep4hM-tdRv8t98tK80rx3zOmVGXSORhiPc3as,12483
  nkululeko/resample.py,sha256=3WbxkwgyTe_fW38046Rjxk3knOkFdhqn2C4nfhbUurQ,2287
  nkululeko/runmanager.py,sha256=eTM1DNQKt1lxYhzt4vZyZluPXW9sWlIJHNQzex4lkJU,7624
  nkululeko/scaler.py,sha256=4nkIqoajkIkuTPK0Z02ifMN_awl6fP_i-GBYdoGYgGM,4101
  nkululeko/segment.py,sha256=YLKckX44tbvTb3LrdgYw9X4guzuF27sutl92z9DkpZU,4835
  nkululeko/syllable_nuclei.py,sha256=Sky-C__MeUDaxqHnDl2TGLLYOYvsahD35TUjWGeG31k,10047
- nkululeko/test.py,sha256=JRoLgqQJEhAIGetw-qlOUihSTTQ7O8DYafB0FlQESIQ,1525
- nkululeko/test_predictor.py,sha256=L8XKrIweTf-oKeaGuDw_ZhtvzRUxFuWmOhva6jgf7-s,3148
+ nkululeko/test.py,sha256=1w624vo5KTzmFC8BUStGlLDmIEAFuJUz7J0W-gp7AxI,1677
+ nkululeko/test_predictor.py,sha256=_w5J8CxH6hmW3mLTKbdfmywl5QpdNAnW1Y8TE5GtlfE,3237
  nkululeko/augmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/augmenting/augmenter.py,sha256=XAt0dpmlnKxqyysqCgV3rcz-pRIvOz7rU7dmGDCVAzs,2905
  nkululeko/augmenting/randomsplicer.py,sha256=Z5rxdKKUpuncLWuTS6xVfVKUeVbeiYU_dLRHQ5fcg4Y,2669
@@ -49,7 +48,7 @@ nkululeko/data/dataset.py,sha256=JGzMD6HIvkFkYBekmbmslIKc5ADaCj06T-8gpqH_kFo,276
  nkululeko/data/dataset_csv.py,sha256=uLa7jW4w2ft299NkpXZMD361kPHF8oSYoIZ_ucxhuOM,3884
  nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
- nkululeko/feat_extract/feats_agender_agender.py,sha256=5dA7YA-YGxODovMC7ynMk3bnpPjfs0ApvSfjqvoSZY0,3346
+ nkululeko/feat_extract/feats_agender_agender.py,sha256=tgH2BnwcxpvuLmOkrMbVdBSX0Onfz2MG12FsddalRKI,3424
  nkululeko/feat_extract/feats_analyser.py,sha256=_5oz4y-NZCEBgfNP2GZ9WNqQR50Hbykm0TvDVomWP0U,11399
  nkululeko/feat_extract/feats_auddim.py,sha256=VlzKKXTXa5kjLgQBWyEFy-daIyU1SkOwCCOIhKsWCvE,3162
  nkululeko/feat_extract/feats_audmodel.py,sha256=VjBNgAoxsHJhwr6Kwt9CxX6SaCM4RK_OV-GU2W5-bhU,3187
@@ -64,11 +63,11 @@ nkululeko/feat_extract/feats_praat.py,sha256=kZrS6srzH7WoWEd2prp1Dxw6g9JklFQGTNq
  nkululeko/feat_extract/feats_snr.py,sha256=9dqZ-4RpK98iJEssM3ttozNd18LWlZYM_QVXvp5xDcs,2829
  nkululeko/feat_extract/feats_spectra.py,sha256=5Pex8awIQC3cjQRHSu4NQFmg4quamG0RL3V3Yd0pJHs,3670
  nkululeko/feat_extract/feats_spkrec.py,sha256=VK4ma3uWzM0YZStsgRTirfkbzjWIfRWSgsYI038QlRY,4803
- nkululeko/feat_extract/feats_squim.py,sha256=jToXiwRq5-MQheAP6xczvry1uVIHYUrD8bM7Wb1cnqM,4528
+ nkululeko/feat_extract/feats_squim.py,sha256=Y31YmDmscuG0YozvxyBZIutO3id8t7IZJWCfKucw-6M,4617
  nkululeko/feat_extract/feats_trill.py,sha256=HXQBaPWTX0iNEjBY7RD8uyFeYjDieHqv8ZilE0Jb-Pg,3319
- nkululeko/feat_extract/feats_wav2vec2.py,sha256=tFGe4t6MIVxTDQYR8geNCtZ_3ALc-gpi-rmQbF74HfI,5285
+ nkululeko/feat_extract/feats_wav2vec2.py,sha256=9WUMfyddB_3nx79g7mZoQrRynhM1uEBWuOotRq8bxoU,5268
  nkululeko/feat_extract/feats_wavlm.py,sha256=ulxpGjifUFx2ZgGmY32SmBJGIuvkYHoLb2n1LZ8KMwA,4703
- nkululeko/feat_extract/feats_whisper.py,sha256=0N7Vj65OVi2PNoB_NrDjWT5lP6xZNKxFOZZIoxkJvcA,4533
+ nkululeko/feat_extract/feats_whisper.py,sha256=BFspQBI53HAgw22vBEeFskGwFZA-94Rpl17xM458HRo,4576
  nkululeko/feat_extract/featureset.py,sha256=HtgW2389rmlRAgFP3F1sSFzq2_iUVr2NhOfIXG9omt0,1448
  nkululeko/feat_extract/feinberg_praat.py,sha256=EP9pMALjlKdiYInLQdrZ7MmE499Mq-ISRCgqbqL3Rxc,21304
  nkululeko/losses/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -77,13 +76,13 @@ nkululeko/losses/loss_softf1loss.py,sha256=5gW-PuiqeAZcRgfwjueIOQtMokOjZWgQnVIv5
  nkululeko/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/models/model.py,sha256=oAdKq2wY5lYKfpZkQwO46ojYRsj_Z-FR56oR1uHAWI0,11569
  nkululeko/models/model_bayes.py,sha256=wI7-sCwibqXMCHviu349TYjgJXXNXym-Z6ZM83uxlFQ,378
- nkululeko/models/model_cnn.py,sha256=j4NTp7quWqInzOPfpiMrTcfMbXkOsdlFF9ns0tW_ld4,9726
+ nkululeko/models/model_cnn.py,sha256=revCxyeX69DU6OA63YTnF28UaAFV7AmUfqODMCE_pbQ,10002
  nkululeko/models/model_gmm.py,sha256=onovzGBeguwZ-upXtuDLaBw9sd6fDDQslVBOrz1Z8TE,645
  nkululeko/models/model_knn.py,sha256=5tGqiPo2JTw9VLmD-MXNZKFJ5RTLA6uv_blJDJ9lScA,573
  nkululeko/models/model_knn_reg.py,sha256=Fbuk6Ku6eyrbbMEk7rB5dwfhvQOMsdZk6HI_0T0gYPw,580
  nkululeko/models/model_lin_reg.py,sha256=NBTnY2ULuhUBt5ArYQwskZ2Vq4BBDGkqd9SYBFl7Ql4,392
- nkululeko/models/model_mlp.py,sha256=lYhGrkqEj6fa6a_tcPrqEoorOpM7t7bjSfFLKEV6pu4,9107
- nkululeko/models/model_mlp_regression.py,sha256=NP1yEsqvpDcDBWWzDq7W4SHnXC1kE4fAo4A9aBCq3cY,10083
+ nkululeko/models/model_mlp.py,sha256=IuNGrLPx54-ZmpydH2yJdm2ddCm4rgu59Csv5ikbEpI,9471
+ nkululeko/models/model_mlp_regression.py,sha256=-ailThquUXwLkOj5jlJ4qn1vlb3nSHW5s0KS7GLp4qI,10290
  nkululeko/models/model_svm.py,sha256=QqwRjfG9I5y-57CcJAMUSbvYzV0DOlDcpDK5f4yQ_qw,914
  nkululeko/models/model_svr.py,sha256=p-Mb4Bn54yOe1upuHQKNpfj4ttOmQnm9pCB7ECkJkJQ,699
  nkululeko/models/model_tree.py,sha256=soXjV523eRvRZ-jbX7X_3S73Wto1B9bm7ZzzDmgYzTc,390
@@ -104,8 +103,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
  nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
  nkululeko/utils/util.py,sha256=_Z6OMJ3f-8TdETW9eqJYY5hwNRS5XCt9azzRnqoTTZE,12330
- nkululeko-0.83.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
- nkululeko-0.83.0.dist-info/METADATA,sha256=20S7IpMbLE7irV0ikdaFNfdqdBEEywH7jjlJwur8smA,36018
- nkululeko-0.83.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
- nkululeko-0.83.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
- nkululeko-0.83.0.dist-info/RECORD,,
+ nkululeko-0.83.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+ nkululeko-0.83.2.dist-info/METADATA,sha256=DMkXO8jSm6iR4eETrG2aEK__7MfPhpAvOe6Tf99n_HE,36158
+ nkululeko-0.83.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+ nkululeko-0.83.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+ nkululeko-0.83.2.dist-info/RECORD,,
nkululeko/reporter.py DELETED
@@ -1,324 +0,0 @@
- """Reporter module.
-
- This module contains the Reporter class which is responsible for generating reports.
- """
-
- import ast
- import glob
- import json
- import math
-
- import matplotlib.pyplot as plt
- import numpy as np
- from scipy.stats import pearsonr
- from sklearn.metrics import ConfusionMatrixDisplay
- from sklearn.metrics import accuracy_score
- from sklearn.metrics import classification_report
- from sklearn.metrics import confusion_matrix
- from sklearn.metrics import mean_absolute_error
- from sklearn.metrics import mean_squared_error
- from sklearn.metrics import r2_score
- from sklearn.metrics import recall_score
- from sklearn.utils import resample
-
- import nkululeko.glob_conf as glob_conf
- from nkululeko.reporting.defines import Header
- from nkululeko.reporting.report_item import ReportItem
- from nkululeko.result import Result
- from nkululeko.utils.util import Util
-
-
- class Reporter:
- def __set_measure(self):
- if self.util.exp_is_classification():
- self.MEASURE = "UAR"
- self.result.measure = self.MEASURE
- self.is_classification = True
- else:
- self.is_classification = False
- self.measure = self.util.config_val("MODEL", "measure", "mse")
- if self.measure == "mse":
- self.MEASURE = "MSE"
- self.result.measure = self.MEASURE
- elif self.measure == "mae":
- self.MEASURE = "MAE"
- self.result.measure = self.MEASURE
- elif self.measure == "ccc":
- self.MEASURE = "CCC"
- self.result.measure = self.MEASURE
-
- def __init__(self, truths, preds, run, epoch):
- """Initialization with ground truth und predictions vector"""
- self.util = Util("reporter")
- self.format = self.util.config_val("PLOT", "format", "png")
- self.truths = truths
- self.preds = preds
- self.result = Result(0, 0, 0, 0, "unknown")
- self.run = run
- self.epoch = epoch
- self.__set_measure()
- self.cont_to_cat = False
- if len(self.truths) > 0 and len(self.preds) > 0:
- if self.util.exp_is_classification():
- self.result.test = recall_score(
- self.truths, self.preds, average="macro"
- )
- self.result.loss = 1 - accuracy_score(self.truths, self.preds)
- else:
- # regression experiment
- if self.measure == "mse":
- self.result.test = mean_squared_error(self.truths, self.preds)
- elif self.measure == "mae":
- self.result.test = mean_absolute_error(self.truths, self.preds)
- elif self.measure == "ccc":
- self.result.test = self.ccc(self.truths, self.preds)
- if math.isnan(self.result.test):
- self.util.debug(f"Truth: {self.truths}")
- self.util.debug(f"Predict.: {self.preds}")
- self.util.debug(f"Result is NAN: setting to -1")
- self.result.test = -1
- else:
- self.util.error(f"unknown measure: {self.measure}")
-
- # train and loss are being set by the model
-
- def set_id(self, run, epoch):
- """Make the report identifiable with run and epoch index"""
- self.run = run
- self.epoch = epoch
-
- def continuous_to_categorical(self):
- if self.cont_to_cat:
- return
- self.cont_to_cat = True
- bins = ast.literal_eval(glob_conf.config["DATA"]["bins"])
- self.truths = np.digitize(self.truths, bins) - 1
- self.preds = np.digitize(self.preds, bins) - 1
-
- def plot_confmatrix(self, plot_name, epoch):
- if not self.util.exp_is_classification():
- self.continuous_to_categorical()
- self._plot_confmat(self.truths, self.preds, plot_name, epoch)
-
-
- def plot_per_speaker(self, result_df, plot_name, function):
- """Plot a confusion matrix with the mode category per speakers.
-
- This function creates a confusion matrix for each speaker in the result_df.
- The result_df should contain the columns: preds, truths and speaker.
-
- Args:
- * result_df: a pandas dataframe with columns: preds, truths and speaker
- * plot_name: a string with the name of the plot
- * function: a string with the function to use for each speaker,
- can be 'mode' or 'mean'
-
- Returns:
- * None
- """
- # Initialize empty arrays for predictions and truths
- pred = np.zeros(0)
- truth = np.zeros(0)
-
- # Iterate over each speaker
- for s in result_df.speaker.unique():
- # Filter the dataframe for the current speaker
- s_df = result_df[result_df.speaker == s]
-
- # Get the mode or mean prediction for the current speaker
- mode = s_df.pred.mode().iloc[-1]
- mean = s_df.pred.mean()
- if function == "mode":
- s_df.pred = mode
- elif function == "mean":
- s_df.pred = mean
- else:
- self.util.error(f"unknown function {function}")
-
- # Append the current speaker's predictions and truths to the arrays
- pred = np.append(pred, s_df.pred.values)
- truth = np.append(truth, s_df["truth"].values)
-
- # If the experiment is not a classification or continuous to categorical conversion was performed,
- # convert the truths and predictions to categorical
- if not (self.is_classification or self.cont_to_cat):
- bins = ast.literal_eval(glob_conf.config["DATA"]["bins"])
- truth = np.digitize(truth, bins) - 1
- pred = np.digitize(pred, bins) - 1
-
- # Plot the confusion matrix for the speakers
- self._plot_confmat(truth, pred.astype("int"), plot_name, 0)
-
- def _plot_confmat(self, truths, preds, plot_name, epoch):
- # print(truths)
- # print(preds)
- fig_dir = self.util.get_path("fig_dir")
- labels = glob_conf.labels
- fig = plt.figure() # figsize=[5, 5]
- uar = recall_score(truths, preds, average="macro")
- acc = accuracy_score(truths, preds)
- cm = confusion_matrix(
- truths, preds, normalize=None
- ) # normalize must be one of {'true', 'pred', 'all', None}
- if cm.shape[0] != len(labels):
- self.util.error(
- f"mismatch between confmatrix dim ({cm.shape[0]}) and labels"
- f" length ({len(labels)}: {labels})"
- )
- try:
- disp = ConfusionMatrixDisplay(
- confusion_matrix=cm, display_labels=labels
- ).plot(cmap="Blues")
- except ValueError:
- disp = ConfusionMatrixDisplay(
- confusion_matrix=cm,
- display_labels=list(labels).remove("neutral"),
- ).plot(cmap="Blues")
-
- reg_res = ""
- if not self.is_classification:
- reg_res = f", {self.MEASURE}: {self.result.test:.3f}"
-
- if epoch != 0:
- plt.title(f"Confusion Matrix, UAR: {uar:.3f}{reg_res}, Epoch: {epoch}")
- else:
- plt.title(f"Confusion Matrix, UAR: {uar:.3f}{reg_res}")
- img_path = f"{fig_dir}{plot_name}.{self.format}"
- plt.savefig(img_path)
- fig.clear()
- plt.close(fig)
- plt.savefig(img_path)
- plt.close(fig)
- glob_conf.report.add_item(
- ReportItem(
- Header.HEADER_RESULTS,
- self.util.get_model_description(),
- "Confusion matrix",
- img_path,
- )
- )
-
- res_dir = self.util.get_path("res_dir")
- uar = int(uar * 1000) / 1000.0
- acc = int(acc * 1000) / 1000.0
- rpt = f"epoch: {epoch}, UAR: {uar}, ACC: {acc}"
- # print(rpt)
- self.util.debug(rpt)
- file_name = f"{res_dir}{self.util.get_exp_name()}_conf.txt"
- with open(file_name, "w") as text_file:
- text_file.write(rpt)
-
- def print_results(self, epoch):
- """Print all evaluation values to text file"""
- res_dir = self.util.get_path("res_dir")
- file_name = f"{res_dir}{self.util.get_exp_name()}_{epoch}.txt"
- if self.util.exp_is_classification():
- labels = glob_conf.labels
- try:
- rpt = classification_report(
- self.truths,
- self.preds,
- target_names=labels,
- output_dict=True,
- )
- except ValueError as e:
- self.util.debug(
- "Reporter: caught a ValueError when trying to get"
- " classification_report: " + e
- )
- rpt = self.result.to_string()
- with open(file_name, "w") as text_file:
- c_ress = list(range(len(labels)))
- for i, l in enumerate(labels):
- c_res = rpt[l]["f1-score"]
- c_ress[i] = float(f"{c_res:.3f}")
- self.util.debug(f"labels: {labels}")
- f1_per_class = f"result per class (F1 score): {c_ress}"
- self.util.debug(f1_per_class)
- rpt_str = f"{json.dumps(rpt)}\n{f1_per_class}"
- text_file.write(rpt_str)
- glob_conf.report.add_item(
- ReportItem(
- Header.HEADER_RESULTS,
- f"Classification result {self.util.get_model_description()}",
- rpt_str,
- )
- )
-
- else: # regression
- result = self.result.test
- r2 = r2_score(self.truths, self.preds)
- pcc = pearsonr(self.truths, self.preds)[0]
- measure = self.util.config_val("MODEL", "measure", "mse")
- with open(file_name, "w") as text_file:
- text_file.write(
- f"{measure}: {result:.3f}, r_2: {r2:.3f}, pcc {pcc:.3f}"
- )
-
- def make_conf_animation(self, out_name):
- import imageio
-
- fig_dir = self.util.get_path("fig_dir")
- filenames = glob.glob(fig_dir + f"{self.util.get_plot_name()}*_?_???_cnf.png")
- images = []
- for filename in filenames:
- images.append(imageio.imread(filename))
- fps = self.util.config_val("PLOT", "fps", "1")
- try:
- imageio.mimsave(fig_dir + out_name, images, fps=int(fps))
- except RuntimeError as e:
- self.util.error("error writing anim gif: " + e)
-
- def get_result(self):
- return self.result
-
- def plot_epoch_progression(self, reports, out_name):
- fig_dir = self.util.get_path("fig_dir")
- results, losses, train_results, losses_eval = [], [], [], []
- for r in reports:
- results.append(r.get_result().test)
- losses.append(r.get_result().loss)
- train_results.append(r.get_result().train)
- losses_eval.append(r.get_result().loss_eval)
-
- # do a plot per run
- # scale the losses so they fit on the picture
- losses, results, train_results, losses_eval = (
- np.asarray(losses),
- np.asarray(results),
- np.asarray(train_results),
- np.asarray(losses_eval),
- )
-
- if np.all((results > 1)):
- # scale down values
- results = results / 100.0
- train_results = train_results / 100.0
- # if np.all((losses < 1)):
- # scale up values
- plt.figure(dpi=200)
- plt.plot(train_results, "green", label="train set")
- plt.plot(results, "red", label="dev set")
- plt.plot(losses, "black", label="losses")
- plt.plot(losses_eval, "grey", label="losses_eval")
- plt.xlabel("epochs")
- plt.ylabel(f"{self.MEASURE}")
- plt.legend()
- plt.savefig(f"{fig_dir}{out_name}.{self.format}")
- plt.close()
-
- @staticmethod
- def ccc(ground_truth, prediction):
- mean_gt = np.mean(ground_truth, 0)
- mean_pred = np.mean(prediction, 0)
- var_gt = np.var(ground_truth, 0)
- var_pred = np.var(prediction, 0)
- v_pred = prediction - mean_pred
- v_gt = ground_truth - mean_gt
- cor = sum(v_pred * v_gt) / (np.sqrt(sum(v_pred**2)) * np.sqrt(sum(v_gt**2)))
- sd_gt = np.std(ground_truth)
- sd_pred = np.std(prediction)
- numerator = 2 * cor * sd_gt * sd_pred
- denominator = var_gt + var_pred + (mean_gt - mean_pred) ** 2
- ccc = numerator / denominator
- return ccc