nkululeko 0.83.1__py3-none-any.whl → 0.83.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.83.1"
1
+ VERSION="0.83.3"
2
2
  SAMPLING_RATE = 16000
nkululeko/experiment.py CHANGED
@@ -679,9 +679,12 @@ class Experiment:
679
679
  return result
680
680
 
681
681
  def load(self, filename):
682
- f = open(filename, "rb")
683
- tmp_dict = pickle.load(f)
684
- f.close()
682
+ try:
683
+ f = open(filename, "rb")
684
+ tmp_dict = pickle.load(f)
685
+ f.close()
686
+ except EOFError as eof:
687
+ self.util.error(f"can't open file {filename}: {eof}")
685
688
  self.__dict__.update(tmp_dict)
686
689
  glob_conf.set_labels(self.labels)
687
690
 
@@ -689,22 +692,26 @@ class Experiment:
689
692
  if self.runmgr.modelrunner.model.is_ann():
690
693
  self.runmgr.modelrunner.model = None
691
694
  self.util.warn(
692
- f"Save experiment: Can't pickle the learning model so saving without it."
695
+ "Save experiment: Can't pickle the learning model so saving without it."
693
696
  )
694
697
  try:
695
698
  f = open(filename, "wb")
696
699
  pickle.dump(self.__dict__, f)
697
700
  f.close()
698
- except TypeError:
701
+ except (TypeError, AttributeError) as error:
699
702
  self.feature_extractor.feat_extractor.model = None
700
703
  f = open(filename, "wb")
701
704
  pickle.dump(self.__dict__, f)
702
705
  f.close()
703
706
  self.util.warn(
704
- f"Save experiment: Can't pickle the feature extraction model so saving without it."
707
+ "Save experiment: Can't pickle the feature extraction model so saving without it."
708
+ + f"{type(error).__name__} {error}"
709
+ )
710
+ except RuntimeError as error:
711
+ self.util.warn(
712
+ "Save experiment: Can't pickle local object, NOT saving: "
713
+ + f"{type(error).__name__} {error}"
705
714
  )
706
- except (AttributeError, RuntimeError) as error:
707
- self.util.warn(f"Save experiment: Can't pickle local object: {error}")
708
715
 
709
716
  def save_onnx(self, filename):
710
717
  # export the model to onnx
@@ -28,9 +28,11 @@ class AgenderAgenderSet(Featureset):
28
28
  if not os.path.isdir(model_root):
29
29
  cache_root = audeer.mkdir("cache")
30
30
  model_root = audeer.mkdir(model_root)
31
- archive_path = audeer.download_url(model_url, cache_root, verbose=True)
31
+ archive_path = audeer.download_url(
32
+ model_url, cache_root, verbose=True)
32
33
  audeer.extract_archive(archive_path, model_root)
33
- device = self.util.config_val("MODEL", "device", "cpu")
34
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
35
+ device = self.util.config_val("MODEL", "device", cuda)
34
36
  self.model = audonnx.load(model_root, device=device)
35
37
  # pytorch_total_params = sum(p.numel() for p in self.model.parameters())
36
38
  # self.util.debug(
@@ -28,12 +28,17 @@ from nkululeko.utils.util import Util
28
28
 
29
29
 
30
30
  class SquimSet(Featureset):
31
- """Class to predict SQUIM features"""
31
+ """Class to predict SQUIM features."""
32
32
 
33
33
  def __init__(self, name, data_df, feats_type):
34
- """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
34
+ """Constructor.
35
+
36
+ is_train is needed to distinguish from test/dev sets,
37
+ because they use the codebook from the training.
38
+ """
35
39
  super().__init__(name, data_df, feats_type)
36
- self.device = self.util.config_val("MODEL", "device", "cpu")
40
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
41
+ self.device = self.util.config_val("MODEL", "device", cuda)
37
42
  self.model_initialized = False
38
43
 
39
44
  def init_model(self):
@@ -1,35 +1,39 @@
1
1
  # feats_trill.py
2
- import tensorflow_hub as hub
3
2
  import os
3
+
4
+ import pandas as pd
4
5
  import tensorflow as tf
5
- from numpy.core.numeric import tensordot
6
+ import tensorflow_hub as hub
6
7
  from tqdm import tqdm
7
- import pandas as pd
8
+
8
9
  import audiofile as af
9
- from nkululeko.utils.util import Util
10
- import nkululeko.glob_conf as glob_conf
10
+
11
11
  from nkululeko.feat_extract.featureset import Featureset
12
+ import nkululeko.glob_conf as glob_conf
13
+ from nkululeko.utils.util import Util
14
+
12
15
 
13
16
  # Import TF 2.X and make sure we're running eager.
14
17
  assert tf.executing_eagerly()
15
18
 
16
19
 
17
20
  class TRILLset(Featureset):
18
- """A feature extractor for the Google TRILL embeddings"""
21
+ """A feature extractor for the Google TRILL embeddings.
19
22
 
20
- """https://ai.googleblog.com/2020/06/improving-speech-representations-and.html"""
23
+ See https://ai.googleblog.com/2020/06/improving-speech-representations-and.html.
24
+ """
21
25
 
22
26
  # Initialization of the class
23
27
  def __init__(self, name, data_df, feats_type):
24
- """
25
- Initialize the class with name, data and Util instance
26
- Also loads the model from hub
28
+ """Initialize the class with name, data and Util instance.
27
29
 
28
- :param name: Name of the class
29
- :type name: str
30
- :param data_df: Data of the class
31
- :type data_df: DataFrame
32
- :return: None
30
+ Also loads the model from hub
31
+ Args:
32
+ :param name: Name of the class
33
+ :type name: str
34
+ :param data_df: Data of the class
35
+ :type data_df: DataFrame
36
+ :return: None
33
37
  """
34
38
  super().__init__(name, data_df, feats_type)
35
39
  # Load the model from the configured path
@@ -38,25 +42,21 @@ class TRILLset(Featureset):
38
42
  "trill.model",
39
43
  "https://tfhub.dev/google/nonsemantic-speech-benchmark/trill/3",
40
44
  )
41
- self.module = hub.load(model_path)
45
+ self.model = hub.load(model_path)
42
46
  self.feats_type = feats_type
43
47
 
44
48
  def extract(self):
45
49
  store = self.util.get_path("store")
46
50
  storage = f"{store}{self.name}.pkl"
47
- extract = self.util.config_val(
48
- "FEATS", "needs_feature_extraction", False)
51
+ extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
49
52
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
50
53
  if extract or no_reuse or not os.path.isfile(storage):
51
- self.util.debug(
52
- "extracting TRILL embeddings, this might take a while...")
54
+ self.util.debug("extracting TRILL embeddings, this might take a while...")
53
55
  emb_series = pd.Series(index=self.data_df.index, dtype=object)
54
- length = len(self.data_df.index)
55
56
  for idx, file in enumerate(tqdm(self.data_df.index.get_level_values(0))):
56
- emb = self.getEmbeddings(file)
57
- emb_series[idx] = emb
58
- self.df = pd.DataFrame(
59
- emb_series.values.tolist(), index=self.data_df.index)
57
+ emb = self.get_embeddings(file)
58
+ emb_series.iloc[idx] = emb
59
+ self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
60
60
  self.df.to_pickle(storage)
61
61
  try:
62
62
  glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
@@ -70,15 +70,15 @@ class TRILLset(Featureset):
70
70
  if len(wav.shape) > 1:
71
71
  wav = tf.reduce_mean(wav, axis=0)
72
72
 
73
- emb_dict = self.module(samples=wav, sample_rate=tf.constant(16000))
73
+ emb_dict = self.model(samples=wav, sample_rate=tf.constant(16000))
74
74
  return emb_dict["embedding"]
75
75
 
76
- def getEmbeddings(self, file):
76
+ def get_embeddings(self, file):
77
77
  wav = af.read(file)[0]
78
- emb_short = self.getEmbeddings_signal(wav, 16000)
78
+ emb_short = self.get_embeddings_signal(wav, 16000)
79
79
  return emb_short
80
80
 
81
- def getEmbeddings_signal(self, signal, sr):
81
+ def get_embeddings_signal(self, signal, sr):
82
82
  wav = tf.convert_to_tensor(signal)
83
83
  emb_short = self.embed_wav(wav)
84
84
  # you get one embedding per frame, we use the mean for all the frames
@@ -86,7 +86,7 @@ class TRILLset(Featureset):
86
86
  return emb_short
87
87
 
88
88
  def extract_sample(self, signal, sr):
89
- if self.module == None:
89
+ if self.model == None:
90
90
  self.__init__("na", None)
91
- feats = self.getEmbeddings_signal(signal, sr)
91
+ feats = self.get_embeddings_signal(signal, sr)
92
92
  return feats
@@ -21,7 +21,11 @@ class Wav2vec2(Featureset):
21
21
  """Class to extract wav2vec2 embeddings"""
22
22
 
23
23
  def __init__(self, name, data_df, feat_type):
24
- """Constructor. is_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
24
+ """Constructor.
25
+
26
+ If_train is needed to distinguish from test/dev sets,
27
+ because they use the codebook from the training
28
+ """
25
29
  super().__init__(name, data_df, feat_type)
26
30
  cuda = "cuda" if torch.cuda.is_available() else "cpu"
27
31
  self.device = self.util.config_val("MODEL", "device", cuda)
@@ -39,8 +43,7 @@ class Wav2vec2(Featureset):
39
43
  )
40
44
  config = transformers.AutoConfig.from_pretrained(model_path)
41
45
  layer_num = config.num_hidden_layers
42
- hidden_layer = int(self.util.config_val(
43
- "FEATS", "wav2vec2.layer", "0"))
46
+ hidden_layer = int(self.util.config_val("FEATS", "wav2vec2.layer", "0"))
44
47
  config.num_hidden_layers = layer_num - hidden_layer
45
48
  self.util.debug(f"using hidden layer #{config.num_hidden_layers}")
46
49
  self.processor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
@@ -55,8 +58,7 @@ class Wav2vec2(Featureset):
55
58
  """Extract the features or load them from disk if present."""
56
59
  store = self.util.get_path("store")
57
60
  storage = f"{store}{self.name}.pkl"
58
- extract = self.util.config_val(
59
- "FEATS", "needs_feature_extraction", False)
61
+ extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
60
62
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
61
63
  if extract or no_reuse or not os.path.isfile(storage):
62
64
  if not self.model_initialized:
@@ -77,8 +79,7 @@ class Wav2vec2(Featureset):
77
79
  emb = self.get_embeddings(signal, sampling_rate, file)
78
80
  emb_series[idx] = emb
79
81
  # print(f"emb_series shape: {emb_series.shape}")
80
- self.df = pd.DataFrame(
81
- emb_series.values.tolist(), index=self.data_df.index)
82
+ self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
82
83
  # print(f"df shape: {self.df.shape}")
83
84
  self.df.to_pickle(storage)
84
85
  try:
@@ -32,19 +32,22 @@ class Whisper(Featureset):
32
32
  model_name = f"openai/{self.feat_type}"
33
33
  self.model = WhisperModel.from_pretrained(model_name).to(self.device)
34
34
  print(f"intialized Whisper model on {self.device}")
35
- self.feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
35
+ self.feature_extractor = AutoFeatureExtractor.from_pretrained(
36
+ model_name)
36
37
  self.model_initialized = True
37
38
 
38
39
  def extract(self):
39
40
  """Extract the features or load them from disk if present."""
40
41
  store = self.util.get_path("store")
41
42
  storage = f"{store}{self.name}.pkl"
42
- extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
43
+ extract = self.util.config_val(
44
+ "FEATS", "needs_feature_extraction", False)
43
45
  no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
44
46
  if extract or no_reuse or not os.path.isfile(storage):
45
47
  if not self.model_initialized:
46
48
  self.init_model()
47
- self.util.debug("extracting whisper embeddings, this might take a while...")
49
+ self.util.debug(
50
+ "extracting whisper embeddings, this might take a while...")
48
51
  emb_series = []
49
52
  for (file, start, end), _ in audeer.progress_bar(
50
53
  self.data_df.iterrows(),
@@ -16,6 +16,7 @@ import numpy as np
16
16
  from sklearn.metrics import recall_score
17
17
  from collections import OrderedDict
18
18
  from PIL import Image
19
+ from traitlets import default
19
20
 
20
21
  from nkululeko.utils.util import Util
21
22
  import nkululeko.glob_conf as glob_conf
@@ -48,6 +49,7 @@ class CNN_model(Model):
48
49
  self.util.error(f"unknown loss function: {criterion}")
49
50
  self.util.debug(f"using model with cross entropy loss function")
50
51
  # set up the model
52
+ # cuda = "cuda" if torch.cuda.is_available() else "cpu"
51
53
  self.device = self.util.config_val("MODEL", "device", "cpu")
52
54
  try:
53
55
  layers_string = glob_conf.config["MODEL"]["layers"]
@@ -84,7 +86,8 @@ class CNN_model(Model):
84
86
  train_set = self.Dataset_image(
85
87
  feats_train, df_train, self.target, transformations
86
88
  )
87
- test_set = self.Dataset_image(feats_test, df_test, self.target, transformations)
89
+ test_set = self.Dataset_image(
90
+ feats_test, df_test, self.target, transformations)
88
91
  # Define data loaders
89
92
  self.trainloader = torch.utils.data.DataLoader(
90
93
  train_set,
@@ -137,7 +140,8 @@ class CNN_model(Model):
137
140
  losses = []
138
141
  for images, labels in self.trainloader:
139
142
  logits = self.model(images.to(self.device))
140
- loss = self.criterion(logits, labels.to(self.device, dtype=torch.int64))
143
+ loss = self.criterion(logits, labels.to(
144
+ self.device, dtype=torch.int64))
141
145
  losses.append(loss.item())
142
146
  self.optimizer.zero_grad()
143
147
  loss.backward()
@@ -165,14 +169,16 @@ class CNN_model(Model):
165
169
 
166
170
  self.loss_eval = (np.asarray(losses)).mean()
167
171
  predictions = logits.argmax(dim=1)
168
- uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
172
+ uar = recall_score(
173
+ targets.numpy(), predictions.numpy(), average="macro")
169
174
  return uar, targets, predictions
170
175
 
171
176
  def predict(self):
172
177
  _, truths, predictions = self.evaluate_model(
173
178
  self.model, self.testloader, self.device
174
179
  )
175
- uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
180
+ uar, _, _ = self.evaluate_model(
181
+ self.model, self.trainloader, self.device)
176
182
  report = Reporter(truths, predictions, self.run, self.epoch)
177
183
  try:
178
184
  report.result.loss = self.loss
@@ -209,7 +215,8 @@ class CNN_model(Model):
209
215
  dir = self.util.get_path("model_dir")
210
216
  # name = f'{self.util.get_exp_name()}_{run}_{epoch:03d}.model'
211
217
  name = f"{self.util.get_exp_name(only_train=True)}_{self.run}_{self.epoch:03d}.model"
212
- self.device = self.util.config_val("MODEL", "device", "cpu")
218
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
219
+ self.device = self.util.config_val("MODEL", "device", cuda)
213
220
  layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
214
221
  self.store_path = dir + name
215
222
  drop = self.util.config_val("MODEL", "drop", False)
@@ -222,7 +229,8 @@ class CNN_model(Model):
222
229
  def load_path(self, path, run, epoch):
223
230
  self.set_id(run, epoch)
224
231
  with open(path, "rb") as handle:
225
- self.device = self.util.config_val("MODEL", "device", "cpu")
232
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
233
+ self.device = self.util.config_val("MODEL", "device", cuda)
226
234
  layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
227
235
  self.store_path = path
228
236
  drop = self.util.config_val("MODEL", "drop", False)
@@ -34,8 +34,9 @@ class MLP_model(Model):
34
34
  else:
35
35
  self.util.error(f"unknown loss function: {criterion}")
36
36
  self.util.debug(f"using model with cross entropy loss function")
37
- # set up the model
38
- self.device = self.util.config_val("MODEL", "device", "cpu")
37
+ # set up the model, use GPU if available
38
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
39
+ self.device = self.util.config_val("MODEL", "device", cuda)
39
40
  try:
40
41
  layers_string = glob_conf.config["MODEL"]["layers"]
41
42
  except KeyError as ke:
@@ -86,7 +87,8 @@ class MLP_model(Model):
86
87
  losses = []
87
88
  for features, labels in self.trainloader:
88
89
  logits = self.model(features.to(self.device))
89
- loss = self.criterion(logits, labels.to(self.device, dtype=torch.int64))
90
+ loss = self.criterion(logits, labels.to(
91
+ self.device, dtype=torch.int64))
90
92
  losses.append(loss.item())
91
93
  self.optimizer.zero_grad()
92
94
  loss.backward()
@@ -114,14 +116,16 @@ class MLP_model(Model):
114
116
 
115
117
  self.loss_eval = (np.asarray(losses)).mean()
116
118
  predictions = logits.argmax(dim=1)
117
- uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
119
+ uar = recall_score(
120
+ targets.numpy(), predictions.numpy(), average="macro")
118
121
  return uar, targets, predictions
119
122
 
120
123
  def predict(self):
121
124
  _, truths, predictions = self.evaluate_model(
122
125
  self.model, self.testloader, self.device
123
126
  )
124
- uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
127
+ uar, _, _ = self.evaluate_model(
128
+ self.model, self.trainloader, self.device)
125
129
  report = Reporter(truths, predictions, self.run, self.epoch)
126
130
  try:
127
131
  report.result.loss = self.loss
@@ -179,6 +183,9 @@ class MLP_model(Model):
179
183
  features = np.reshape(features, (-1, 1)).T
180
184
  logits = self.model(features.to(self.device))
181
185
  # logits = self.model(features)
186
+ # if tensor, convert to cpu
187
+ if isinstance(logits, torch.Tensor):
188
+ logits = logits.cpu()
182
189
  a = logits.numpy()
183
190
  res = {}
184
191
  for i in range(len(a[0])):
@@ -196,7 +203,8 @@ class MLP_model(Model):
196
203
  dir = self.util.get_path("model_dir")
197
204
  # name = f'{self.util.get_exp_name()}_{run}_{epoch:03d}.model'
198
205
  name = f"{self.util.get_exp_name(only_train=True)}_{self.run}_{self.epoch:03d}.model"
199
- self.device = self.util.config_val("MODEL", "device", "cpu")
206
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
207
+ self.device = self.util.config_val("MODEL", "device", cuda)
200
208
  layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
201
209
  self.store_path = dir + name
202
210
  drop = self.util.config_val("MODEL", "drop", False)
@@ -211,7 +219,8 @@ class MLP_model(Model):
211
219
  def load_path(self, path, run, epoch):
212
220
  self.set_id(run, epoch)
213
221
  with open(path, "rb") as handle:
214
- self.device = self.util.config_val("MODEL", "device", "cpu")
222
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
223
+ self.device = self.util.config_val("MODEL", "device", cuda)
215
224
  layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
216
225
  self.store_path = path
217
226
  drop = self.util.config_val("MODEL", "drop", False)
@@ -9,6 +9,7 @@ import torch
9
9
  from audmetric import concordance_cc
10
10
  from audmetric import mean_absolute_error
11
11
  from audmetric import mean_squared_error
12
+ from traitlets import default
12
13
 
13
14
  import nkululeko.glob_conf as glob_conf
14
15
  from nkululeko.losses.loss_ccc import ConcordanceCorCoeff
@@ -40,7 +41,8 @@ class MLP_Reg_model(Model):
40
41
  self.util.error(f"unknown loss function: {criterion}")
41
42
  self.util.debug(f"training model with {criterion} loss function")
42
43
  # set up the model
43
- self.device = self.util.config_val("MODEL", "device", "cpu")
44
+ cuda = "cuda" if torch.cuda.is_available() else "cpu"
45
+ self.device = self.util.config_val("MODEL", "device", cuda)
44
46
  layers_string = glob_conf.config["MODEL"]["layers"]
45
47
  self.util.debug(f"using layers {layers_string}")
46
48
  try:
@@ -50,7 +52,8 @@ class MLP_Reg_model(Model):
50
52
  drop = self.util.config_val("MODEL", "drop", False)
51
53
  if drop:
52
54
  self.util.debug(f"training with dropout: {drop}")
53
- self.model = self.MLP(feats_train.shape[1], layers, 1, drop).to(self.device)
55
+ self.model = self.MLP(
56
+ feats_train.shape[1], layers, 1, drop).to(self.device)
54
57
  self.learning_rate = float(
55
58
  self.util.config_val("MODEL", "learning_rate", 0.0001)
56
59
  )
@@ -93,8 +96,10 @@ class MLP_Reg_model(Model):
93
96
  _, truths, predictions = self.evaluate_model(
94
97
  self.model, self.testloader, self.device
95
98
  )
96
- result, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
97
- report = Reporter(truths.numpy(), predictions.numpy(), self.run, self.epoch)
99
+ result, _, _ = self.evaluate_model(
100
+ self.model, self.trainloader, self.device)
101
+ report = Reporter(truths.numpy(), predictions.numpy(),
102
+ self.run, self.epoch)
98
103
  try:
99
104
  report.result.loss = self.loss
100
105
  except AttributeError: # if the model was loaded from disk the loss is unknown
@@ -128,9 +133,11 @@ class MLP_Reg_model(Model):
128
133
 
129
134
  def __getitem__(self, item):
130
135
  index = self.df.index[item]
131
- features = self.df_features.loc[index, :].values.astype("float32").squeeze()
136
+ features = self.df_features.loc[index, :].values.astype(
137
+ "float32").squeeze()
132
138
  labels = (
133
- np.array([self.df.loc[index, self.label]]).astype("float32").squeeze()
139
+ np.array([self.df.loc[index, self.label]]
140
+ ).astype("float32").squeeze()
134
141
  )
135
142
  return features, labels
136
143
 
@@ -187,7 +194,8 @@ class MLP_Reg_model(Model):
187
194
  end_index = (index + 1) * loader.batch_size
188
195
  if end_index > len(loader.dataset):
189
196
  end_index = len(loader.dataset)
190
- logits[start_index:end_index] = model(features.to(device)).reshape(-1)
197
+ logits[start_index:end_index] = model(
198
+ features.to(device)).reshape(-1)
191
199
  targets[start_index:end_index] = labels
192
200
  loss = self.criterion(
193
201
  logits[start_index:end_index].to(
nkululeko/plots.py CHANGED
@@ -28,7 +28,8 @@ class Plots:
28
28
  df_speaker["samplenum"] = df_speaker.shape[0]
29
29
  df_speakers = pd.concat([df_speakers, df_speaker.head(1)])
30
30
  # plot the distribution of samples per speaker
31
- fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
31
+ # one up because of the runs
32
+ fig_dir = self.util.get_path("fig_dir") + "../"
32
33
  self.util.debug(f"plotting samples per speaker")
33
34
  if "gender" in df_speakers:
34
35
  filename = f"samples_value_counts"
@@ -137,7 +138,8 @@ class Plots:
137
138
  df, att1, class_label, att1, type_s
138
139
  )
139
140
  else:
140
- ax, caption = self._plot2cont(df, class_label, att1, type_s)
141
+ ax, caption = self._plot2cont(
142
+ df, class_label, att1, type_s)
141
143
  self._save_plot(
142
144
  ax,
143
145
  caption,
@@ -150,7 +152,8 @@ class Plots:
150
152
  att1 = att[0]
151
153
  att2 = att[1]
152
154
  if att1 == self.target or att2 == self.target:
153
- self.util.debug(f"no need to correlate {self.target} with itself")
155
+ self.util.debug(
156
+ f"no need to correlate {self.target} with itself")
154
157
  return
155
158
  if att1 not in df:
156
159
  self.util.error(f"unknown feature: {att1}")
@@ -165,7 +168,8 @@ class Plots:
165
168
  if self.util.is_categorical(df[att1]):
166
169
  if self.util.is_categorical(df[att2]):
167
170
  # class_label = cat, att1 = cat, att2 = cat
168
- ax, caption = self._plot2cat(df, att1, att2, att1, type_s)
171
+ ax, caption = self._plot2cat(
172
+ df, att1, att2, att1, type_s)
169
173
  else:
170
174
  # class_label = cat, att1 = cat, att2 = cont
171
175
  ax, caption = self._plotcatcont(
@@ -186,7 +190,8 @@ class Plots:
186
190
  if self.util.is_categorical(df[att1]):
187
191
  if self.util.is_categorical(df[att2]):
188
192
  # class_label = cont, att1 = cat, att2 = cat
189
- ax, caption = self._plot2cat(df, att1, att2, att1, type_s)
193
+ ax, caption = self._plot2cat(
194
+ df, att1, att2, att1, type_s)
190
195
  else:
191
196
  # class_label = cont, att1 = cat, att2 = cont
192
197
  ax, caption = self._plot2cont_cat(
@@ -200,7 +205,8 @@ class Plots:
200
205
  )
201
206
  else:
202
207
  # class_label = cont, att1 = cont, att2 = cont
203
- ax, caption = self._plot2cont(df, att1, att2, type_s)
208
+ ax, caption = self._plot2cont(
209
+ df, att1, att2, type_s)
204
210
 
205
211
  self._save_plot(
206
212
  ax, caption, f"Correlation of {att1} and {att2}", filename, type_s
@@ -213,7 +219,8 @@ class Plots:
213
219
  )
214
220
 
215
221
  def _save_plot(self, ax, caption, header, filename, type_s):
216
- fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
222
+ # one up because of the runs
223
+ fig_dir = self.util.get_path("fig_dir") + "../"
217
224
  fig = ax.figure
218
225
  # avoid warning
219
226
  # plt.tight_layout()
@@ -231,7 +238,8 @@ class Plots:
231
238
  )
232
239
 
233
240
  def _check_binning(self, att, df):
234
- bin_reals_att = eval(self.util.config_val("EXPL", f"{att}.bin_reals", "False"))
241
+ bin_reals_att = eval(self.util.config_val(
242
+ "EXPL", f"{att}.bin_reals", "False"))
235
243
  if bin_reals_att:
236
244
  self.util.debug(f"binning continuous variable {att} to categories")
237
245
  att_new = f"{att}_binned"
@@ -305,7 +313,8 @@ class Plots:
305
313
  return ax, caption
306
314
 
307
315
  def plot_durations(self, df, filename, sample_selection, caption=""):
308
- fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
316
+ # one up because of the runs
317
+ fig_dir = self.util.get_path("fig_dir") + "../"
309
318
  try:
310
319
  ax = sns.histplot(df, x="duration", hue="class_label", kde=True)
311
320
  except AttributeError as ae:
@@ -333,7 +342,8 @@ class Plots:
333
342
 
334
343
  def describe_df(self, name, df, target, filename):
335
344
  """Make a stacked barplot of samples and speakers per sex and target values. speaker, gender and target columns must be present"""
336
- fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
345
+ fig_dir = self.util.get_path(
346
+ "fig_dir") + "../" # one up because of the runs
337
347
  sampl_num = df.shape[0]
338
348
  sex_col = "gender"
339
349
  if target == "gender":
@@ -380,8 +390,10 @@ class Plots:
380
390
 
381
391
  def scatter_plot(self, feats, label_df, label, dimred_type):
382
392
  dim_num = int(self.util.config_val("EXPL", "scatter.dim", 2))
383
- fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
384
- sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
393
+ # one up because of the runs
394
+ fig_dir = self.util.get_path("fig_dir") + "../"
395
+ sample_selection = self.util.config_val(
396
+ "EXPL", "sample_selection", "all")
385
397
  filename = f"{label}_{self.util.get_feattype_name()}_{sample_selection}_{dimred_type}_{str(dim_num)}d"
386
398
  filename = f"{fig_dir}{filename}.{self.format}"
387
399
  self.util.debug(f"computing {dimred_type}, this might take a while...")
@@ -423,7 +435,8 @@ class Plots:
423
435
 
424
436
  if dim_num == 2:
425
437
  plot_data = np.vstack((data.T, labels)).T
426
- plot_df = pd.DataFrame(data=plot_data, columns=("Dim_1", "Dim_2", "label"))
438
+ plot_df = pd.DataFrame(
439
+ data=plot_data, columns=("Dim_1", "Dim_2", "label"))
427
440
  # plt.tight_layout()
428
441
  ax = (
429
442
  sns.FacetGrid(plot_df, hue="label", height=6)
@@ -515,7 +528,8 @@ class Plots:
515
528
  def plot_feature(self, title, feature, label, df_labels, df_features):
516
529
  # remove fullstops in the name
517
530
  feature_name = feature.replace(".", "-")
518
- fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
531
+ # one up because of the runs
532
+ fig_dir = self.util.get_path("fig_dir") + "../"
519
533
  filename = f"{fig_dir}feat_dist_{title}_{feature_name}.{self.format}"
520
534
  if self.util.is_categorical(df_labels[label]):
521
535
  df_plot = pd.DataFrame(
@@ -554,7 +568,8 @@ class Plots:
554
568
  tree.plot_tree(model, feature_names=list(features.columns), ax=ax)
555
569
  # plt.tight_layout()
556
570
  # print(ax)
557
- fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
571
+ # one up because of the runs
572
+ fig_dir = self.util.get_path("fig_dir") + "../"
558
573
  exp_name = self.util.get_exp_name(only_data=True)
559
574
  format = self.util.config_val("PLOT", "format", "png")
560
575
  filename = f"{fig_dir}{exp_name}EXPL_tree-plot.{format}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.83.1
3
+ Version: 0.83.3
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -333,6 +333,14 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
333
333
  Changelog
334
334
  =========
335
335
 
336
+ Version 0.83.3
337
+ --------------
338
+ * fixed a naming error in trill features that prevented storage of experiment
339
+
340
+ Version 0.83.2
341
+ --------------
342
+ * added CUDA as the default device when it is available and no device is configured
343
+
336
344
  Version 0.83.1
337
345
  --------------
338
346
  * add test module to nkuluflag
@@ -2,11 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
3
3
  nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
4
4
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
5
- nkululeko/constants.py,sha256=i6-Vtyje9xE8w8o3lG27IiJczQFyrNbsxiXs7b4-q28,39
5
+ nkululeko/constants.py,sha256=zgeDgqWCuY5esPoOf_ve4SZAnwvJCy_A_qNl_zaWAHM,39
6
6
  nkululeko/demo.py,sha256=55kNFA2helMhOxD4yZuKg1JWDtlUUpxm-6uAnroIydI,3264
7
7
  nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
8
8
  nkululeko/demo_predictor.py,sha256=-ggSHc3DXxRzjzcGB4qFBOMvKsfUdTkkde50BDrS9dA,4755
9
- nkululeko/experiment.py,sha256=aueWoKJCQx8wU9daosh6n7ZDGhT2cfo_9Av5HIfN1_w,29605
9
+ nkululeko/experiment.py,sha256=RZfVevt7bYX8SGJ8o9HWKoZ_OVec7K_9A0HkgJYt8dA,29873
10
10
  nkululeko/explore.py,sha256=2wdoGRqldvsN1zCiWk0quSDgHHHUoF2UZOWQ1r-2OLM,2310
11
11
  nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
12
12
  nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzwM,3976
@@ -17,7 +17,7 @@ nkululeko/modelrunner.py,sha256=GwDXcE2gDQXat4W0-HhHQ1BcUNCRBXMBQ4QycfHp_5c,9288
17
17
  nkululeko/multidb.py,sha256=fG3VukEWP1vreVN4gB1IRXxwwg4jLftsSEYtu0o1f78,5634
18
18
  nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
19
19
  nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
20
- nkululeko/plots.py,sha256=K88ZRPFGX_r03BT742H06Dde20xZYdltv7dxjgUiAFA,23025
20
+ nkululeko/plots.py,sha256=nd9tF_61DyAx7oGZF8gTrHXazkgFjFe4eClxu1nQ_XU,23276
21
21
  nkululeko/predict.py,sha256=sF091sSSLnEWcISx9ZcULLie3tY5XeFsQJd6b3vrxFg,2409
22
22
  nkululeko/resample.py,sha256=3WbxkwgyTe_fW38046Rjxk3knOkFdhqn2C4nfhbUurQ,2287
23
23
  nkululeko/runmanager.py,sha256=eTM1DNQKt1lxYhzt4vZyZluPXW9sWlIJHNQzex4lkJU,7624
@@ -48,7 +48,7 @@ nkululeko/data/dataset.py,sha256=JGzMD6HIvkFkYBekmbmslIKc5ADaCj06T-8gpqH_kFo,276
48
48
  nkululeko/data/dataset_csv.py,sha256=uLa7jW4w2ft299NkpXZMD361kPHF8oSYoIZ_ucxhuOM,3884
49
49
  nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
50
  nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
51
- nkululeko/feat_extract/feats_agender_agender.py,sha256=5dA7YA-YGxODovMC7ynMk3bnpPjfs0ApvSfjqvoSZY0,3346
51
+ nkululeko/feat_extract/feats_agender_agender.py,sha256=tgH2BnwcxpvuLmOkrMbVdBSX0Onfz2MG12FsddalRKI,3424
52
52
  nkululeko/feat_extract/feats_analyser.py,sha256=_5oz4y-NZCEBgfNP2GZ9WNqQR50Hbykm0TvDVomWP0U,11399
53
53
  nkululeko/feat_extract/feats_auddim.py,sha256=VlzKKXTXa5kjLgQBWyEFy-daIyU1SkOwCCOIhKsWCvE,3162
54
54
  nkululeko/feat_extract/feats_audmodel.py,sha256=VjBNgAoxsHJhwr6Kwt9CxX6SaCM4RK_OV-GU2W5-bhU,3187
@@ -63,11 +63,11 @@ nkululeko/feat_extract/feats_praat.py,sha256=kZrS6srzH7WoWEd2prp1Dxw6g9JklFQGTNq
63
63
  nkululeko/feat_extract/feats_snr.py,sha256=9dqZ-4RpK98iJEssM3ttozNd18LWlZYM_QVXvp5xDcs,2829
64
64
  nkululeko/feat_extract/feats_spectra.py,sha256=5Pex8awIQC3cjQRHSu4NQFmg4quamG0RL3V3Yd0pJHs,3670
65
65
  nkululeko/feat_extract/feats_spkrec.py,sha256=VK4ma3uWzM0YZStsgRTirfkbzjWIfRWSgsYI038QlRY,4803
66
- nkululeko/feat_extract/feats_squim.py,sha256=jToXiwRq5-MQheAP6xczvry1uVIHYUrD8bM7Wb1cnqM,4528
67
- nkululeko/feat_extract/feats_trill.py,sha256=HXQBaPWTX0iNEjBY7RD8uyFeYjDieHqv8ZilE0Jb-Pg,3319
68
- nkululeko/feat_extract/feats_wav2vec2.py,sha256=tFGe4t6MIVxTDQYR8geNCtZ_3ALc-gpi-rmQbF74HfI,5285
66
+ nkululeko/feat_extract/feats_squim.py,sha256=Y31YmDmscuG0YozvxyBZIutO3id8t7IZJWCfKucw-6M,4617
67
+ nkululeko/feat_extract/feats_trill.py,sha256=K2ahhdpwpjgg3WZS1POg3UMP2U44i8cLZZvn5Rq7fUI,3228
68
+ nkululeko/feat_extract/feats_wav2vec2.py,sha256=9WUMfyddB_3nx79g7mZoQrRynhM1uEBWuOotRq8bxoU,5268
69
69
  nkululeko/feat_extract/feats_wavlm.py,sha256=ulxpGjifUFx2ZgGmY32SmBJGIuvkYHoLb2n1LZ8KMwA,4703
70
- nkululeko/feat_extract/feats_whisper.py,sha256=0N7Vj65OVi2PNoB_NrDjWT5lP6xZNKxFOZZIoxkJvcA,4533
70
+ nkululeko/feat_extract/feats_whisper.py,sha256=BFspQBI53HAgw22vBEeFskGwFZA-94Rpl17xM458HRo,4576
71
71
  nkululeko/feat_extract/featureset.py,sha256=HtgW2389rmlRAgFP3F1sSFzq2_iUVr2NhOfIXG9omt0,1448
72
72
  nkululeko/feat_extract/feinberg_praat.py,sha256=EP9pMALjlKdiYInLQdrZ7MmE499Mq-ISRCgqbqL3Rxc,21304
73
73
  nkululeko/losses/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -76,13 +76,13 @@ nkululeko/losses/loss_softf1loss.py,sha256=5gW-PuiqeAZcRgfwjueIOQtMokOjZWgQnVIv5
76
76
  nkululeko/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
77
  nkululeko/models/model.py,sha256=oAdKq2wY5lYKfpZkQwO46ojYRsj_Z-FR56oR1uHAWI0,11569
78
78
  nkululeko/models/model_bayes.py,sha256=wI7-sCwibqXMCHviu349TYjgJXXNXym-Z6ZM83uxlFQ,378
79
- nkululeko/models/model_cnn.py,sha256=j4NTp7quWqInzOPfpiMrTcfMbXkOsdlFF9ns0tW_ld4,9726
79
+ nkululeko/models/model_cnn.py,sha256=revCxyeX69DU6OA63YTnF28UaAFV7AmUfqODMCE_pbQ,10002
80
80
  nkululeko/models/model_gmm.py,sha256=onovzGBeguwZ-upXtuDLaBw9sd6fDDQslVBOrz1Z8TE,645
81
81
  nkululeko/models/model_knn.py,sha256=5tGqiPo2JTw9VLmD-MXNZKFJ5RTLA6uv_blJDJ9lScA,573
82
82
  nkululeko/models/model_knn_reg.py,sha256=Fbuk6Ku6eyrbbMEk7rB5dwfhvQOMsdZk6HI_0T0gYPw,580
83
83
  nkululeko/models/model_lin_reg.py,sha256=NBTnY2ULuhUBt5ArYQwskZ2Vq4BBDGkqd9SYBFl7Ql4,392
84
- nkululeko/models/model_mlp.py,sha256=lYhGrkqEj6fa6a_tcPrqEoorOpM7t7bjSfFLKEV6pu4,9107
85
- nkululeko/models/model_mlp_regression.py,sha256=NP1yEsqvpDcDBWWzDq7W4SHnXC1kE4fAo4A9aBCq3cY,10083
84
+ nkululeko/models/model_mlp.py,sha256=IuNGrLPx54-ZmpydH2yJdm2ddCm4rgu59Csv5ikbEpI,9471
85
+ nkululeko/models/model_mlp_regression.py,sha256=-ailThquUXwLkOj5jlJ4qn1vlb3nSHW5s0KS7GLp4qI,10290
86
86
  nkululeko/models/model_svm.py,sha256=QqwRjfG9I5y-57CcJAMUSbvYzV0DOlDcpDK5f4yQ_qw,914
87
87
  nkululeko/models/model_svr.py,sha256=p-Mb4Bn54yOe1upuHQKNpfj4ttOmQnm9pCB7ECkJkJQ,699
88
88
  nkululeko/models/model_tree.py,sha256=soXjV523eRvRZ-jbX7X_3S73Wto1B9bm7ZzzDmgYzTc,390
@@ -103,8 +103,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
103
103
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
104
104
  nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
105
105
  nkululeko/utils/util.py,sha256=_Z6OMJ3f-8TdETW9eqJYY5hwNRS5XCt9azzRnqoTTZE,12330
106
- nkululeko-0.83.1.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
107
- nkululeko-0.83.1.dist-info/METADATA,sha256=EgPYOS_ELZQmEvPWlX-klt8gmo59suFFL_HDptU474w,36080
108
- nkululeko-0.83.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
109
- nkululeko-0.83.1.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
110
- nkululeko-0.83.1.dist-info/RECORD,,
106
+ nkululeko-0.83.3.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
107
+ nkululeko-0.83.3.dist-info/METADATA,sha256=rowQ7syG1q0BotCIiP9ZbiiMgNNvYxuRKYTvIztWMXs,36267
108
+ nkululeko-0.83.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
109
+ nkululeko-0.83.3.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
110
+ nkululeko-0.83.3.dist-info/RECORD,,