nkululeko 0.83.1__py3-none-any.whl → 0.83.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +1 -1
- nkululeko/experiment.py +6 -3
- nkululeko/feat_extract/feats_agender_agender.py +4 -2
- nkululeko/feat_extract/feats_squim.py +8 -3
- nkululeko/feat_extract/feats_wav2vec2.py +8 -7
- nkululeko/feat_extract/feats_whisper.py +6 -3
- nkululeko/models/model_cnn.py +14 -6
- nkululeko/models/model_mlp.py +16 -7
- nkululeko/models/model_mlp_regression.py +15 -7
- nkululeko/plots.py +30 -15
- {nkululeko-0.83.1.dist-info → nkululeko-0.83.2.dist-info}/METADATA +5 -1
- {nkululeko-0.83.1.dist-info → nkululeko-0.83.2.dist-info}/RECORD +15 -15
- {nkululeko-0.83.1.dist-info → nkululeko-0.83.2.dist-info}/LICENSE +0 -0
- {nkululeko-0.83.1.dist-info → nkululeko-0.83.2.dist-info}/WHEEL +0 -0
- {nkululeko-0.83.1.dist-info → nkululeko-0.83.2.dist-info}/top_level.txt +0 -0
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
-VERSION="0.83.1"
+VERSION="0.83.2"
 SAMPLING_RATE = 16000
nkululeko/experiment.py
CHANGED
@@ -679,9 +679,12 @@ class Experiment:
         return result
 
     def load(self, filename):
-        f = open(filename, "rb")
-        tmp_dict = pickle.load(f)
-        f.close()
+        try:
+            f = open(filename, "rb")
+            tmp_dict = pickle.load(f)
+            f.close()
+        except EOFError as eof:
+            self.util.error(f"can't open file {filename}: {eof}")
         self.__dict__.update(tmp_dict)
         glob_conf.set_labels(self.labels)
 
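Note on the hunk above: wrapping the unpickling in try/except means a truncated or empty cache file is reported through the util error handler instead of surfacing as a bare EOFError. A minimal standalone sketch of the same pattern (the function name and the re-raise are illustrative, not nkululeko API):

import pickle

def load_cached(filename):
    # guard against truncated or empty cache files
    try:
        with open(filename, "rb") as f:
            return pickle.load(f)
    except EOFError as eof:
        raise RuntimeError(f"can't open file {filename}: {eof}") from eof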
nkululeko/feat_extract/feats_agender_agender.py
CHANGED
@@ -28,9 +28,11 @@ class AgenderAgenderSet(Featureset):
         if not os.path.isdir(model_root):
             cache_root = audeer.mkdir("cache")
             model_root = audeer.mkdir(model_root)
-            archive_path = audeer.download_url(model_url, cache_root, verbose=True)
+            archive_path = audeer.download_url(
+                model_url, cache_root, verbose=True)
             audeer.extract_archive(archive_path, model_root)
-        device = self.util.config_val("MODEL", "device", "cpu")
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        device = self.util.config_val("MODEL", "device", cuda)
         self.model = audonnx.load(model_root, device=device)
         # pytorch_total_params = sum(p.numel() for p in self.model.parameters())
         # self.util.debug(
nkululeko/feat_extract/feats_squim.py
CHANGED
@@ -28,12 +28,17 @@ from nkululeko.utils.util import Util
 
 
 class SquimSet(Featureset):
-    """Class to predict SQUIM features"""
+    """Class to predict SQUIM features."""
 
     def __init__(self, name, data_df, feats_type):
-        """Constructor. Is_train is needed to distinguish from test/dev sets, because they use the codebook from the training."""
+        """Constructor.
+
+        Is_train is needed to distinguish from test/dev sets,
+        because they use the codebook from the training.
+        """
         super().__init__(name, data_df, feats_type)
-        self.device = self.util.config_val("MODEL", "device", "cpu")
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
         self.model_initialized = False
 
     def init_model(self):
nkululeko/feat_extract/feats_wav2vec2.py
CHANGED
@@ -21,7 +21,11 @@ class Wav2vec2(Featureset):
     """Class to extract wav2vec2 embeddings"""
 
     def __init__(self, name, data_df, feat_type):
-        """Constructor. If_train is needed to distinguish from test/dev sets, because they use the codebook from the training"""
+        """Constructor.
+
+        If_train is needed to distinguish from test/dev sets,
+        because they use the codebook from the training
+        """
         super().__init__(name, data_df, feat_type)
         cuda = "cuda" if torch.cuda.is_available() else "cpu"
         self.device = self.util.config_val("MODEL", "device", cuda)
@@ -39,8 +43,7 @@ class Wav2vec2(Featureset):
         )
         config = transformers.AutoConfig.from_pretrained(model_path)
         layer_num = config.num_hidden_layers
-        hidden_layer = int(self.util.config_val(
-            "FEATS", "wav2vec2.layer", "0"))
+        hidden_layer = int(self.util.config_val("FEATS", "wav2vec2.layer", "0"))
         config.num_hidden_layers = layer_num - hidden_layer
         self.util.debug(f"using hidden layer #{config.num_hidden_layers}")
         self.processor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
@@ -55,8 +58,7 @@ class Wav2vec2(Featureset):
         """Extract the features or load them from disk if present."""
         store = self.util.get_path("store")
         storage = f"{store}{self.name}.pkl"
-        extract = self.util.config_val(
-            "FEATS", "needs_feature_extraction", False)
+        extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or no_reuse or not os.path.isfile(storage):
             if not self.model_initialized:
@@ -77,8 +79,7 @@ class Wav2vec2(Featureset):
                 emb = self.get_embeddings(signal, sampling_rate, file)
                 emb_series[idx] = emb
             # print(f"emb_series shape: {emb_series.shape}")
-            self.df = pd.DataFrame(
-                emb_series.values.tolist(), index=self.data_df.index)
+            self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
             # print(f"df shape: {self.df.shape}")
             self.df.to_pickle(storage)
             try:
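For context on the wav2vec2.layer option touched above: the extractor drops the top n transformer blocks so that the model's final hidden state comes from the requested layer, counted from the top. A rough sketch of that idea with the Hugging Face transformers API (the checkpoint name and layer choice below are placeholders, not taken from the diff):

import transformers

model_path = "facebook/wav2vec2-base"  # placeholder checkpoint
hidden_layer = 2                       # 0 would mean the topmost layer

config = transformers.AutoConfig.from_pretrained(model_path)
config.num_hidden_layers = config.num_hidden_layers - hidden_layer
# loading with the reduced config keeps only the lower blocks, so the last
# hidden state corresponds to the chosen layer
model = transformers.Wav2Vec2Model.from_pretrained(model_path, config=config)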
nkululeko/feat_extract/feats_whisper.py
CHANGED
@@ -32,19 +32,22 @@ class Whisper(Featureset):
         model_name = f"openai/{self.feat_type}"
         self.model = WhisperModel.from_pretrained(model_name).to(self.device)
         print(f"intialized Whisper model on {self.device}")
-        self.feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
+        self.feature_extractor = AutoFeatureExtractor.from_pretrained(
+            model_name)
         self.model_initialized = True
 
     def extract(self):
         """Extract the features or load them from disk if present."""
         store = self.util.get_path("store")
         storage = f"{store}{self.name}.pkl"
-        extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
+        extract = self.util.config_val(
+            "FEATS", "needs_feature_extraction", False)
         no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
         if extract or no_reuse or not os.path.isfile(storage):
             if not self.model_initialized:
                 self.init_model()
-            self.util.debug("extracting whisper embeddings, this might take a while...")
+            self.util.debug(
+                "extracting whisper embeddings, this might take a while...")
             emb_series = []
             for (file, start, end), _ in audeer.progress_bar(
                 self.data_df.iterrows(),
nkululeko/models/model_cnn.py
CHANGED
@@ -16,6 +16,7 @@ import numpy as np
 from sklearn.metrics import recall_score
 from collections import OrderedDict
 from PIL import Image
+from traitlets import default
 
 from nkululeko.utils.util import Util
 import nkululeko.glob_conf as glob_conf
@@ -48,6 +49,7 @@ class CNN_model(Model):
             self.util.error(f"unknown loss function: {criterion}")
         self.util.debug(f"using model with cross entropy loss function")
         # set up the model
+        # cuda = "cuda" if torch.cuda.is_available() else "cpu"
         self.device = self.util.config_val("MODEL", "device", "cpu")
         try:
             layers_string = glob_conf.config["MODEL"]["layers"]
@@ -84,7 +86,8 @@ class CNN_model(Model):
         train_set = self.Dataset_image(
             feats_train, df_train, self.target, transformations
         )
-        test_set = self.Dataset_image(feats_test, df_test, self.target, transformations)
+        test_set = self.Dataset_image(
+            feats_test, df_test, self.target, transformations)
         # Define data loaders
         self.trainloader = torch.utils.data.DataLoader(
             train_set,
@@ -137,7 +140,8 @@ class CNN_model(Model):
         losses = []
         for images, labels in self.trainloader:
             logits = self.model(images.to(self.device))
-            loss = self.criterion(logits, labels.to(self.device, dtype=torch.int64))
+            loss = self.criterion(logits, labels.to(
+                self.device, dtype=torch.int64))
             losses.append(loss.item())
             self.optimizer.zero_grad()
             loss.backward()
@@ -165,14 +169,16 @@ class CNN_model(Model):
 
         self.loss_eval = (np.asarray(losses)).mean()
         predictions = logits.argmax(dim=1)
-        uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
+        uar = recall_score(
+            targets.numpy(), predictions.numpy(), average="macro")
         return uar, targets, predictions
 
     def predict(self):
         _, truths, predictions = self.evaluate_model(
             self.model, self.testloader, self.device
         )
-        uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
+        uar, _, _ = self.evaluate_model(
+            self.model, self.trainloader, self.device)
         report = Reporter(truths, predictions, self.run, self.epoch)
         try:
             report.result.loss = self.loss
@@ -209,7 +215,8 @@ class CNN_model(Model):
         dir = self.util.get_path("model_dir")
         # name = f'{self.util.get_exp_name()}_{run}_{epoch:03d}.model'
         name = f"{self.util.get_exp_name(only_train=True)}_{self.run}_{self.epoch:03d}.model"
-        self.device = self.util.config_val("MODEL", "device", "cpu")
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
         layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
         self.store_path = dir + name
         drop = self.util.config_val("MODEL", "drop", False)
@@ -222,7 +229,8 @@ class CNN_model(Model):
     def load_path(self, path, run, epoch):
         self.set_id(run, epoch)
         with open(path, "rb") as handle:
-            self.device = self.util.config_val("MODEL", "device", "cpu")
+            cuda = "cuda" if torch.cuda.is_available() else "cpu"
+            self.device = self.util.config_val("MODEL", "device", cuda)
             layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
             self.store_path = path
             drop = self.util.config_val("MODEL", "drop", False)
nkululeko/models/model_mlp.py
CHANGED
@@ -34,8 +34,9 @@ class MLP_model(Model):
         else:
             self.util.error(f"unknown loss function: {criterion}")
         self.util.debug(f"using model with cross entropy loss function")
-        # set up the model
-        self.device = self.util.config_val("MODEL", "device", "cpu")
+        # set up the model, use GPU if availabe
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
         try:
             layers_string = glob_conf.config["MODEL"]["layers"]
         except KeyError as ke:
@@ -86,7 +87,8 @@ class MLP_model(Model):
         losses = []
         for features, labels in self.trainloader:
             logits = self.model(features.to(self.device))
-            loss = self.criterion(logits, labels.to(self.device, dtype=torch.int64))
+            loss = self.criterion(logits, labels.to(
+                self.device, dtype=torch.int64))
             losses.append(loss.item())
             self.optimizer.zero_grad()
             loss.backward()
@@ -114,14 +116,16 @@ class MLP_model(Model):
 
         self.loss_eval = (np.asarray(losses)).mean()
         predictions = logits.argmax(dim=1)
-        uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
+        uar = recall_score(
+            targets.numpy(), predictions.numpy(), average="macro")
         return uar, targets, predictions
 
     def predict(self):
         _, truths, predictions = self.evaluate_model(
             self.model, self.testloader, self.device
         )
-        uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
+        uar, _, _ = self.evaluate_model(
+            self.model, self.trainloader, self.device)
         report = Reporter(truths, predictions, self.run, self.epoch)
         try:
             report.result.loss = self.loss
@@ -179,6 +183,9 @@ class MLP_model(Model):
         features = np.reshape(features, (-1, 1)).T
         logits = self.model(features.to(self.device))
         # logits = self.model(features)
+        # if tensor conver to cpu
+        if isinstance(logits, torch.Tensor):
+            logits = logits.cpu()
         a = logits.numpy()
         res = {}
         for i in range(len(a[0])):
@@ -196,7 +203,8 @@ class MLP_model(Model):
         dir = self.util.get_path("model_dir")
         # name = f'{self.util.get_exp_name()}_{run}_{epoch:03d}.model'
         name = f"{self.util.get_exp_name(only_train=True)}_{self.run}_{self.epoch:03d}.model"
-        self.device = self.util.config_val("MODEL", "device", "cpu")
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
         layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
         self.store_path = dir + name
         drop = self.util.config_val("MODEL", "drop", False)
@@ -211,7 +219,8 @@ class MLP_model(Model):
     def load_path(self, path, run, epoch):
         self.set_id(run, epoch)
         with open(path, "rb") as handle:
-            self.device = self.util.config_val("MODEL", "device", "cpu")
+            cuda = "cuda" if torch.cuda.is_available() else "cpu"
+            self.device = self.util.config_val("MODEL", "device", cuda)
             layers = ast.literal_eval(glob_conf.config["MODEL"]["layers"])
             self.store_path = path
             drop = self.util.config_val("MODEL", "drop", False)
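The predict_sample hunk above moves the logits tensor to the CPU before calling .numpy(), which becomes necessary once the model may live on CUDA: NumPy conversion only works on CPU tensors. A tiny illustration of the same guard (the values are made up):

import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
logits = torch.tensor([[0.2, 1.3, -0.7]], device=device)
if isinstance(logits, torch.Tensor):
    logits = logits.cpu()          # .numpy() would raise on a CUDA tensor
probs = logits.softmax(dim=1).numpy()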
nkululeko/models/model_mlp_regression.py
CHANGED
@@ -9,6 +9,7 @@ import torch
 from audmetric import concordance_cc
 from audmetric import mean_absolute_error
 from audmetric import mean_squared_error
+from traitlets import default
 
 import nkululeko.glob_conf as glob_conf
 from nkululeko.losses.loss_ccc import ConcordanceCorCoeff
@@ -40,7 +41,8 @@ class MLP_Reg_model(Model):
             self.util.error(f"unknown loss function: {criterion}")
         self.util.debug(f"training model with {criterion} loss function")
         # set up the model
-        self.device = self.util.config_val("MODEL", "device", "cpu")
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
         layers_string = glob_conf.config["MODEL"]["layers"]
         self.util.debug(f"using layers {layers_string}")
         try:
@@ -50,7 +52,8 @@ class MLP_Reg_model(Model):
         drop = self.util.config_val("MODEL", "drop", False)
         if drop:
             self.util.debug(f"training with dropout: {drop}")
-        self.model = self.MLP(feats_train.shape[1], layers, 1, drop).to(self.device)
+        self.model = self.MLP(
+            feats_train.shape[1], layers, 1, drop).to(self.device)
         self.learning_rate = float(
             self.util.config_val("MODEL", "learning_rate", 0.0001)
         )
@@ -93,8 +96,10 @@ class MLP_Reg_model(Model):
         _, truths, predictions = self.evaluate_model(
             self.model, self.testloader, self.device
         )
-        result, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
-        report = Reporter(truths.numpy(), predictions.numpy(), self.run, self.epoch)
+        result, _, _ = self.evaluate_model(
+            self.model, self.trainloader, self.device)
+        report = Reporter(truths.numpy(), predictions.numpy(),
+                          self.run, self.epoch)
         try:
             report.result.loss = self.loss
         except AttributeError:  # if the model was loaded from disk the loss is unknown
@@ -128,9 +133,11 @@ class MLP_Reg_model(Model):
 
         def __getitem__(self, item):
             index = self.df.index[item]
-            features = self.df_features.loc[index, :].values.astype("float32").squeeze()
+            features = self.df_features.loc[index, :].values.astype(
+                "float32").squeeze()
             labels = (
-                np.array([self.df.loc[index, self.label]]).astype("float32").squeeze()
+                np.array([self.df.loc[index, self.label]]
+                         ).astype("float32").squeeze()
             )
             return features, labels
 
@@ -187,7 +194,8 @@ class MLP_Reg_model(Model):
                 end_index = (index + 1) * loader.batch_size
                 if end_index > len(loader.dataset):
                     end_index = len(loader.dataset)
-                logits[start_index:end_index] = model(features.to(device)).reshape(-1)
+                logits[start_index:end_index] = model(
+                    features.to(device)).reshape(-1)
                 targets[start_index:end_index] = labels
                 loss = self.criterion(
                     logits[start_index:end_index].to(
nkululeko/plots.py
CHANGED
@@ -28,7 +28,8 @@ class Plots:
             df_speaker["samplenum"] = df_speaker.shape[0]
             df_speakers = pd.concat([df_speakers, df_speaker.head(1)])
         # plot the distribution of samples per speaker
-        fig_dir = self.util.get_path("fig_dir") + "../"  # one up because of the runs
+        # one up because of the runs
+        fig_dir = self.util.get_path("fig_dir") + "../"
         self.util.debug(f"plotting samples per speaker")
         if "gender" in df_speakers:
             filename = f"samples_value_counts"
@@ -137,7 +138,8 @@ class Plots:
                         df, att1, class_label, att1, type_s
                     )
                 else:
-                    ax, caption = self._plot2cont(df, class_label, att1, type_s)
+                    ax, caption = self._plot2cont(
+                        df, class_label, att1, type_s)
                 self._save_plot(
                     ax,
                     caption,
@@ -150,7 +152,8 @@ class Plots:
                 att1 = att[0]
                 att2 = att[1]
                 if att1 == self.target or att2 == self.target:
-                    self.util.debug(f"no need to correlate {self.target} with itself")
+                    self.util.debug(
+                        f"no need to correlate {self.target} with itself")
                     return
                 if att1 not in df:
                     self.util.error(f"unknown feature: {att1}")
@@ -165,7 +168,8 @@ class Plots:
                 if self.util.is_categorical(df[att1]):
                     if self.util.is_categorical(df[att2]):
                         # class_label = cat, att1 = cat, att2 = cat
-                        ax, caption = self._plot2cat(df, att1, att2, att1, type_s)
+                        ax, caption = self._plot2cat(
+                            df, att1, att2, att1, type_s)
                     else:
                         # class_label = cat, att1 = cat, att2 = cont
                         ax, caption = self._plotcatcont(
@@ -186,7 +190,8 @@ class Plots:
                 if self.util.is_categorical(df[att1]):
                     if self.util.is_categorical(df[att2]):
                         # class_label = cont, att1 = cat, att2 = cat
-                        ax, caption = self._plot2cat(df, att1, att2, att1, type_s)
+                        ax, caption = self._plot2cat(
+                            df, att1, att2, att1, type_s)
                     else:
                         # class_label = cont, att1 = cat, att2 = cont
                         ax, caption = self._plot2cont_cat(
@@ -200,7 +205,8 @@ class Plots:
                         )
                     else:
                         # class_label = cont, att1 = cont, att2 = cont
-                        ax, caption = self._plot2cont(df, att1, att2, type_s)
+                        ax, caption = self._plot2cont(
+                            df, att1, att2, type_s)
 
                 self._save_plot(
                     ax, caption, f"Correlation of {att1} and {att2}", filename, type_s
@@ -213,7 +219,8 @@ class Plots:
         )
 
     def _save_plot(self, ax, caption, header, filename, type_s):
-        fig_dir = self.util.get_path("fig_dir") + "../"  # one up because of the runs
+        # one up because of the runs
+        fig_dir = self.util.get_path("fig_dir") + "../"
        fig = ax.figure
         # avoid warning
         # plt.tight_layout()
@@ -231,7 +238,8 @@ class Plots:
         )
 
     def _check_binning(self, att, df):
-        bin_reals_att = eval(self.util.config_val("EXPL", f"{att}.bin_reals", "False"))
+        bin_reals_att = eval(self.util.config_val(
+            "EXPL", f"{att}.bin_reals", "False"))
         if bin_reals_att:
             self.util.debug(f"binning continuous variable {att} to categories")
             att_new = f"{att}_binned"
@@ -305,7 +313,8 @@ class Plots:
         return ax, caption
 
     def plot_durations(self, df, filename, sample_selection, caption=""):
-        fig_dir = self.util.get_path("fig_dir") + "../"  # one up because of the runs
+        # one up because of the runs
+        fig_dir = self.util.get_path("fig_dir") + "../"
         try:
             ax = sns.histplot(df, x="duration", hue="class_label", kde=True)
         except AttributeError as ae:
@@ -333,7 +342,8 @@ class Plots:
 
     def describe_df(self, name, df, target, filename):
         """Make a stacked barplot of samples and speakers per sex and target values. speaker, gender and target columns must be present"""
-        fig_dir = self.util.get_path("fig_dir") + "../"  # one up because of the runs
+        fig_dir = self.util.get_path(
+            "fig_dir") + "../"  # one up because of the runs
         sampl_num = df.shape[0]
         sex_col = "gender"
         if target == "gender":
@@ -380,8 +390,10 @@ class Plots:
 
     def scatter_plot(self, feats, label_df, label, dimred_type):
         dim_num = int(self.util.config_val("EXPL", "scatter.dim", 2))
-        fig_dir = self.util.get_path("fig_dir") + "../"  # one up because of the runs
-        sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
+        # one up because of the runs
+        fig_dir = self.util.get_path("fig_dir") + "../"
+        sample_selection = self.util.config_val(
+            "EXPL", "sample_selection", "all")
         filename = f"{label}_{self.util.get_feattype_name()}_{sample_selection}_{dimred_type}_{str(dim_num)}d"
         filename = f"{fig_dir}{filename}.{self.format}"
         self.util.debug(f"computing {dimred_type}, this might take a while...")
@@ -423,7 +435,8 @@ class Plots:
 
         if dim_num == 2:
             plot_data = np.vstack((data.T, labels)).T
-            plot_df = pd.DataFrame(data=plot_data, columns=("Dim_1", "Dim_2", "label"))
+            plot_df = pd.DataFrame(
+                data=plot_data, columns=("Dim_1", "Dim_2", "label"))
             # plt.tight_layout()
             ax = (
                 sns.FacetGrid(plot_df, hue="label", height=6)
@@ -515,7 +528,8 @@ class Plots:
     def plot_feature(self, title, feature, label, df_labels, df_features):
         # remove fullstops in the name
         feature_name = feature.replace(".", "-")
-        fig_dir = self.util.get_path("fig_dir") + "../"  # one up because of the runs
+        # one up because of the runs
+        fig_dir = self.util.get_path("fig_dir") + "../"
         filename = f"{fig_dir}feat_dist_{title}_{feature_name}.{self.format}"
         if self.util.is_categorical(df_labels[label]):
             df_plot = pd.DataFrame(
@@ -554,7 +568,8 @@ class Plots:
         tree.plot_tree(model, feature_names=list(features.columns), ax=ax)
         # plt.tight_layout()
         # print(ax)
-        fig_dir = self.util.get_path("fig_dir") + "../"  # one up because of the runs
+        # one up because of the runs
+        fig_dir = self.util.get_path("fig_dir") + "../"
         exp_name = self.util.get_exp_name(only_data=True)
         format = self.util.config_val("PLOT", "format", "png")
         filename = f"{fig_dir}{exp_name}EXPL_tree-plot.{format}"
{nkululeko-0.83.1.dist-info → nkululeko-0.83.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.83.1
+Version: 0.83.2
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -333,6 +333,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schuller
 Changelog
 =========
 
+Version 0.83.2
+--------------
+* added default cuda if present and not stated
+
 Version 0.83.1
 --------------
 * add test module to nkuluflag
{nkululeko-0.83.1.dist-info → nkululeko-0.83.2.dist-info}/RECORD
CHANGED
@@ -2,11 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
 nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
 nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
-nkululeko/constants.py,sha256=
+nkululeko/constants.py,sha256=VE94aCLZ8N-hTKIgb4OLo1s9l_Fxncl9iTNis0eotFw,39
 nkululeko/demo.py,sha256=55kNFA2helMhOxD4yZuKg1JWDtlUUpxm-6uAnroIydI,3264
 nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
 nkululeko/demo_predictor.py,sha256=-ggSHc3DXxRzjzcGB4qFBOMvKsfUdTkkde50BDrS9dA,4755
-nkululeko/experiment.py,sha256=
+nkululeko/experiment.py,sha256=WyLiOJ_VxlaXoS1cwXruzYV9OESMjjedcFNreKE1Z8I,29728
 nkululeko/explore.py,sha256=2wdoGRqldvsN1zCiWk0quSDgHHHUoF2UZOWQ1r-2OLM,2310
 nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
 nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzwM,3976
@@ -17,7 +17,7 @@ nkululeko/modelrunner.py,sha256=GwDXcE2gDQXat4W0-HhHQ1BcUNCRBXMBQ4QycfHp_5c,9288
 nkululeko/multidb.py,sha256=fG3VukEWP1vreVN4gB1IRXxwwg4jLftsSEYtu0o1f78,5634
 nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
 nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
-nkululeko/plots.py,sha256=
+nkululeko/plots.py,sha256=nd9tF_61DyAx7oGZF8gTrHXazkgFjFe4eClxu1nQ_XU,23276
 nkululeko/predict.py,sha256=sF091sSSLnEWcISx9ZcULLie3tY5XeFsQJd6b3vrxFg,2409
 nkululeko/resample.py,sha256=3WbxkwgyTe_fW38046Rjxk3knOkFdhqn2C4nfhbUurQ,2287
 nkululeko/runmanager.py,sha256=eTM1DNQKt1lxYhzt4vZyZluPXW9sWlIJHNQzex4lkJU,7624
@@ -48,7 +48,7 @@ nkululeko/data/dataset.py,sha256=JGzMD6HIvkFkYBekmbmslIKc5ADaCj06T-8gpqH_kFo,276
 nkululeko/data/dataset_csv.py,sha256=uLa7jW4w2ft299NkpXZMD361kPHF8oSYoIZ_ucxhuOM,3884
 nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
-nkululeko/feat_extract/feats_agender_agender.py,sha256=
+nkululeko/feat_extract/feats_agender_agender.py,sha256=tgH2BnwcxpvuLmOkrMbVdBSX0Onfz2MG12FsddalRKI,3424
 nkululeko/feat_extract/feats_analyser.py,sha256=_5oz4y-NZCEBgfNP2GZ9WNqQR50Hbykm0TvDVomWP0U,11399
 nkululeko/feat_extract/feats_auddim.py,sha256=VlzKKXTXa5kjLgQBWyEFy-daIyU1SkOwCCOIhKsWCvE,3162
 nkululeko/feat_extract/feats_audmodel.py,sha256=VjBNgAoxsHJhwr6Kwt9CxX6SaCM4RK_OV-GU2W5-bhU,3187
@@ -63,11 +63,11 @@ nkululeko/feat_extract/feats_praat.py,sha256=kZrS6srzH7WoWEd2prp1Dxw6g9JklFQGTNq
 nkululeko/feat_extract/feats_snr.py,sha256=9dqZ-4RpK98iJEssM3ttozNd18LWlZYM_QVXvp5xDcs,2829
 nkululeko/feat_extract/feats_spectra.py,sha256=5Pex8awIQC3cjQRHSu4NQFmg4quamG0RL3V3Yd0pJHs,3670
 nkululeko/feat_extract/feats_spkrec.py,sha256=VK4ma3uWzM0YZStsgRTirfkbzjWIfRWSgsYI038QlRY,4803
-nkululeko/feat_extract/feats_squim.py,sha256=
+nkululeko/feat_extract/feats_squim.py,sha256=Y31YmDmscuG0YozvxyBZIutO3id8t7IZJWCfKucw-6M,4617
 nkululeko/feat_extract/feats_trill.py,sha256=HXQBaPWTX0iNEjBY7RD8uyFeYjDieHqv8ZilE0Jb-Pg,3319
-nkululeko/feat_extract/feats_wav2vec2.py,sha256=
+nkululeko/feat_extract/feats_wav2vec2.py,sha256=9WUMfyddB_3nx79g7mZoQrRynhM1uEBWuOotRq8bxoU,5268
 nkululeko/feat_extract/feats_wavlm.py,sha256=ulxpGjifUFx2ZgGmY32SmBJGIuvkYHoLb2n1LZ8KMwA,4703
-nkululeko/feat_extract/feats_whisper.py,sha256=
+nkululeko/feat_extract/feats_whisper.py,sha256=BFspQBI53HAgw22vBEeFskGwFZA-94Rpl17xM458HRo,4576
 nkululeko/feat_extract/featureset.py,sha256=HtgW2389rmlRAgFP3F1sSFzq2_iUVr2NhOfIXG9omt0,1448
 nkululeko/feat_extract/feinberg_praat.py,sha256=EP9pMALjlKdiYInLQdrZ7MmE499Mq-ISRCgqbqL3Rxc,21304
 nkululeko/losses/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -76,13 +76,13 @@ nkululeko/losses/loss_softf1loss.py,sha256=5gW-PuiqeAZcRgfwjueIOQtMokOjZWgQnVIv5
 nkululeko/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/models/model.py,sha256=oAdKq2wY5lYKfpZkQwO46ojYRsj_Z-FR56oR1uHAWI0,11569
 nkululeko/models/model_bayes.py,sha256=wI7-sCwibqXMCHviu349TYjgJXXNXym-Z6ZM83uxlFQ,378
-nkululeko/models/model_cnn.py,sha256=
+nkululeko/models/model_cnn.py,sha256=revCxyeX69DU6OA63YTnF28UaAFV7AmUfqODMCE_pbQ,10002
 nkululeko/models/model_gmm.py,sha256=onovzGBeguwZ-upXtuDLaBw9sd6fDDQslVBOrz1Z8TE,645
 nkululeko/models/model_knn.py,sha256=5tGqiPo2JTw9VLmD-MXNZKFJ5RTLA6uv_blJDJ9lScA,573
 nkululeko/models/model_knn_reg.py,sha256=Fbuk6Ku6eyrbbMEk7rB5dwfhvQOMsdZk6HI_0T0gYPw,580
 nkululeko/models/model_lin_reg.py,sha256=NBTnY2ULuhUBt5ArYQwskZ2Vq4BBDGkqd9SYBFl7Ql4,392
-nkululeko/models/model_mlp.py,sha256=
-nkululeko/models/model_mlp_regression.py,sha256=
+nkululeko/models/model_mlp.py,sha256=IuNGrLPx54-ZmpydH2yJdm2ddCm4rgu59Csv5ikbEpI,9471
+nkululeko/models/model_mlp_regression.py,sha256=-ailThquUXwLkOj5jlJ4qn1vlb3nSHW5s0KS7GLp4qI,10290
 nkululeko/models/model_svm.py,sha256=QqwRjfG9I5y-57CcJAMUSbvYzV0DOlDcpDK5f4yQ_qw,914
 nkululeko/models/model_svr.py,sha256=p-Mb4Bn54yOe1upuHQKNpfj4ttOmQnm9pCB7ECkJkJQ,699
 nkululeko/models/model_tree.py,sha256=soXjV523eRvRZ-jbX7X_3S73Wto1B9bm7ZzzDmgYzTc,390
@@ -103,8 +103,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
 nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
 nkululeko/utils/util.py,sha256=_Z6OMJ3f-8TdETW9eqJYY5hwNRS5XCt9azzRnqoTTZE,12330
-nkululeko-0.83.1.dist-info/LICENSE,sha256=
-nkululeko-0.83.1.dist-info/METADATA,sha256=
-nkululeko-0.83.1.dist-info/WHEEL,sha256=
-nkululeko-0.83.1.dist-info/top_level.txt,sha256=
-nkululeko-0.83.1.dist-info/RECORD,,
+nkululeko-0.83.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.83.2.dist-info/METADATA,sha256=DMkXO8jSm6iR4eETrG2aEK__7MfPhpAvOe6Tf99n_HE,36158
+nkululeko-0.83.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+nkululeko-0.83.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.83.2.dist-info/RECORD,,
{nkululeko-0.83.1.dist-info → nkululeko-0.83.2.dist-info}/LICENSE
File without changes
{nkululeko-0.83.1.dist-info → nkululeko-0.83.2.dist-info}/WHEEL
File without changes
{nkululeko-0.83.1.dist-info → nkululeko-0.83.2.dist-info}/top_level.txt
File without changes