nkululeko 0.86.0__py3-none-any.whl → 0.86.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +1 -1
- nkululeko/experiment.py +3 -2
- nkululeko/models/model_mlp.py +2 -2
- nkululeko/models/model_tuned.py +79 -16
- nkululeko/plots.py +19 -26
- nkululeko/reporting/reporter.py +17 -0
- nkululeko/resample.py +32 -6
- nkululeko/utils/util.py +6 -0
- {nkululeko-0.86.0.dist-info → nkululeko-0.86.2.dist-info}/METADATA +11 -1
- {nkululeko-0.86.0.dist-info → nkululeko-0.86.2.dist-info}/RECORD +13 -13
- {nkululeko-0.86.0.dist-info → nkululeko-0.86.2.dist-info}/LICENSE +0 -0
- {nkululeko-0.86.0.dist-info → nkululeko-0.86.2.dist-info}/WHEEL +0 -0
- {nkululeko-0.86.0.dist-info → nkululeko-0.86.2.dist-info}/top_level.txt +0 -0
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.86.0"
|
1
|
+
VERSION="0.86.2"
|
2
2
|
SAMPLING_RATE = 16000
|
nkululeko/experiment.py
CHANGED
@@ -72,8 +72,9 @@ class Experiment:
|
|
72
72
|
if self.util.config_val("REPORT", "latex", False):
|
73
73
|
self.report.export_latex()
|
74
74
|
|
75
|
-
|
76
|
-
|
75
|
+
# moved to util
|
76
|
+
# def get_name(self):
|
77
|
+
# return self.util.get_exp_name()
|
77
78
|
|
78
79
|
def set_globals(self, config_obj):
|
79
80
|
"""install a config object in the global space"""
|
nkululeko/models/model_mlp.py
CHANGED
@@ -14,12 +14,12 @@ from nkululeko.losses.loss_softf1loss import SoftF1Loss
|
|
14
14
|
|
15
15
|
|
16
16
|
class MLP_model(Model):
|
17
|
-
"""MLP = multi layer perceptron"""
|
17
|
+
"""MLP = multi layer perceptron."""
|
18
18
|
|
19
19
|
is_classifier = True
|
20
20
|
|
21
21
|
def __init__(self, df_train, df_test, feats_train, feats_test):
|
22
|
-
"""Constructor taking the configuration and all dataframes"""
|
22
|
+
"""Constructor taking the configuration and all dataframes."""
|
23
23
|
super().__init__(df_train, df_test, feats_train, feats_test)
|
24
24
|
super().set_model_type("ann")
|
25
25
|
self.name = "mlp"
|
nkululeko/models/model_tuned.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
"""Code based on @jwagner."""
|
2
2
|
|
3
|
+
import ast
|
3
4
|
import dataclasses
|
4
5
|
import json
|
5
6
|
import os
|
@@ -51,8 +52,9 @@ class TunedModel(BaseModel):
|
|
51
52
|
self.batch_size = int(self.util.config_val("MODEL", "batch_size", "8"))
|
52
53
|
self.util.debug(f"batch size: {self.batch_size}")
|
53
54
|
self.learning_rate = float(
|
54
|
-
self.util.config_val("MODEL", "learning_rate", 0.0001)
|
55
|
+
self.util.config_val("MODEL", "learning_rate", "0.0001")
|
55
56
|
)
|
57
|
+
self.max_duration = float(self.util.config_val("MODEL", "max_duration", "8.0"))
|
56
58
|
self.df_train, self.df_test = df_train, df_test
|
57
59
|
self.epoch_num = int(self.util.config_val("EXP", "epochs", 1))
|
58
60
|
drop = self.util.config_val("MODEL", "drop", False)
|
@@ -67,7 +69,7 @@ class TunedModel(BaseModel):
|
|
67
69
|
pretrained_model = self.util.config_val("MODEL", "pretrained_model", model_path)
|
68
70
|
self.num_layers = None
|
69
71
|
self.sampling_rate = 16000
|
70
|
-
self.max_duration_sec =
|
72
|
+
self.max_duration_sec = self.max_duration
|
71
73
|
self.accumulation_steps = 4
|
72
74
|
|
73
75
|
# print finetuning information via debug
|
@@ -95,6 +97,7 @@ class TunedModel(BaseModel):
|
|
95
97
|
|
96
98
|
# load pre-trained model
|
97
99
|
if self.is_classifier:
|
100
|
+
self.util.debug(f"Task is classification.")
|
98
101
|
le = glob_conf.label_encoder
|
99
102
|
mapping = dict(zip(le.classes_, range(len(le.classes_))))
|
100
103
|
target_mapping = {k: int(v) for k, v in mapping.items()}
|
@@ -102,15 +105,16 @@ class TunedModel(BaseModel):
|
|
102
105
|
value: key for key, value in target_mapping.items()
|
103
106
|
}
|
104
107
|
self.config = transformers.AutoConfig.from_pretrained(
|
105
|
-
|
108
|
+
pretrained_model,
|
106
109
|
num_labels=len(target_mapping),
|
107
110
|
label2id=target_mapping,
|
108
111
|
id2label=target_mapping_reverse,
|
109
112
|
finetuning_task=target_name,
|
110
113
|
)
|
111
114
|
else:
|
115
|
+
self.util.debug(f"Task is regression.")
|
112
116
|
self.config = transformers.AutoConfig.from_pretrained(
|
113
|
-
|
117
|
+
pretrained_model,
|
114
118
|
num_labels=1,
|
115
119
|
finetuning_task=target_name,
|
116
120
|
)
|
@@ -232,8 +236,8 @@ class TunedModel(BaseModel):
|
|
232
236
|
def train(self):
|
233
237
|
"""Train the model."""
|
234
238
|
model_root = self.util.get_path("model_dir")
|
235
|
-
log_root = os.path.join(self.util.get_exp_dir(), "log")
|
236
|
-
audeer.mkdir(log_root)
|
239
|
+
self.log_root = os.path.join(self.util.get_exp_dir(), "log")
|
240
|
+
audeer.mkdir(self.log_root)
|
237
241
|
self.torch_root = audeer.path(model_root, "torch")
|
238
242
|
conf_file = os.path.join(self.torch_root, "config.json")
|
239
243
|
if os.path.isfile(conf_file):
|
@@ -241,17 +245,35 @@ class TunedModel(BaseModel):
|
|
241
245
|
self.load(self.run, self.epoch_num)
|
242
246
|
return
|
243
247
|
targets = pd.DataFrame(self.dataset["train"]["targets"])
|
244
|
-
counts = targets[0].value_counts().sort_index()
|
245
248
|
|
246
249
|
if self.is_classifier:
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
250
|
+
criterion = self.util.config_val("MODEL", "loss", "cross")
|
251
|
+
if criterion == "cross":
|
252
|
+
if self.util.config_val("MODEL", "class_weight", False):
|
253
|
+
counts = targets[0].value_counts().sort_index()
|
254
|
+
train_weights = 1 / counts
|
255
|
+
train_weights /= train_weights.sum()
|
256
|
+
self.util.debug(f"train weights: {train_weights}")
|
257
|
+
criterion = torch.nn.CrossEntropyLoss(
|
258
|
+
weight=torch.Tensor(train_weights).to("cuda"),
|
259
|
+
)
|
260
|
+
else:
|
261
|
+
criterion = torch.nn.CrossEntropyLoss()
|
262
|
+
else:
|
263
|
+
self.util.error(f"criterion {criterion} not supported for classifier")
|
253
264
|
else:
|
254
|
-
criterion =
|
265
|
+
self.criterion = self.util.config_val("MODEL", "loss", "ccc")
|
266
|
+
if criterion == "1-ccc":
|
267
|
+
criterion = ConcordanceCorCoeff()
|
268
|
+
elif criterion == "mse":
|
269
|
+
criterion = torch.nn.MSELoss()
|
270
|
+
elif criterion == "mae":
|
271
|
+
criterion = torch.nn.L1Loss()
|
272
|
+
else:
|
273
|
+
self.util.error(f"criterion {criterion} not supported for regressor")
|
274
|
+
|
275
|
+
# set push_to_hub value, default false
|
276
|
+
push = eval(self.util.config_val("MODEL", "push_to_hub", "False"))
|
255
277
|
|
256
278
|
class Trainer(transformers.Trainer):
|
257
279
|
def compute_loss(
|
@@ -278,10 +300,20 @@ class TunedModel(BaseModel):
|
|
278
300
|
num_steps = max(1, num_steps)
|
279
301
|
|
280
302
|
metrics_for_best_model = self.measure.upper()
|
303
|
+
if metrics_for_best_model == "UAR":
|
304
|
+
greater_is_better = True
|
305
|
+
elif metrics_for_best_model == "CCC":
|
306
|
+
greater_is_better = True
|
307
|
+
elif metrics_for_best_model == "MSE":
|
308
|
+
greater_is_better = False
|
309
|
+
elif metrics_for_best_model == "MAE":
|
310
|
+
greater_is_better = False
|
311
|
+
else:
|
312
|
+
self.util.error(f"unknown metric/measure: {metrics_for_best_model}")
|
281
313
|
|
282
314
|
training_args = transformers.TrainingArguments(
|
283
315
|
output_dir=model_root,
|
284
|
-
logging_dir=log_root,
|
316
|
+
logging_dir=self.log_root,
|
285
317
|
per_device_train_batch_size=self.batch_size,
|
286
318
|
per_device_eval_batch_size=self.batch_size,
|
287
319
|
gradient_accumulation_steps=self.accumulation_steps,
|
@@ -295,10 +327,12 @@ class TunedModel(BaseModel):
|
|
295
327
|
learning_rate=self.learning_rate,
|
296
328
|
save_total_limit=2,
|
297
329
|
metric_for_best_model=metrics_for_best_model,
|
298
|
-
greater_is_better=
|
330
|
+
greater_is_better=greater_is_better,
|
299
331
|
load_best_model_at_end=True,
|
300
332
|
remove_unused_columns=False,
|
301
333
|
report_to="none",
|
334
|
+
push_to_hub=push,
|
335
|
+
hub_model_id=f"{self.util.get_name()}",
|
302
336
|
)
|
303
337
|
|
304
338
|
trainer = Trainer(
|
@@ -311,8 +345,15 @@ class TunedModel(BaseModel):
|
|
311
345
|
tokenizer=self.processor.feature_extractor,
|
312
346
|
callbacks=[transformers.integrations.TensorBoardCallback()],
|
313
347
|
)
|
348
|
+
|
314
349
|
trainer.train()
|
315
350
|
trainer.save_model(self.torch_root)
|
351
|
+
log_file = os.path.join(
|
352
|
+
self.log_root,
|
353
|
+
"log.txt",
|
354
|
+
)
|
355
|
+
with open(log_file, "w") as text_file:
|
356
|
+
print(trainer.state.log_history, file=text_file)
|
316
357
|
self.util.debug(f"saved best model to {self.torch_root}")
|
317
358
|
self.load(self.run, self.epoch)
|
318
359
|
|
@@ -343,8 +384,30 @@ class TunedModel(BaseModel):
|
|
343
384
|
self.run,
|
344
385
|
self.epoch_num,
|
345
386
|
)
|
387
|
+
self._plot_epoch_progression(report)
|
346
388
|
return report
|
347
389
|
|
390
|
+
def _plot_epoch_progression(self, report):
|
391
|
+
log_file = os.path.join(
|
392
|
+
self.log_root,
|
393
|
+
"log.txt",
|
394
|
+
)
|
395
|
+
with open(log_file, "r") as file:
|
396
|
+
data = file.read()
|
397
|
+
list = ast.literal_eval(data)
|
398
|
+
epochs, vals, loss = [], [], []
|
399
|
+
for index, tp in enumerate(list):
|
400
|
+
try:
|
401
|
+
epochs.append(tp["epoch"])
|
402
|
+
measure = self.measure.upper()
|
403
|
+
vals.append(tp[f"eval_{measure}"])
|
404
|
+
loss.append(tp["eval_loss"])
|
405
|
+
except KeyError:
|
406
|
+
del epochs[-1]
|
407
|
+
# print(f'no value at {index}')
|
408
|
+
df = pd.DataFrame({"results": vals, "losses": loss}, index=epochs)
|
409
|
+
report.plot_epoch_progression_finetuned(df)
|
410
|
+
|
348
411
|
def predict_sample(self, signal):
|
349
412
|
"""Predict one sample"""
|
350
413
|
prediction = {}
|
nkululeko/plots.py
CHANGED
@@ -1,21 +1,23 @@
|
|
1
1
|
# plots.py
|
2
|
-
import
|
2
|
+
import ast
|
3
|
+
|
3
4
|
import matplotlib.pyplot as plt
|
4
|
-
from sklearn.manifold import TSNE
|
5
|
-
import seaborn as sns
|
6
5
|
import numpy as np
|
7
|
-
import
|
6
|
+
import pandas as pd
|
8
7
|
from scipy import stats
|
9
|
-
|
10
|
-
|
8
|
+
import seaborn as sns
|
9
|
+
from sklearn.manifold import TSNE
|
10
|
+
|
11
11
|
import nkululeko.glob_conf as glob_conf
|
12
|
-
from nkululeko.reporting.report_item import ReportItem
|
13
12
|
from nkululeko.reporting.defines import Header
|
13
|
+
from nkululeko.reporting.report_item import ReportItem
|
14
|
+
import nkululeko.utils.stats as su
|
15
|
+
from nkululeko.utils.util import Util
|
14
16
|
|
15
17
|
|
16
18
|
class Plots:
|
17
19
|
def __init__(self):
|
18
|
-
"""Initializing the util system"""
|
20
|
+
"""Initializing the util system."""
|
19
21
|
self.util = Util("plots")
|
20
22
|
self.format = self.util.config_val("PLOT", "format", "png")
|
21
23
|
self.target = self.util.config_val("DATA", "target", "emotion")
|
@@ -138,8 +140,7 @@ class Plots:
|
|
138
140
|
df, att1, class_label, att1, type_s
|
139
141
|
)
|
140
142
|
else:
|
141
|
-
ax, caption = self._plot2cont(
|
142
|
-
df, class_label, att1, type_s)
|
143
|
+
ax, caption = self._plot2cont(df, class_label, att1, type_s)
|
143
144
|
self._save_plot(
|
144
145
|
ax,
|
145
146
|
caption,
|
@@ -152,8 +153,7 @@ class Plots:
|
|
152
153
|
att1 = att[0]
|
153
154
|
att2 = att[1]
|
154
155
|
if att1 == self.target or att2 == self.target:
|
155
|
-
self.util.debug(
|
156
|
-
f"no need to correlate {self.target} with itself")
|
156
|
+
self.util.debug(f"no need to correlate {self.target} with itself")
|
157
157
|
return
|
158
158
|
if att1 not in df:
|
159
159
|
self.util.error(f"unknown feature: {att1}")
|
@@ -168,8 +168,7 @@ class Plots:
|
|
168
168
|
if self.util.is_categorical(df[att1]):
|
169
169
|
if self.util.is_categorical(df[att2]):
|
170
170
|
# class_label = cat, att1 = cat, att2 = cat
|
171
|
-
ax, caption = self._plot2cat(
|
172
|
-
df, att1, att2, att1, type_s)
|
171
|
+
ax, caption = self._plot2cat(df, att1, att2, att1, type_s)
|
173
172
|
else:
|
174
173
|
# class_label = cat, att1 = cat, att2 = cont
|
175
174
|
ax, caption = self._plotcatcont(
|
@@ -190,8 +189,7 @@ class Plots:
|
|
190
189
|
if self.util.is_categorical(df[att1]):
|
191
190
|
if self.util.is_categorical(df[att2]):
|
192
191
|
# class_label = cont, att1 = cat, att2 = cat
|
193
|
-
ax, caption = self._plot2cat(
|
194
|
-
df, att1, att2, att1, type_s)
|
192
|
+
ax, caption = self._plot2cat(df, att1, att2, att1, type_s)
|
195
193
|
else:
|
196
194
|
# class_label = cont, att1 = cat, att2 = cont
|
197
195
|
ax, caption = self._plot2cont_cat(
|
@@ -205,8 +203,7 @@ class Plots:
|
|
205
203
|
)
|
206
204
|
else:
|
207
205
|
# class_label = cont, att1 = cont, att2 = cont
|
208
|
-
ax, caption = self._plot2cont(
|
209
|
-
df, att1, att2, type_s)
|
206
|
+
ax, caption = self._plot2cont(df, att1, att2, type_s)
|
210
207
|
|
211
208
|
self._save_plot(
|
212
209
|
ax, caption, f"Correlation of {att1} and {att2}", filename, type_s
|
@@ -238,8 +235,7 @@ class Plots:
|
|
238
235
|
)
|
239
236
|
|
240
237
|
def _check_binning(self, att, df):
|
241
|
-
bin_reals_att = eval(self.util.config_val(
|
242
|
-
"EXPL", f"{att}.bin_reals", "False"))
|
238
|
+
bin_reals_att = eval(self.util.config_val("EXPL", f"{att}.bin_reals", "False"))
|
243
239
|
if bin_reals_att:
|
244
240
|
self.util.debug(f"binning continuous variable {att} to categories")
|
245
241
|
att_new = f"{att}_binned"
|
@@ -342,8 +338,7 @@ class Plots:
|
|
342
338
|
|
343
339
|
def describe_df(self, name, df, target, filename):
|
344
340
|
"""Make a stacked barplot of samples and speakers per sex and target values. speaker, gender and target columns must be present"""
|
345
|
-
fig_dir = self.util.get_path(
|
346
|
-
"fig_dir") + "../" # one up because of the runs
|
341
|
+
fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
|
347
342
|
sampl_num = df.shape[0]
|
348
343
|
sex_col = "gender"
|
349
344
|
if target == "gender":
|
@@ -392,8 +387,7 @@ class Plots:
|
|
392
387
|
dim_num = int(self.util.config_val("EXPL", "scatter.dim", 2))
|
393
388
|
# one up because of the runs
|
394
389
|
fig_dir = self.util.get_path("fig_dir") + "../"
|
395
|
-
sample_selection = self.util.config_val(
|
396
|
-
"EXPL", "sample_selection", "all")
|
390
|
+
sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
|
397
391
|
filename = f"{label}_{self.util.get_feattype_name()}_{sample_selection}_{dimred_type}_{str(dim_num)}d"
|
398
392
|
filename = f"{fig_dir}{filename}.{self.format}"
|
399
393
|
self.util.debug(f"computing {dimred_type}, this might take a while...")
|
@@ -435,8 +429,7 @@ class Plots:
|
|
435
429
|
|
436
430
|
if dim_num == 2:
|
437
431
|
plot_data = np.vstack((data.T, labels)).T
|
438
|
-
plot_df = pd.DataFrame(
|
439
|
-
data=plot_data, columns=("Dim_1", "Dim_2", "label"))
|
432
|
+
plot_df = pd.DataFrame(data=plot_data, columns=("Dim_1", "Dim_2", "label"))
|
440
433
|
# plt.tight_layout()
|
441
434
|
ax = (
|
442
435
|
sns.FacetGrid(plot_df, hue="label", height=6)
|
nkululeko/reporting/reporter.py
CHANGED
@@ -300,6 +300,23 @@ class Reporter:
|
|
300
300
|
def get_result(self):
|
301
301
|
return self.result
|
302
302
|
|
303
|
+
def plot_epoch_progression_finetuned(self, df):
|
304
|
+
plot_name_suggest = self.util.get_exp_name()
|
305
|
+
fig_dir = self.util.get_path("fig_dir")
|
306
|
+
plot_name = (
|
307
|
+
self.util.config_val("PLOT", "name", plot_name_suggest)
|
308
|
+
+ "_epoch_progression"
|
309
|
+
)
|
310
|
+
ax = df.plot()
|
311
|
+
fig = ax.figure
|
312
|
+
plt.xlabel("epochs")
|
313
|
+
plt.ylabel(f"{self.MEASURE}")
|
314
|
+
plot_path = f"{fig_dir}{plot_name}.{self.format}"
|
315
|
+
plt.savefig(plot_path)
|
316
|
+
self.util.debug(f"plotted epoch progression to {plot_path}")
|
317
|
+
plt.close(fig)
|
318
|
+
fig.clear()
|
319
|
+
|
303
320
|
def plot_epoch_progression(self, reports, out_name):
|
304
321
|
fig_dir = self.util.get_path("fig_dir")
|
305
322
|
results, losses, train_results, losses_eval = [], [], [], []
|
nkululeko/resample.py
CHANGED
@@ -11,22 +11,32 @@ from nkululeko.utils.util import Util
|
|
11
11
|
|
12
12
|
from nkululeko.constants import VERSION
|
13
13
|
from nkululeko.experiment import Experiment
|
14
|
+
from nkululeko.utils.files import find_files
|
14
15
|
|
15
16
|
|
16
17
|
def main(src_dir):
|
17
18
|
parser = argparse.ArgumentParser(
|
18
|
-
description="Call the nkululeko RESAMPLE framework."
|
19
|
+
description="Call the nkululeko RESAMPLE framework."
|
20
|
+
)
|
19
21
|
parser.add_argument("--config", default=None,
|
20
22
|
help="The base configuration")
|
21
23
|
parser.add_argument("--file", default=None,
|
22
24
|
help="The input audio file to resample")
|
23
|
-
parser.add_argument(
|
24
|
-
|
25
|
+
parser.add_argument(
|
26
|
+
"--folder",
|
27
|
+
default=None,
|
28
|
+
help="The input directory containing audio files and subdirectories to resample",
|
29
|
+
)
|
30
|
+
parser.add_argument(
|
31
|
+
"--replace", action="store_true", help="Replace the original audio file"
|
32
|
+
)
|
25
33
|
|
26
34
|
args = parser.parse_args()
|
27
35
|
|
28
|
-
if args.file is None and args.config is None:
|
29
|
-
print(
|
36
|
+
if args.file is None and args.folder is None and args.config is None:
|
37
|
+
print(
|
38
|
+
"ERROR: Either --file, --folder, or --config argument must be provided."
|
39
|
+
)
|
30
40
|
exit()
|
31
41
|
|
32
42
|
if args.file is not None:
|
@@ -42,6 +52,20 @@ def main(src_dir):
|
|
42
52
|
util.debug(f"Resampling audio file: {args.file}")
|
43
53
|
rs = Resampler(df_sample, not_testing=True, replace=args.replace)
|
44
54
|
rs.resample()
|
55
|
+
elif args.folder is not None:
|
56
|
+
# Load all audio files in the directory and its subdirectories into a DataFrame
|
57
|
+
files = find_files(args.folder, relative=True, ext=["wav"])
|
58
|
+
files = pd.Series(files)
|
59
|
+
df_sample = pd.DataFrame(index=files)
|
60
|
+
df_sample.index = audformat.utils.to_segmented_index(
|
61
|
+
df_sample.index, allow_nat=False
|
62
|
+
)
|
63
|
+
|
64
|
+
# Resample the audio files
|
65
|
+
util = Util("resampler", has_config=False)
|
66
|
+
util.debug(f"Resampling audio files in directory: {args.folder}")
|
67
|
+
rs = Resampler(df_sample, not_testing=True, replace=args.replace)
|
68
|
+
rs.resample()
|
45
69
|
else:
|
46
70
|
# Existing code for handling INI file
|
47
71
|
config_file = args.config
|
@@ -66,6 +90,7 @@ def main(src_dir):
|
|
66
90
|
|
67
91
|
if util.config_val("EXP", "no_warnings", False):
|
68
92
|
import warnings
|
93
|
+
|
69
94
|
warnings.filterwarnings("ignore")
|
70
95
|
|
71
96
|
# Load the data
|
@@ -74,7 +99,8 @@ def main(src_dir):
|
|
74
99
|
# Split into train and test
|
75
100
|
expr.fill_train_and_tests()
|
76
101
|
util.debug(
|
77
|
-
f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}"
|
102
|
+
f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}"
|
103
|
+
)
|
78
104
|
|
79
105
|
sample_selection = util.config_val(
|
80
106
|
"RESAMPLE", "sample_selection", "all")
|
nkululeko/utils/util.py
CHANGED
@@ -134,6 +134,12 @@ class Util:
|
|
134
134
|
pd_series.dtype, pd.CategoricalDtype
|
135
135
|
)
|
136
136
|
|
137
|
+
def get_name(self):
|
138
|
+
"""
|
139
|
+
Get the name of the experiment
|
140
|
+
"""
|
141
|
+
return self.config["EXP"]["name"]
|
142
|
+
|
137
143
|
def get_exp_dir(self):
|
138
144
|
"""
|
139
145
|
Get the experiment directory
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.86.0
|
3
|
+
Version: 0.86.2
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -256,6 +256,7 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
|
|
256
256
|
* [Compare several databases](http://blog.syntheticspeech.de/2024/01/02/nkululeko-compare-several-databases/)
|
257
257
|
* [Tweak the target variable for database comparison](http://blog.syntheticspeech.de/2024/03/13/nkululeko-how-to-tweak-the-target-variable-for-database-comparison/)
|
258
258
|
* [How to run multiple experiments in one go](http://blog.syntheticspeech.de/2022/03/28/how-to-run-multiple-experiments-in-one-go-with-nkululeko/)
|
259
|
+
* [How to finetune a transformer-model](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
|
259
260
|
|
260
261
|
### <a name="helloworld">Hello World example</a>
|
261
262
|
* NEW: [Here's a Google colab that runs this example out-of-the-box](https://colab.research.google.com/drive/1GYNBd5cdZQ1QC3Jm58qoeMaJg3UuPhjw?usp=sharing#scrollTo=4G_SjuF9xeQf), and here is the same [with Kaggle](https://www.kaggle.com/felixburk/nkululeko-hello-world-example)
|
@@ -333,6 +334,15 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
|
|
333
334
|
Changelog
|
334
335
|
=========
|
335
336
|
|
337
|
+
Version 0.86.2
|
338
|
+
--------------
|
339
|
+
* plots epoch progression for finetuned models now
|
340
|
+
|
341
|
+
Version 0.86.1
|
342
|
+
--------------
|
343
|
+
* functionality to push to hub
|
344
|
+
* fixed bug that prevented wavlm finetuning
|
345
|
+
|
336
346
|
Version 0.86.0
|
337
347
|
--------------
|
338
348
|
* added regression to finetuning
|
@@ -2,11 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
|
2
2
|
nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
|
3
3
|
nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
|
4
4
|
nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
|
5
|
-
nkululeko/constants.py,sha256=
|
5
|
+
nkululeko/constants.py,sha256=E9mXpvAI5IDamRRXgBlBH8XGTw1xEjEBzNibjhFPEFc,39
|
6
6
|
nkululeko/demo.py,sha256=8bl15Kitoesnz8oa8yrs52T6YCSOhWbbq9PnZ8Hj6D0,3232
|
7
7
|
nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
|
8
8
|
nkululeko/demo_predictor.py,sha256=es56xbT8ifkS_vnrlb5NTZT54gNmeUtNlA4zVA_gnN8,4757
|
9
|
-
nkululeko/experiment.py,sha256=
|
9
|
+
nkululeko/experiment.py,sha256=24FmvF9_zNXE86fO6gzss1M-BjceOCiV6nyJAs0SM_Y,30986
|
10
10
|
nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
|
11
11
|
nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
|
12
12
|
nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzwM,3976
|
@@ -17,9 +17,9 @@ nkululeko/modelrunner.py,sha256=iCmfJxsS2UafcikjRdUqPQuqQMOYA-Ctr3et3HeNR3c,1045
|
|
17
17
|
nkululeko/multidb.py,sha256=fG3VukEWP1vreVN4gB1IRXxwwg4jLftsSEYtu0o1f78,5634
|
18
18
|
nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
|
19
19
|
nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
|
20
|
-
nkululeko/plots.py,sha256=
|
20
|
+
nkululeko/plots.py,sha256=C2mwQFK0Vxfl5ZM7CO87tULDoEf7G16ek0nU77bhOc4,23070
|
21
21
|
nkululeko/predict.py,sha256=sF091sSSLnEWcISx9ZcULLie3tY5XeFsQJd6b3vrxFg,2409
|
22
|
-
nkululeko/resample.py,sha256=
|
22
|
+
nkululeko/resample.py,sha256=2d9eao_0sLrGZ_KSl8OVKsPor3BkFrlmMhrpB9WelIs,4267
|
23
23
|
nkululeko/runmanager.py,sha256=eTM1DNQKt1lxYhzt4vZyZluPXW9sWlIJHNQzex4lkJU,7624
|
24
24
|
nkululeko/scaler.py,sha256=4nkIqoajkIkuTPK0Z02ifMN_awl6fP_i-GBYdoGYgGM,4101
|
25
25
|
nkululeko/segment.py,sha256=YLKckX44tbvTb3LrdgYw9X4guzuF27sutl92z9DkpZU,4835
|
@@ -82,13 +82,13 @@ nkululeko/models/model_gmm.py,sha256=hZ9UO36KNf48qa3J-xkWIicIj9-TApmt21zNES2vEOs
|
|
82
82
|
nkululeko/models/model_knn.py,sha256=KlnrJfwiVnmXZrAaYGFrKA2f5sznvTzSJQ8-5etOP0k,599
|
83
83
|
nkululeko/models/model_knn_reg.py,sha256=j7YFfVm6xOR2d9yBYdQiwwqYfqkX0JynX_qLCvkr1fk,610
|
84
84
|
nkululeko/models/model_lin_reg.py,sha256=0D7mSnSwK82lNWDMwHYRyq3FmGa6y-DHDGg4qUe85q4,422
|
85
|
-
nkululeko/models/model_mlp.py,sha256=
|
85
|
+
nkululeko/models/model_mlp.py,sha256=xMirtYax3bLBz_0kkC0M4Rc6-KQY05NNKHQGw7rbum8,9856
|
86
86
|
nkululeko/models/model_mlp_regression.py,sha256=PO5qyfjgAJH8hawhmeXDaUThyXDYdM642dQHkO0NY7c,10204
|
87
87
|
nkululeko/models/model_svm.py,sha256=rsME3KvKvNG7bdE5lbvYUu85WZhaASZxxmdNDIVJRZ4,940
|
88
88
|
nkululeko/models/model_svr.py,sha256=_YZeksqB3eBENGlg3g9RwYFlk9rQQ-XCeNBKLlGGVoE,725
|
89
89
|
nkululeko/models/model_tree.py,sha256=rf16faUm4o2LJgkoYpeY998b8DQIvXZ73_m1IS3TnnE,417
|
90
90
|
nkululeko/models/model_tree_reg.py,sha256=IgQcPTE-304HQLYSKPF8Z4ot_Ur9dH01fZjS0nXke_M,428
|
91
|
-
nkululeko/models/model_tuned.py,sha256=
|
91
|
+
nkululeko/models/model_tuned.py,sha256=xOoY5TROzzTVu3sDtlmEle3V1MAgpf8S3WxO9o4MzV4,20777
|
92
92
|
nkululeko/models/model_xgb.py,sha256=Thgx5ESdIok4v72mKh4plxpo4smGcKALWNCJTDScY0M,447
|
93
93
|
nkululeko/models/model_xgr.py,sha256=aGBtNGLWjOE_2rICGYGFxmT8DtnHYsIl1lIpMtghHsY,418
|
94
94
|
nkululeko/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -96,7 +96,7 @@ nkululeko/reporting/defines.py,sha256=IsY1YgKRMaABpylVKjBJgJ5bNCEbGCVA_E6pivraqS
|
|
96
96
|
nkululeko/reporting/latex_writer.py,sha256=qiCRSmB4KOD_za4oHu5x-PhwjZohzfo8wecMOwlXZwc,1886
|
97
97
|
nkululeko/reporting/report.py,sha256=W0rcigDdjBvxZQ3pZja_gvToILYvaZ1BFtnN2qFRfYI,1060
|
98
98
|
nkululeko/reporting/report_item.py,sha256=siWeGNgo4bAE46YBMNcsdf3jTMTy76BO9Fi6DTvDig4,533
|
99
|
-
nkululeko/reporting/reporter.py,sha256=
|
99
|
+
nkululeko/reporting/reporter.py,sha256=NugmGmS3iwuBJ59jqyuTCKPRpiPLGhnz12z_nlVh69Y,13445
|
100
100
|
nkululeko/reporting/result.py,sha256=nSN5or-Py2GPRWHkWpGRh7UCi1W0er7WLEHz8fYLk-A,742
|
101
101
|
nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
102
102
|
nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=pmLHuXsaqvcdYxB4PSW9l1mbQWZZBJFhi_CGabqydas,1947
|
@@ -104,9 +104,9 @@ nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5Ev
|
|
104
104
|
nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
105
105
|
nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
|
106
106
|
nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
|
107
|
-
nkululeko/utils/util.py,sha256=
|
108
|
-
nkululeko-0.86.
|
109
|
-
nkululeko-0.86.
|
110
|
-
nkululeko-0.86.
|
111
|
-
nkululeko-0.86.
|
112
|
-
nkululeko-0.86.
|
107
|
+
nkululeko/utils/util.py,sha256=mK1MgO14NinrPhavJw72eR_2WN_kBKjVKiEJnzvdO1Q,13946
|
108
|
+
nkululeko-0.86.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
109
|
+
nkululeko-0.86.2.dist-info/METADATA,sha256=DmmpMrftBptpWqx7h9US7_4mvMIQbZ5ugzv_4kyBjkM,37170
|
110
|
+
nkululeko-0.86.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
111
|
+
nkululeko-0.86.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
112
|
+
nkululeko-0.86.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|