nkululeko 0.86.7__py3-none-any.whl → 0.86.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +1 -1
- nkululeko/experiment.py +39 -21
- nkululeko/feature_extractor.py +10 -4
- nkululeko/utils/util.py +22 -16
- {nkululeko-0.86.7.dist-info → nkululeko-0.86.8.dist-info}/METADATA +5 -1
- {nkululeko-0.86.7.dist-info → nkululeko-0.86.8.dist-info}/RECORD +9 -9
- {nkululeko-0.86.7.dist-info → nkululeko-0.86.8.dist-info}/LICENSE +0 -0
- {nkululeko-0.86.7.dist-info → nkululeko-0.86.8.dist-info}/WHEEL +0 -0
- {nkululeko-0.86.7.dist-info → nkululeko-0.86.8.dist-info}/top_level.txt +0 -0
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.86.7"
|
1
|
+
VERSION="0.86.8"
|
2
2
|
SAMPLING_RATE = 16000
|
nkululeko/experiment.py
CHANGED
@@ -5,13 +5,13 @@ import pickle
|
|
5
5
|
import random
|
6
6
|
import time
|
7
7
|
|
8
|
+
import audeer
|
9
|
+
import audformat
|
8
10
|
import numpy as np
|
9
11
|
import pandas as pd
|
10
12
|
from sklearn.preprocessing import LabelEncoder
|
11
13
|
|
12
|
-
import
|
13
|
-
import audformat
|
14
|
-
|
14
|
+
import nkululeko.glob_conf as glob_conf
|
15
15
|
from nkululeko.data.dataset import Dataset
|
16
16
|
from nkululeko.data.dataset_csv import Dataset_CSV
|
17
17
|
from nkululeko.demo_predictor import Demo_predictor
|
@@ -19,8 +19,6 @@ from nkululeko.feat_extract.feats_analyser import FeatureAnalyser
|
|
19
19
|
from nkululeko.feature_extractor import FeatureExtractor
|
20
20
|
from nkululeko.file_checker import FileChecker
|
21
21
|
from nkululeko.filter_data import DataFilter
|
22
|
-
from nkululeko.filter_data import filter_min_dur
|
23
|
-
import nkululeko.glob_conf as glob_conf
|
24
22
|
from nkululeko.plots import Plots
|
25
23
|
from nkululeko.reporting.report import Report
|
26
24
|
from nkululeko.runmanager import Runmanager
|
@@ -109,7 +107,8 @@ class Experiment:
|
|
109
107
|
# print keys/column
|
110
108
|
dbs = ",".join(list(self.datasets.keys()))
|
111
109
|
labels = self.util.config_val("DATA", "labels", False)
|
112
|
-
auto_labels = list(
|
110
|
+
auto_labels = list(
|
111
|
+
next(iter(self.datasets.values())).df[self.target].unique())
|
113
112
|
if labels:
|
114
113
|
self.labels = ast.literal_eval(labels)
|
115
114
|
self.util.debug(f"Using target labels (from config): {labels}")
|
@@ -159,7 +158,8 @@ class Experiment:
|
|
159
158
|
data.split()
|
160
159
|
data.prepare_labels()
|
161
160
|
self.df_test = pd.concat(
|
162
|
-
[self.df_test, self.util.make_segmented_index(
|
161
|
+
[self.df_test, self.util.make_segmented_index(
|
162
|
+
data.df_test)]
|
163
163
|
)
|
164
164
|
self.df_test.is_labeled = data.is_labeled
|
165
165
|
self.df_test.got_gender = self.got_gender
|
@@ -260,7 +260,8 @@ class Experiment:
|
|
260
260
|
test_cats = self.df_test[self.target].unique()
|
261
261
|
else:
|
262
262
|
# if there is no target, copy a dummy label
|
263
|
-
self.df_test = self._add_random_target(
|
263
|
+
self.df_test = self._add_random_target(
|
264
|
+
self.df_test).astype("str")
|
264
265
|
train_cats = self.df_train[self.target].unique()
|
265
266
|
# print(f"df_train: {pd.DataFrame(self.df_train[self.target])}")
|
266
267
|
# print(f"train_cats with target {self.target}: {train_cats}")
|
@@ -268,7 +269,8 @@ class Experiment:
|
|
268
269
|
if type(test_cats) == np.ndarray:
|
269
270
|
self.util.debug(f"Categories test (nd.array): {test_cats}")
|
270
271
|
else:
|
271
|
-
self.util.debug(
|
272
|
+
self.util.debug(
|
273
|
+
f"Categories test (list): {list(test_cats)}")
|
272
274
|
if type(train_cats) == np.ndarray:
|
273
275
|
self.util.debug(f"Categories train (nd.array): {train_cats}")
|
274
276
|
else:
|
@@ -291,7 +293,8 @@ class Experiment:
|
|
291
293
|
|
292
294
|
target_factor = self.util.config_val("DATA", "target_divide_by", False)
|
293
295
|
if target_factor:
|
294
|
-
self.df_test[self.target] = self.df_test[self.target] /
|
296
|
+
self.df_test[self.target] = self.df_test[self.target] / \
|
297
|
+
float(target_factor)
|
295
298
|
self.df_train[self.target] = self.df_train[self.target] / float(
|
296
299
|
target_factor
|
297
300
|
)
|
@@ -314,14 +317,16 @@ class Experiment:
|
|
314
317
|
def plot_distribution(self, df_labels):
|
315
318
|
"""Plot the distribution of samples and speaker per target class and biological sex"""
|
316
319
|
plot = Plots()
|
317
|
-
sample_selection = self.util.config_val(
|
320
|
+
sample_selection = self.util.config_val(
|
321
|
+
"EXPL", "sample_selection", "all")
|
318
322
|
plot.plot_distributions(df_labels)
|
319
323
|
if self.got_speaker:
|
320
324
|
plot.plot_distributions_speaker(df_labels)
|
321
325
|
|
322
326
|
def extract_test_feats(self):
|
323
327
|
self.feats_test = pd.DataFrame()
|
324
|
-
feats_name = "_".join(ast.literal_eval(
|
328
|
+
feats_name = "_".join(ast.literal_eval(
|
329
|
+
glob_conf.config["DATA"]["tests"]))
|
325
330
|
feats_types = self.util.config_val_list("FEATS", "type", ["os"])
|
326
331
|
self.feature_extractor = FeatureExtractor(
|
327
332
|
self.df_test, feats_types, feats_name, "test"
|
@@ -338,9 +343,17 @@ class Experiment:
|
|
338
343
|
|
339
344
|
"""
|
340
345
|
df_train, df_test = self.df_train, self.df_test
|
341
|
-
feats_name = "_".join(ast.literal_eval(
|
346
|
+
feats_name = "_".join(ast.literal_eval(
|
347
|
+
glob_conf.config["DATA"]["databases"]))
|
342
348
|
self.feats_test, self.feats_train = pd.DataFrame(), pd.DataFrame()
|
343
|
-
feats_types = self.util.
|
349
|
+
feats_types = self.util.config_val("FEATS", "type", "os")
|
350
|
+
# Ensure feats_types is always a list of strings
|
351
|
+
if isinstance(feats_types, str):
|
352
|
+
if feats_types.startswith("[") and feats_types.endswith("]"):
|
353
|
+
feats_types = ast.literal_eval(feats_types)
|
354
|
+
else:
|
355
|
+
feats_types = [feats_types]
|
356
|
+
# print(f"feats_types: {feats_types}")
|
344
357
|
# for some models no features are needed
|
345
358
|
if len(feats_types) == 0:
|
346
359
|
self.util.debug("no feature extractor specified.")
|
@@ -372,7 +385,8 @@ class Experiment:
|
|
372
385
|
f"test feats ({self.feats_test.shape[0]}) != test labels"
|
373
386
|
f" ({self.df_test.shape[0]})"
|
374
387
|
)
|
375
|
-
self.df_test = self.df_test[self.df_test.index.isin(
|
388
|
+
self.df_test = self.df_test[self.df_test.index.isin(
|
389
|
+
self.feats_test.index)]
|
376
390
|
self.util.warn(f"new test labels shape: {self.df_test.shape[0]}")
|
377
391
|
|
378
392
|
self._check_scale()
|
@@ -387,7 +401,8 @@ class Experiment:
|
|
387
401
|
"""Augment the selected samples."""
|
388
402
|
from nkululeko.augmenting.augmenter import Augmenter
|
389
403
|
|
390
|
-
sample_selection = self.util.config_val(
|
404
|
+
sample_selection = self.util.config_val(
|
405
|
+
"AUGMENT", "sample_selection", "all")
|
391
406
|
if sample_selection == "all":
|
392
407
|
df = pd.concat([self.df_train, self.df_test])
|
393
408
|
elif sample_selection == "train":
|
@@ -482,7 +497,8 @@ class Experiment:
|
|
482
497
|
"""
|
483
498
|
from nkululeko.augmenting.randomsplicer import Randomsplicer
|
484
499
|
|
485
|
-
sample_selection = self.util.config_val(
|
500
|
+
sample_selection = self.util.config_val(
|
501
|
+
"AUGMENT", "sample_selection", "all")
|
486
502
|
if sample_selection == "all":
|
487
503
|
df = pd.concat([self.df_train, self.df_test])
|
488
504
|
elif sample_selection == "train":
|
@@ -503,7 +519,8 @@ class Experiment:
|
|
503
519
|
plot_feats = eval(
|
504
520
|
self.util.config_val("EXPL", "feature_distributions", "False")
|
505
521
|
)
|
506
|
-
sample_selection = self.util.config_val(
|
522
|
+
sample_selection = self.util.config_val(
|
523
|
+
"EXPL", "sample_selection", "all")
|
507
524
|
# get the data labels
|
508
525
|
if sample_selection == "all":
|
509
526
|
df_labels = pd.concat([self.df_train, self.df_test])
|
@@ -566,7 +583,8 @@ class Experiment:
|
|
566
583
|
for scat_target in scat_targets:
|
567
584
|
if self.util.is_categorical(df_labels[scat_target]):
|
568
585
|
for scatter in scatters:
|
569
|
-
plots.scatter_plot(
|
586
|
+
plots.scatter_plot(
|
587
|
+
df_feats, df_labels, scat_target, scatter)
|
570
588
|
else:
|
571
589
|
self.util.debug(
|
572
590
|
f"{self.name}: binning continuous variable to categories"
|
@@ -657,7 +675,8 @@ class Experiment:
|
|
657
675
|
preds = best.preds
|
658
676
|
speakers = self.df_test.speaker.values
|
659
677
|
print(f"{len(truths)} {len(preds)} {len(speakers) }")
|
660
|
-
df = pd.DataFrame(
|
678
|
+
df = pd.DataFrame(
|
679
|
+
data={"truth": truths, "pred": preds, "speaker": speakers})
|
661
680
|
plot_name = "result_combined_per_speaker"
|
662
681
|
self.util.debug(
|
663
682
|
f"plotting speaker combination ({function}) confusion matrix to"
|
@@ -733,7 +752,6 @@ class Experiment:
|
|
733
752
|
if model.is_ann():
|
734
753
|
print("converting to onnx from torch")
|
735
754
|
else:
|
736
|
-
from skl2onnx import to_onnx
|
737
755
|
|
738
756
|
print("converting to onnx from sklearn")
|
739
757
|
# save the rest
|
nkululeko/feature_extractor.py
CHANGED
@@ -39,16 +39,20 @@ class FeatureExtractor:
|
|
39
39
|
self.feats = pd.DataFrame()
|
40
40
|
for feats_type in self.feats_types:
|
41
41
|
store_name = f"{self.data_name}_{feats_type}"
|
42
|
-
self.feat_extractor = self._get_feat_extractor(
|
42
|
+
self.feat_extractor = self._get_feat_extractor(
|
43
|
+
store_name, feats_type)
|
43
44
|
self.feat_extractor.extract()
|
44
45
|
self.feat_extractor.filter()
|
45
|
-
self.feats = pd.concat(
|
46
|
+
self.feats = pd.concat(
|
47
|
+
[self.feats, self.feat_extractor.df], axis=1)
|
46
48
|
return self.feats
|
47
49
|
|
48
50
|
def extract_sample(self, signal, sr):
|
49
51
|
return self.feat_extractor.extract_sample(signal, sr)
|
50
52
|
|
51
53
|
def _get_feat_extractor(self, store_name, feats_type):
|
54
|
+
if isinstance(feats_type, list) and len(feats_type) == 1:
|
55
|
+
feats_type = feats_type[0]
|
52
56
|
feat_extractor_class = self._get_feat_extractor_class(feats_type)
|
53
57
|
if feat_extractor_class is None:
|
54
58
|
self.util.error(f"unknown feats_type: {feats_type}")
|
@@ -103,13 +107,15 @@ class FeatureExtractor:
|
|
103
107
|
prefix, _, ext = feats_type.partition("-")
|
104
108
|
from importlib import import_module
|
105
109
|
|
106
|
-
module = import_module(
|
110
|
+
module = import_module(
|
111
|
+
f"nkululeko.feat_extract.feats_{prefix.lower()}")
|
107
112
|
class_name = f"{prefix.capitalize()}"
|
108
113
|
return getattr(module, class_name)
|
109
114
|
|
110
115
|
def _get_feat_extractor_by_name(self, feats_type):
|
111
116
|
from importlib import import_module
|
112
117
|
|
113
|
-
module = import_module(
|
118
|
+
module = import_module(
|
119
|
+
f"nkululeko.feat_extract.feats_{feats_type.lower()}")
|
114
120
|
class_name = f"{feats_type.capitalize()}Set"
|
115
121
|
return getattr(module, class_name)
|
nkululeko/utils/util.py
CHANGED
@@ -5,15 +5,15 @@ import os.path
|
|
5
5
|
import pickle
|
6
6
|
import sys
|
7
7
|
|
8
|
-
import numpy as np
|
9
|
-
import pandas as pd
|
10
|
-
|
11
8
|
import audeer
|
12
9
|
import audformat
|
10
|
+
import numpy as np
|
11
|
+
import pandas as pd
|
13
12
|
|
14
13
|
|
15
14
|
class Util:
|
16
|
-
# a list of words that need not to be warned upon if default values are
|
15
|
+
# a list of words that need not to be warned upon if default values are
|
16
|
+
# used
|
17
17
|
stopvals = [
|
18
18
|
"all",
|
19
19
|
False,
|
@@ -40,7 +40,8 @@ class Util:
|
|
40
40
|
self.got_data_roots = self.config_val(
|
41
41
|
"DATA", "root_folders", False)
|
42
42
|
if self.got_data_roots:
|
43
|
-
# if there is a global data rootfolder file, read from
|
43
|
+
# if there is a global data rootfolder file, read from
|
44
|
+
# there
|
44
45
|
if not os.path.isfile(self.got_data_roots):
|
45
46
|
self.error(f"no such file: {self.got_data_roots}")
|
46
47
|
self.data_roots = configparser.ConfigParser()
|
@@ -107,16 +108,17 @@ class Util:
|
|
107
108
|
if self.got_data_roots:
|
108
109
|
try:
|
109
110
|
if len(key) > 0:
|
110
|
-
return self.data_roots["DATA"][dataset +
|
111
|
+
return self.data_roots["DATA"][dataset +
|
112
|
+
"." + key].strip("'\"")
|
111
113
|
else:
|
112
114
|
return self.data_roots["DATA"][dataset].strip("'\"")
|
113
115
|
except KeyError:
|
114
|
-
if not
|
116
|
+
if default not in self.stopvals:
|
115
117
|
self.debug(
|
116
|
-
f"value for {key} not found, using default:"
|
117
|
-
|
118
|
+
f"value for {key} not found, using default:"
|
119
|
+
f" {default}")
|
118
120
|
return default
|
119
|
-
if not
|
121
|
+
if default not in self.stopvals:
|
120
122
|
self.debug(
|
121
123
|
f"value for {key} not found, using default: {default}")
|
122
124
|
return default
|
@@ -182,7 +184,7 @@ class Util:
|
|
182
184
|
|
183
185
|
def get_feattype_name(self):
|
184
186
|
"""
|
185
|
-
Get a string as name from all feature sets that are
|
187
|
+
Get a string as name from all feature sets that are used
|
186
188
|
"""
|
187
189
|
return "_".join(ast.literal_eval(self.config["FEATS"]["type"]))
|
188
190
|
|
@@ -205,7 +207,12 @@ class Util:
|
|
205
207
|
def get_model_description(self):
|
206
208
|
mt = ""
|
207
209
|
mt = f'{self.config["MODEL"]["type"]}'
|
208
|
-
ft = "_".join(ast.literal_eval(self.config["FEATS"]["type"]))
|
210
|
+
# ft = "_".join(ast.literal_eval(self.config["FEATS"]["type"]))
|
211
|
+
ft_value = self.config["FEATS"]["type"]
|
212
|
+
if isinstance(ft_value, str) and ft_value.startswith("[") and ft_value.endswith("]"):
|
213
|
+
ft = "_".join(ast.literal_eval(ft_value))
|
214
|
+
else:
|
215
|
+
ft = ft_value
|
209
216
|
ft += "_"
|
210
217
|
layer_string = ""
|
211
218
|
layer_s = self.config_val("MODEL", "layers", False)
|
@@ -230,9 +237,8 @@ class Util:
|
|
230
237
|
["FEATS", "wav2vec2.layer"],
|
231
238
|
]
|
232
239
|
for option in options:
|
233
|
-
return_string += self._get_value_descript(
|
234
|
-
".", "-"
|
235
|
-
)
|
240
|
+
return_string += self._get_value_descript(
|
241
|
+
option[0], option[1]).replace(".", "-")
|
236
242
|
return return_string
|
237
243
|
|
238
244
|
def get_plot_name(self):
|
@@ -286,7 +292,7 @@ class Util:
|
|
286
292
|
try:
|
287
293
|
return ast.literal_eval(self.config[section][key])
|
288
294
|
except KeyError:
|
289
|
-
if not
|
295
|
+
if default not in self.stopvals:
|
290
296
|
self.debug(
|
291
297
|
f"value for {key} not found, using default: {default}")
|
292
298
|
return default
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.86.7
|
3
|
+
Version: 0.86.8
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -343,6 +343,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
|
|
343
343
|
Changelog
|
344
344
|
=========
|
345
345
|
|
346
|
+
Version 0.86.8
|
347
|
+
--------------
|
348
|
+
* handle single feature sets as strings in the config
|
349
|
+
|
346
350
|
Version 0.86.7
|
347
351
|
--------------
|
348
352
|
* handles now audformat tables where the target is in a file index
|
@@ -2,14 +2,14 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
|
2
2
|
nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
|
3
3
|
nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
|
4
4
|
nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
|
5
|
-
nkululeko/constants.py,sha256=
|
5
|
+
nkululeko/constants.py,sha256=FOK-XF_DHGNFHsO_OMLof3jwgrn2buWnPVfrHy5QBm8,39
|
6
6
|
nkululeko/demo.py,sha256=WSKr-W5uJ9DQfemK923g7Hd5V3kgAn03Er0JX1Pa45I,5142
|
7
7
|
nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
|
8
8
|
nkululeko/demo_predictor.py,sha256=es56xbT8ifkS_vnrlb5NTZT54gNmeUtNlA4zVA_gnN8,4757
|
9
|
-
nkululeko/experiment.py,sha256=
|
9
|
+
nkululeko/experiment.py,sha256=s9PIjm45dR9yzmHu_69JpBjX9qMVzi5wIgPfMR3F44A,31530
|
10
10
|
nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
|
11
11
|
nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
|
12
|
-
nkululeko/feature_extractor.py,sha256=
|
12
|
+
nkululeko/feature_extractor.py,sha256=rL-TybLmjZz5uxT9LNTORaDat9FKp_1qloxbyMrinyE,4141
|
13
13
|
nkululeko/file_checker.py,sha256=LoLnL8aHpW-axMQ46qbqrManTs5otG9ShpEZuz9iRSk,3474
|
14
14
|
nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
|
15
15
|
nkululeko/glob_conf.py,sha256=KL9YJQTHvTztxo1vr25qRRgaPnx4NTg0XrdbovKGMmw,525
|
@@ -104,9 +104,9 @@ nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5Ev
|
|
104
104
|
nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
105
105
|
nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
|
106
106
|
nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
|
107
|
-
nkululeko/utils/util.py,sha256=
|
108
|
-
nkululeko-0.86.
|
109
|
-
nkululeko-0.86.
|
110
|
-
nkululeko-0.86.
|
111
|
-
nkululeko-0.86.
|
112
|
-
nkululeko-0.86.
|
107
|
+
nkululeko/utils/util.py,sha256=ZCS02mE2c3_h9_q4hpsSm4XAooCranqRF_5pY-6055E,14432
|
108
|
+
nkululeko-0.86.8.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
109
|
+
nkululeko-0.86.8.dist-info/METADATA,sha256=5TQSWqzrN9E7XJGcVn5oPKGl6qy-RliYGEG2Ycl46qk,38109
|
110
|
+
nkululeko-0.86.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
111
|
+
nkululeko-0.86.8.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
112
|
+
nkululeko-0.86.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|