nkululeko 0.85.1__py3-none-any.whl → 0.85.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +1 -1
- nkululeko/data/dataset_csv.py +7 -4
- nkululeko/experiment.py +31 -17
- {nkululeko-0.85.1.dist-info → nkululeko-0.85.2.dist-info}/METADATA +5 -1
- {nkululeko-0.85.1.dist-info → nkululeko-0.85.2.dist-info}/RECORD +8 -8
- {nkululeko-0.85.1.dist-info → nkululeko-0.85.2.dist-info}/LICENSE +0 -0
- {nkululeko-0.85.1.dist-info → nkululeko-0.85.2.dist-info}/WHEEL +0 -0
- {nkululeko-0.85.1.dist-info → nkululeko-0.85.2.dist-info}/top_level.txt +0 -0
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.85.1"
|
1
|
+
VERSION="0.85.2"
|
2
2
|
SAMPLING_RATE = 16000
|
nkululeko/data/dataset_csv.py
CHANGED
@@ -21,7 +21,7 @@ class Dataset_CSV(Dataset):
|
|
21
21
|
# exp_root = self.util.config_val("EXP", "root", "")
|
22
22
|
# data_file = os.path.join(exp_root, data_file)
|
23
23
|
root = os.path.dirname(data_file)
|
24
|
-
audio_path = self.util.config_val_data(self.name, "audio_path", "")
|
24
|
+
audio_path = self.util.config_val_data(self.name, "audio_path", "./")
|
25
25
|
df = pd.read_csv(data_file)
|
26
26
|
# special treatment for segmented dataframes with only one column:
|
27
27
|
if "start" in df.columns and len(df.columns) == 4:
|
@@ -49,7 +49,8 @@ class Dataset_CSV(Dataset):
|
|
49
49
|
.map(lambda x: root + "/" + audio_path + "/" + x)
|
50
50
|
.values
|
51
51
|
)
|
52
|
-
df = df.set_index(df.index.set_levels(file_index, level="file"))
|
52
|
+
df = df.set_index(df.index.set_levels(
|
53
|
+
file_index, level="file"))
|
53
54
|
else:
|
54
55
|
if not isinstance(df, pd.DataFrame):
|
55
56
|
df = pd.DataFrame(df)
|
@@ -63,7 +64,8 @@ class Dataset_CSV(Dataset):
|
|
63
64
|
self.db = None
|
64
65
|
self.got_target = True
|
65
66
|
self.is_labeled = self.got_target
|
66
|
-
self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
|
67
|
+
self.start_fresh = eval(
|
68
|
+
self.util.config_val("DATA", "no_reuse", "False"))
|
67
69
|
is_index = False
|
68
70
|
try:
|
69
71
|
if self.is_labeled and not "class_label" in self.df.columns:
|
@@ -90,7 +92,8 @@ class Dataset_CSV(Dataset):
|
|
90
92
|
f" {self.got_gender}, got age: {self.got_age}"
|
91
93
|
)
|
92
94
|
self.util.debug(r_string)
|
93
|
-
glob_conf.report.add_item(ReportItem("Data", "Loaded report", r_string))
|
95
|
+
glob_conf.report.add_item(ReportItem(
|
96
|
+
"Data", "Loaded report", r_string))
|
94
97
|
|
95
98
|
def prepare(self):
|
96
99
|
super().prepare()
|
nkululeko/experiment.py
CHANGED
@@ -109,14 +109,15 @@ class Experiment:
|
|
109
109
|
# print keys/column
|
110
110
|
dbs = ",".join(list(self.datasets.keys()))
|
111
111
|
labels = self.util.config_val("DATA", "labels", False)
|
112
|
+
auto_labels = list(
|
113
|
+
next(iter(self.datasets.values())).df[self.target].unique()
|
114
|
+
)
|
112
115
|
if labels:
|
113
116
|
self.labels = ast.literal_eval(labels)
|
114
117
|
self.util.debug(f"Target labels (from config): {labels}")
|
115
118
|
else:
|
116
|
-
self.labels = list(
|
117
|
-
next(iter(self.datasets.values())).df[self.target].unique()
|
118
|
-
)
|
119
|
-
self.util.debug(f"Target labels (from database): {labels}")
|
119
|
+
self.labels = auto_labels
|
120
|
+
self.util.debug(f"Target labels (from database): {auto_labels}")
|
120
121
|
glob_conf.set_labels(self.labels)
|
121
122
|
self.util.debug(f"loaded databases {dbs}")
|
122
123
|
|
@@ -159,7 +160,8 @@ class Experiment:
|
|
159
160
|
data.split()
|
160
161
|
data.prepare_labels()
|
161
162
|
self.df_test = pd.concat(
|
162
|
-
[self.df_test, self.util.make_segmented_index(data.df_test)]
|
163
|
+
[self.df_test, self.util.make_segmented_index(
|
164
|
+
data.df_test)]
|
163
165
|
)
|
164
166
|
self.df_test.is_labeled = data.is_labeled
|
165
167
|
self.df_test.got_gender = self.got_gender
|
@@ -260,7 +262,8 @@ class Experiment:
|
|
260
262
|
test_cats = self.df_test[self.target].unique()
|
261
263
|
else:
|
262
264
|
# if there is no target, copy a dummy label
|
263
|
-
self.df_test = self._add_random_target(
|
265
|
+
self.df_test = self._add_random_target(
|
266
|
+
self.df_test).astype("str")
|
264
267
|
train_cats = self.df_train[self.target].unique()
|
265
268
|
# print(f"df_train: {pd.DataFrame(self.df_train[self.target])}")
|
266
269
|
# print(f"train_cats with target {self.target}: {train_cats}")
|
@@ -268,7 +271,8 @@ class Experiment:
|
|
268
271
|
if type(test_cats) == np.ndarray:
|
269
272
|
self.util.debug(f"Categories test (nd.array): {test_cats}")
|
270
273
|
else:
|
271
|
-
self.util.debug(f"Categories test (list): {list(test_cats)}")
|
274
|
+
self.util.debug(
|
275
|
+
f"Categories test (list): {list(test_cats)}")
|
272
276
|
if type(train_cats) == np.ndarray:
|
273
277
|
self.util.debug(f"Categories train (nd.array): {train_cats}")
|
274
278
|
else:
|
@@ -291,7 +295,8 @@ class Experiment:
|
|
291
295
|
|
292
296
|
target_factor = self.util.config_val("DATA", "target_divide_by", False)
|
293
297
|
if target_factor:
|
294
|
-
self.df_test[self.target] = self.df_test[self.target] / float(target_factor)
|
298
|
+
self.df_test[self.target] = self.df_test[self.target] / \
|
299
|
+
float(target_factor)
|
295
300
|
self.df_train[self.target] = self.df_train[self.target] / float(
|
296
301
|
target_factor
|
297
302
|
)
|
@@ -314,14 +319,16 @@ class Experiment:
|
|
314
319
|
def plot_distribution(self, df_labels):
|
315
320
|
"""Plot the distribution of samples and speaker per target class and biological sex"""
|
316
321
|
plot = Plots()
|
317
|
-
sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
|
322
|
+
sample_selection = self.util.config_val(
|
323
|
+
"EXPL", "sample_selection", "all")
|
318
324
|
plot.plot_distributions(df_labels)
|
319
325
|
if self.got_speaker:
|
320
326
|
plot.plot_distributions_speaker(df_labels)
|
321
327
|
|
322
328
|
def extract_test_feats(self):
|
323
329
|
self.feats_test = pd.DataFrame()
|
324
|
-
feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["tests"]))
|
330
|
+
feats_name = "_".join(ast.literal_eval(
|
331
|
+
glob_conf.config["DATA"]["tests"]))
|
325
332
|
feats_types = self.util.config_val_list("FEATS", "type", ["os"])
|
326
333
|
self.feature_extractor = FeatureExtractor(
|
327
334
|
self.df_test, feats_types, feats_name, "test"
|
@@ -338,7 +345,8 @@ class Experiment:
|
|
338
345
|
|
339
346
|
"""
|
340
347
|
df_train, df_test = self.df_train, self.df_test
|
341
|
-
feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
|
348
|
+
feats_name = "_".join(ast.literal_eval(
|
349
|
+
glob_conf.config["DATA"]["databases"]))
|
342
350
|
self.feats_test, self.feats_train = pd.DataFrame(), pd.DataFrame()
|
343
351
|
feats_types = self.util.config_val_list("FEATS", "type", [])
|
344
352
|
# for some models no features are needed
|
@@ -372,7 +380,8 @@ class Experiment:
|
|
372
380
|
f"test feats ({self.feats_test.shape[0]}) != test labels"
|
373
381
|
f" ({self.df_test.shape[0]})"
|
374
382
|
)
|
375
|
-
self.df_test = self.df_test[self.df_test.index.isin(self.feats_test.index)]
|
383
|
+
self.df_test = self.df_test[self.df_test.index.isin(
|
384
|
+
self.feats_test.index)]
|
376
385
|
self.util.warn(f"mew test labels shape: {self.df_test.shape[0]}")
|
377
386
|
|
378
387
|
self._check_scale()
|
@@ -383,7 +392,8 @@ class Experiment:
|
|
383
392
|
"""
|
384
393
|
from nkululeko.augmenting.augmenter import Augmenter
|
385
394
|
|
386
|
-
sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
|
395
|
+
sample_selection = self.util.config_val(
|
396
|
+
"AUGMENT", "sample_selection", "all")
|
387
397
|
if sample_selection == "all":
|
388
398
|
df = pd.concat([self.df_train, self.df_test])
|
389
399
|
elif sample_selection == "train":
|
@@ -478,7 +488,8 @@ class Experiment:
|
|
478
488
|
"""
|
479
489
|
from nkululeko.augmenting.randomsplicer import Randomsplicer
|
480
490
|
|
481
|
-
sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
|
491
|
+
sample_selection = self.util.config_val(
|
492
|
+
"AUGMENT", "sample_selection", "all")
|
482
493
|
if sample_selection == "all":
|
483
494
|
df = pd.concat([self.df_train, self.df_test])
|
484
495
|
elif sample_selection == "train":
|
@@ -499,7 +510,8 @@ class Experiment:
|
|
499
510
|
plot_feats = eval(
|
500
511
|
self.util.config_val("EXPL", "feature_distributions", "False")
|
501
512
|
)
|
502
|
-
sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
|
513
|
+
sample_selection = self.util.config_val(
|
514
|
+
"EXPL", "sample_selection", "all")
|
503
515
|
# get the data labels
|
504
516
|
if sample_selection == "all":
|
505
517
|
df_labels = pd.concat([self.df_train, self.df_test])
|
@@ -562,7 +574,8 @@ class Experiment:
|
|
562
574
|
for scat_target in scat_targets:
|
563
575
|
if self.util.is_categorical(df_labels[scat_target]):
|
564
576
|
for scatter in scatters:
|
565
|
-
plots.scatter_plot(df_feats, df_labels, scat_target, scatter)
|
577
|
+
plots.scatter_plot(
|
578
|
+
df_feats, df_labels, scat_target, scatter)
|
566
579
|
else:
|
567
580
|
self.util.debug(
|
568
581
|
f"{self.name}: binning continuous variable to categories"
|
@@ -651,7 +664,8 @@ class Experiment:
|
|
651
664
|
preds = best.preds
|
652
665
|
speakers = self.df_test.speaker.values
|
653
666
|
print(f"{len(truths)} {len(preds)} {len(speakers) }")
|
654
|
-
df = pd.DataFrame(data={"truth": truths, "pred": preds, "speaker": speakers})
|
667
|
+
df = pd.DataFrame(
|
668
|
+
data={"truth": truths, "pred": preds, "speaker": speakers})
|
655
669
|
plot_name = "result_combined_per_speaker"
|
656
670
|
self.util.debug(
|
657
671
|
f"plotting speaker combination ({function}) confusion matrix to"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.85.1
|
3
|
+
Version: 0.85.2
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -333,6 +333,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
|
|
333
333
|
Changelog
|
334
334
|
=========
|
335
335
|
|
336
|
+
Version 0.85.2
|
337
|
+
--------------
|
338
|
+
* added data, and automatic task label detection
|
339
|
+
|
336
340
|
Version 0.85.1
|
337
341
|
--------------
|
338
342
|
* fixed bug in model_finetuned that label_num was constant 2
|
@@ -2,11 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
|
2
2
|
nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
|
3
3
|
nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
|
4
4
|
nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
|
5
|
-
nkululeko/constants.py,sha256=
|
5
|
+
nkululeko/constants.py,sha256=l15EMSj8vmejkCKCzQ6jMrgj5PuNrcHIREXt9kbSw7U,39
|
6
6
|
nkululeko/demo.py,sha256=8bl15Kitoesnz8oa8yrs52T6YCSOhWbbq9PnZ8Hj6D0,3232
|
7
7
|
nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
|
8
8
|
nkululeko/demo_predictor.py,sha256=es56xbT8ifkS_vnrlb5NTZT54gNmeUtNlA4zVA_gnN8,4757
|
9
|
-
nkululeko/experiment.py,sha256=
|
9
|
+
nkululeko/experiment.py,sha256=ZsSWdasWUyIBF_4vxb4FxvHs42pytG7ErUOABA-WWTo,30722
|
10
10
|
nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
|
11
11
|
nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
|
12
12
|
nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzwM,3976
|
@@ -46,7 +46,7 @@ nkululeko/autopredict/ap_valence.py,sha256=n-hctRKySzhmJtowuMOTUu0T_ld3uK5pnfOzW
|
|
46
46
|
nkululeko/autopredict/estimate_snr.py,sha256=S-bpS0xFkwWc4Ch75UrjbS8y538lQ0U3g_iLRFXureY,5048
|
47
47
|
nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
48
48
|
nkululeko/data/dataset.py,sha256=JGzMD6HIvkFkYBekmbmslIKc5ADaCj06T-8gpqH_kFo,27650
|
49
|
-
nkululeko/data/dataset_csv.py,sha256=
|
49
|
+
nkululeko/data/dataset_csv.py,sha256=vTnjIc2UdSJT7foL-ltE9MWrZTCg0nplwKdEtMPxt2o,3933
|
50
50
|
nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
51
51
|
nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
|
52
52
|
nkululeko/feat_extract/feats_agender_agender.py,sha256=tgH2BnwcxpvuLmOkrMbVdBSX0Onfz2MG12FsddalRKI,3424
|
@@ -106,8 +106,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
106
106
|
nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
|
107
107
|
nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
|
108
108
|
nkululeko/utils/util.py,sha256=b1IHFucRNuF9Iyv5IJeK4AEg0Rga0xKG80UM5GWWdHA,13816
|
109
|
-
nkululeko-0.85.
|
110
|
-
nkululeko-0.85.
|
111
|
-
nkululeko-0.85.
|
112
|
-
nkululeko-0.85.
|
113
|
-
nkululeko-0.85.
|
109
|
+
nkululeko-0.85.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
110
|
+
nkululeko-0.85.2.dist-info/METADATA,sha256=RVGREhA1jakUtQ707C0ecklnUZwx4skVHV0UbPwEsn0,36671
|
111
|
+
nkululeko-0.85.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
112
|
+
nkululeko-0.85.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
113
|
+
nkululeko-0.85.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|