nkululeko 0.89.2__py3-none-any.whl → 0.90.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/augment.py +2 -2
- nkululeko/augmenting/randomsplicer.py +6 -3
- nkululeko/constants.py +1 -1
- nkululeko/data/dataset.py +8 -2
- nkululeko/experiment.py +8 -2
- nkululeko/plots.py +1 -1
- {nkululeko-0.89.2.dist-info → nkululeko-0.90.0.dist-info}/METADATA +7 -1
- {nkululeko-0.89.2.dist-info → nkululeko-0.90.0.dist-info}/RECORD +11 -11
- {nkululeko-0.89.2.dist-info → nkululeko-0.90.0.dist-info}/LICENSE +0 -0
- {nkululeko-0.89.2.dist-info → nkululeko-0.90.0.dist-info}/WHEEL +0 -0
- {nkululeko-0.89.2.dist-info → nkululeko-0.90.0.dist-info}/top_level.txt +0 -0
nkululeko/augment.py
CHANGED
@@ -37,8 +37,8 @@ def doit(config_file):
|
|
37
37
|
|
38
38
|
filename = util.config_val("AUGMENT", "result", "augmented.csv")
|
39
39
|
filename = f"{expr.data_dir}/{filename}"
|
40
|
-
|
41
|
-
if os.path.exists(filename):
|
40
|
+
no_reuse = eval(util.config_val("DATA", "no_reuse", "False"))
|
41
|
+
if os.path.exists(filename) and not no_reuse:
|
42
42
|
util.debug("files already augmented")
|
43
43
|
else:
|
44
44
|
# load the data
|
nkululeko/augmenting/randomsplicer.py
CHANGED
@@ -41,14 +41,17 @@ class Randomsplicer:
|
|
41
41
|
* top_db: top db level for silence to be recognized (default: 12)
|
42
42
|
"""
|
43
43
|
|
44
|
-
p_reverse = 0.3
|
45
|
-
top_db = 12
|
44
|
+
p_reverse = float(self.util.config_val("AUGMENT", "p_reverse", "0.3"))
|
45
|
+
top_db = float(self.util.config_val("AUGMENT", "top_db", "12"))
|
46
46
|
|
47
47
|
files = self.df.index.get_level_values(0).values
|
48
48
|
store = self.util.get_path("store")
|
49
49
|
filepath = f"{store}randomspliced/"
|
50
50
|
audeer.mkdir(filepath)
|
51
|
-
self.util.debug(
|
51
|
+
self.util.debug(
|
52
|
+
f"random splicing {sample_selection} samples to {filepath}, "
|
53
|
+
+ f"p_reverse = {p_reverse}, top_db = {top_db}",
|
54
|
+
)
|
52
55
|
newpath = ""
|
53
56
|
index_map = {}
|
54
57
|
for i, f in enumerate(tqdm(files)):
|
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.89.2"
|
1
|
+
VERSION="0.90.0"
|
2
2
|
SAMPLING_RATE = 16000
|
nkululeko/data/dataset.py
CHANGED
@@ -30,8 +30,8 @@ class Dataset:
|
|
30
30
|
def __init__(self, name):
|
31
31
|
"""Constructor setting up name and configuration"""
|
32
32
|
self.name = name
|
33
|
-
self.target = glob_conf.config["DATA"]["target"]
|
34
33
|
self.util = Util("dataset")
|
34
|
+
self.target = self.util.config_val("DATA", "target", "none")
|
35
35
|
self.plot = Plots()
|
36
36
|
self.limit = int(self.util.config_val_data(self.name, "limit", 0))
|
37
37
|
self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
|
@@ -127,6 +127,9 @@ class Dataset:
|
|
127
127
|
self.got_gender,
|
128
128
|
self.got_age,
|
129
129
|
) = self._get_df_for_lists(self.db, df_files_tables)
|
130
|
+
if df.shape[0] > 0 and self.target == "none":
|
131
|
+
self.df = df
|
132
|
+
return
|
130
133
|
if False in {
|
131
134
|
self.is_labeled,
|
132
135
|
self.got_speaker,
|
@@ -553,7 +556,10 @@ class Dataset:
|
|
553
556
|
" samples in train/test"
|
554
557
|
)
|
555
558
|
# because this generates new train/test sample quantities, the feature extraction has to be done again
|
556
|
-
|
559
|
+
try:
|
560
|
+
glob_conf.config["FEATS"]["needs_feature_extraction"] = "True"
|
561
|
+
except KeyError:
|
562
|
+
pass
|
557
563
|
|
558
564
|
def random_split(self):
|
559
565
|
"""One way to split train and eval sets: Specify percentage of random samples"""
|
nkululeko/experiment.py
CHANGED
@@ -101,12 +101,15 @@ class Experiment:
|
|
101
101
|
if data.got_speaker:
|
102
102
|
self.got_speaker = True
|
103
103
|
self.datasets.update({d: data})
|
104
|
-
self.target = self.util.config_val("DATA", "target", "
|
104
|
+
self.target = self.util.config_val("DATA", "target", "none")
|
105
105
|
glob_conf.set_target(self.target)
|
106
106
|
# print target via debug
|
107
107
|
self.util.debug(f"target: {self.target}")
|
108
108
|
# print keys/column
|
109
109
|
dbs = ",".join(list(self.datasets.keys()))
|
110
|
+
if self.target == "none":
|
111
|
+
self.util.debug(f"loaded databases {dbs}")
|
112
|
+
return
|
110
113
|
labels = self.util.config_val("DATA", "labels", False)
|
111
114
|
auto_labels = list(next(iter(self.datasets.values())).df[self.target].unique())
|
112
115
|
if labels:
|
@@ -191,7 +194,8 @@ class Experiment:
|
|
191
194
|
self.df_train, self.df_test = pd.DataFrame(), pd.DataFrame()
|
192
195
|
for d in self.datasets.values():
|
193
196
|
d.split()
|
194
|
-
|
197
|
+
if self.target != "none":
|
198
|
+
d.prepare_labels()
|
195
199
|
if d.df_train.shape[0] == 0:
|
196
200
|
self.util.debug(f"warn: {d.name} train empty")
|
197
201
|
self.df_train = pd.concat([self.df_train, d.df_train])
|
@@ -207,6 +211,8 @@ class Experiment:
|
|
207
211
|
self.df_test.to_csv(storage_test)
|
208
212
|
self.df_train.to_csv(storage_train)
|
209
213
|
|
214
|
+
if self.target == "none":
|
215
|
+
return
|
210
216
|
self.util.copy_flags(self, self.df_test)
|
211
217
|
self.util.copy_flags(self, self.df_train)
|
212
218
|
# Try data checks
|
nkululeko/plots.py
CHANGED
@@ -263,7 +263,7 @@ class Plots:
|
|
263
263
|
|
264
264
|
def plotcatcont(self, df, cat_col, cont_col, xlab, ylab):
|
265
265
|
"""Plot relation of categorical distribution with continuous."""
|
266
|
-
dist_type = self.util.config_val("EXPL", "dist_type", "
|
266
|
+
dist_type = self.util.config_val("EXPL", "dist_type", "kde")
|
267
267
|
cats, cat_str, es = su.get_effect_size(df, cat_col, cont_col)
|
268
268
|
model_type = self.util.get_model_type()
|
269
269
|
if dist_type == "hist" and model_type != "tree":
|
{nkululeko-0.89.2.dist-info → nkululeko-0.90.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.89.2
|
3
|
+
Version: 0.90.0
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -356,6 +356,12 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
|
|
356
356
|
Changelog
|
357
357
|
=========
|
358
358
|
|
359
|
+
Version 0.90.0
|
360
|
+
--------------
|
361
|
+
* augmentation can now be done without target
|
362
|
+
* random splicing params configurable
|
363
|
+
* made kde the default for continuous/categorical plots
|
364
|
+
|
359
365
|
Version 0.89.2
|
360
366
|
--------------
|
361
367
|
* fix shap value calculation
|
{nkululeko-0.89.2.dist-info → nkululeko-0.90.0.dist-info}/RECORD
@@ -1,13 +1,13 @@
|
|
1
1
|
nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
2
2
|
nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
|
3
|
-
nkululeko/augment.py,sha256=
|
3
|
+
nkululeko/augment.py,sha256=xNeOR22sXHD5mxv0SEe6kvgEXX0RtiUL4BK-m-BDfcM,3133
|
4
4
|
nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
|
5
|
-
nkululeko/constants.py,sha256=
|
5
|
+
nkululeko/constants.py,sha256=t11gtE4sZM7oZrRSZhmVhIKwQAI83AN0cEZRPXkw5cs,39
|
6
6
|
nkululeko/demo.py,sha256=bLuHkeEl5rOfm7ecGHCcWATiPK7-njNbtrGljxzNzFs,5088
|
7
7
|
nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
|
8
8
|
nkululeko/demo_predictor.py,sha256=zs1bjhpnKuNCPLJeiyDm19ME1NEDOQT3QNeyVKJq9Yc,4882
|
9
9
|
nkululeko/ensemble.py,sha256=MayHpngGH_FTvSxUsH3NdxJd6WBAosGRFQeQ7cMjIco,12922
|
10
|
-
nkululeko/experiment.py,sha256=
|
10
|
+
nkululeko/experiment.py,sha256=BXUmJrJn17W-umYh4O0Jt6ZZzr2u_VDL7Lq7fPEEVMQ,31390
|
11
11
|
nkululeko/explore.py,sha256=AbTVDmuDIaLfALQGvDW1yndcw2ikaEVEZ_fJVuUS070,3940
|
12
12
|
nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
|
13
13
|
nkululeko/feature_extractor.py,sha256=UnspIWz3XrNhKnBBhWZkH2bHvD-sROtrQVqB1JvkUyw,4088
|
@@ -18,7 +18,7 @@ nkululeko/modelrunner.py,sha256=lJy-xM4QfDDWeL0dLTE_VIb4sYrnd_Z_yJRK3wwohQA,1119
|
|
18
18
|
nkululeko/multidb.py,sha256=CCjmVsZyvydgOztFlaeBvOJH8nsvU-sPQdFAw8-q0U4,6752
|
19
19
|
nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
|
20
20
|
nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
|
21
|
-
nkululeko/plots.py,sha256=
|
21
|
+
nkululeko/plots.py,sha256=p9YyN-xAtdGBKjcA305V0KOagAzG8VG6D_Ceoa9rae4,22964
|
22
22
|
nkululeko/predict.py,sha256=sF091sSSLnEWcISx9ZcULLie3tY5XeFsQJd6b3vrxFg,2409
|
23
23
|
nkululeko/resample.py,sha256=2d9eao_0sLrGZ_KSl8OVKsPor3BkFrlmMhrpB9WelIs,4267
|
24
24
|
nkululeko/runmanager.py,sha256=xvxL5a9d3jtGFqx0Z3nyyxowA368uNyP0ZitO8kxIIE,7581
|
@@ -30,7 +30,7 @@ nkululeko/test_predictor.py,sha256=DEHE_D3A6m6KJTrpDKceA1n655t_UZV3WQd57K4a3Ho,2
|
|
30
30
|
nkululeko/test_pretrain.py,sha256=ZWl-bR6nmeSmXkGAIE6zyfQEjN8Zg0rIxfaS-O6Zbas,8465
|
31
31
|
nkululeko/augmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
32
32
|
nkululeko/augmenting/augmenter.py,sha256=XAt0dpmlnKxqyysqCgV3rcz-pRIvOz7rU7dmGDCVAzs,2905
|
33
|
-
nkululeko/augmenting/randomsplicer.py,sha256=
|
33
|
+
nkululeko/augmenting/randomsplicer.py,sha256=jmn4uZc2O_-A_O-GXz7lh0rHR6-2sD9eNG0vwgtRd2w,2861
|
34
34
|
nkululeko/augmenting/randomsplicing.py,sha256=ldym9vZNsZIU5BAAaJVaOmAgmVHNs4a5i5K3bW-WAQU,1791
|
35
35
|
nkululeko/augmenting/resampler.py,sha256=nOBsiQpX6p4jXsP7x6wak78F3B5YYYRmC_iHX8iuOXs,3542
|
36
36
|
nkululeko/autopredict/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -46,7 +46,7 @@ nkululeko/autopredict/ap_stoi.py,sha256=It0Lk-ki-gohA2AzD8nkLAN2WahYvD9rPDGTQuvd
|
|
46
46
|
nkululeko/autopredict/ap_valence.py,sha256=n-hctRKySzhmJtowuMOTUu0T_ld3uK5pnfOzWeWW4VM,1024
|
47
47
|
nkululeko/autopredict/estimate_snr.py,sha256=S-bpS0xFkwWc4Ch75UrjbS8y538lQ0U3g_iLRFXureY,5048
|
48
48
|
nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
49
|
-
nkululeko/data/dataset.py,sha256=
|
49
|
+
nkululeko/data/dataset.py,sha256=QqU1YoBQk41g3MV8bc0iW1YN_gMHDizuG-cjmSq0d_o,29455
|
50
50
|
nkululeko/data/dataset_csv.py,sha256=UGEpi__eT2KFS6Fop6N4HkMrzO-u5VP71gt44kwZavo,4588
|
51
51
|
nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
52
52
|
nkululeko/feat_extract/feats_agender.py,sha256=tMK3_qs8adylNNSR0CS1RjU9RxmpumLqmuyzmc2ZYjA,3184
|
@@ -107,8 +107,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
107
107
|
nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
|
108
108
|
nkululeko/utils/stats.py,sha256=eC9dMO-by6CDnGLHDBQu-2B4-BudZNJ0nnWGhKYdUMA,2968
|
109
109
|
nkululeko/utils/util.py,sha256=363Lgmcg6fPKCGbroX0DDyW_zcYNx-Ayqv67qdpfYcw,16710
|
110
|
-
nkululeko-0.
|
111
|
-
nkululeko-0.
|
112
|
-
nkululeko-0.
|
113
|
-
nkululeko-0.
|
114
|
-
nkululeko-0.
|
110
|
+
nkululeko-0.90.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
111
|
+
nkululeko-0.90.0.dist-info/METADATA,sha256=BiguFg1WzW9crNTqgr2qbtmaOL_PgeLP3M1Azn4Lehk,40900
|
112
|
+
nkululeko-0.90.0.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
|
113
|
+
nkululeko-0.90.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
114
|
+
nkululeko-0.90.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|