nkululeko 0.59.1__py3-none-any.whl → 0.61.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +1 -1
- nkululeko/experiment.py +43 -43
- nkululeko/feature_extractor.py +101 -58
- nkululeko/modelrunner.py +14 -14
- nkululeko/plots.py +11 -0
- nkululeko/segment.py +23 -27
- nkululeko/test_predictor.py +1 -1
- {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/METADATA +13 -1
- nkululeko-0.61.0.dist-info/RECORD +31 -0
- {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/WHEEL +1 -1
- nkululeko/ap_age.py +0 -31
- nkululeko/ap_arousal.py +0 -30
- nkululeko/ap_dominance.py +0 -29
- nkululeko/ap_gender.py +0 -29
- nkululeko/ap_mos.py +0 -35
- nkululeko/ap_pesq.py +0 -35
- nkululeko/ap_sdr.py +0 -36
- nkululeko/ap_snr.py +0 -35
- nkululeko/ap_stoi.py +0 -34
- nkululeko/ap_valence.py +0 -30
- nkululeko/augmenter.py +0 -64
- nkululeko/dataset.py +0 -415
- nkululeko/dataset_csv.py +0 -49
- nkululeko/dataset_ravdess.py +0 -19
- nkululeko/estimate_snr.py +0 -89
- nkululeko/feats_agender.py +0 -63
- nkululeko/feats_agender_agender.py +0 -65
- nkululeko/feats_analyser.py +0 -87
- nkululeko/feats_audmodel.py +0 -63
- nkululeko/feats_audmodel_dim.py +0 -63
- nkululeko/feats_clap.py +0 -74
- nkululeko/feats_import.py +0 -44
- nkululeko/feats_mld.py +0 -47
- nkululeko/feats_mos.py +0 -92
- nkululeko/feats_opensmile.py +0 -84
- nkululeko/feats_oxbow.py +0 -87
- nkululeko/feats_praat.py +0 -72
- nkululeko/feats_snr.py +0 -63
- nkululeko/feats_squim.py +0 -99
- nkululeko/feats_trill.py +0 -74
- nkululeko/feats_wav2vec2.py +0 -94
- nkululeko/featureset.py +0 -41
- nkululeko/feinberg_praat.py +0 -430
- nkululeko/loss_ccc.py +0 -28
- nkululeko/loss_softf1loss.py +0 -40
- nkululeko/model.py +0 -256
- nkululeko/model_bayes.py +0 -14
- nkululeko/model_cnn.py +0 -118
- nkululeko/model_gmm.py +0 -16
- nkululeko/model_knn.py +0 -16
- nkululeko/model_knn_reg.py +0 -16
- nkululeko/model_mlp.py +0 -175
- nkululeko/model_mlp_regression.py +0 -197
- nkululeko/model_svm.py +0 -18
- nkululeko/model_svr.py +0 -18
- nkululeko/model_tree.py +0 -14
- nkululeko/model_tree_reg.py +0 -14
- nkululeko/model_xgb.py +0 -12
- nkululeko/model_xgr.py +0 -12
- nkululeko/randomsplicer.py +0 -76
- nkululeko/randomsplicing.py +0 -74
- nkululeko-0.59.1.dist-info/RECORD +0 -82
- {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/LICENSE +0 -0
- {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/top_level.txt +0 -0
nkululeko/constants.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '0.59.1'
|
1
|
+
VERSION = '0.61.0'
|
nkululeko/experiment.py
CHANGED
@@ -2,13 +2,13 @@ import random
|
|
2
2
|
import os
|
3
3
|
import time
|
4
4
|
import numpy as np
|
5
|
-
from nkululeko.dataset import Dataset
|
6
|
-
from nkululeko.dataset_csv import Dataset_CSV
|
7
|
-
from nkululeko.dataset_ravdess import Ravdess
|
5
|
+
from nkululeko.data.dataset import Dataset
|
6
|
+
from nkululeko.data.dataset_csv import Dataset_CSV
|
7
|
+
from nkululeko.data.dataset_ravdess import Ravdess
|
8
8
|
from nkululeko.filter_data import filter_min_dur
|
9
9
|
from nkululeko.runmanager import Runmanager
|
10
10
|
from nkululeko.test_predictor import Test_predictor
|
11
|
-
from nkululeko.feats_analyser import FeatureAnalyser
|
11
|
+
from nkululeko.feat_extract.feats_analyser import FeatureAnalyser
|
12
12
|
from nkululeko.util import Util
|
13
13
|
from nkululeko.feature_extractor import FeatureExtractor
|
14
14
|
from nkululeko.plots import Plots
|
@@ -148,32 +148,32 @@ class Experiment:
|
|
148
148
|
self.df_train = self._import_csv(storage_train)
|
149
149
|
else:
|
150
150
|
self.df_train, self.df_test = pd.DataFrame(), pd.DataFrame()
|
151
|
-
strategy = self.util.config_val('DATA', 'strategy', 'traintest')
|
152
|
-
# some datasets against others in their entierty
|
153
|
-
if strategy == 'cross_data':
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
elif strategy == 'traintest':
|
151
|
+
# strategy = self.util.config_val('DATA', 'strategy', 'traintest')
|
152
|
+
# # some datasets against others in their entierty
|
153
|
+
# if strategy == 'cross_data':
|
154
|
+
# train_dbs = ast.literal_eval(glob_conf.config['DATA']['trains'])
|
155
|
+
# test_dbs = ast.literal_eval(glob_conf.config['DATA']['tests'])
|
156
|
+
# for dn in train_dbs:
|
157
|
+
# d = self.datasets[dn]
|
158
|
+
# d.prepare_labels()
|
159
|
+
# self.df_train = self.df_train.append(self.util.make_segmented_index(d.df))
|
160
|
+
# self.util.copy_flags(d, self.df_train)
|
161
|
+
# for dn in test_dbs:
|
162
|
+
# d = self.datasets[dn]
|
163
|
+
# d.prepare_labels()
|
164
|
+
# self.df_test = self.df_test.append(self.util.make_segmented_index(d.df))
|
165
|
+
# self.util.copy_flags(d, self.df_test)
|
166
|
+
# elif strategy == 'traintest':
|
167
167
|
# default: train vs. test combined from all datasets
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
else:
|
176
|
-
|
168
|
+
for d in self.datasets.values():
|
169
|
+
d.split()
|
170
|
+
d.prepare_labels()
|
171
|
+
self.df_train = pd.concat([self.df_train, d.df_train])
|
172
|
+
self.util.copy_flags(d, self.df_train)
|
173
|
+
self.df_test = pd.concat([self.df_test, d.df_test])
|
174
|
+
self.util.copy_flags(d, self.df_test)
|
175
|
+
# else:
|
176
|
+
# self.util.error(f'unknown strategy: {strategy}')
|
177
177
|
# save the file lists to disk for later reuse
|
178
178
|
store = self.util.get_path('store')
|
179
179
|
storage_test = f'{store}testdf.csv'
|
@@ -313,7 +313,7 @@ class Experiment:
|
|
313
313
|
"""
|
314
314
|
Augment the selected samples
|
315
315
|
"""
|
316
|
-
from nkululeko.augmenter import Augmenter
|
316
|
+
from nkululeko.augmenting.augmenter import Augmenter
|
317
317
|
sample_selection = self.util.config_val('DATA', 'augment', 'train')
|
318
318
|
if sample_selection=='all':
|
319
319
|
df = pd.concat([self.df_train, self.df_test])
|
@@ -343,43 +343,43 @@ class Experiment:
|
|
343
343
|
targets = self.util.config_val_list('PREDICT', 'targets', ['gender'])
|
344
344
|
for target in targets:
|
345
345
|
if target == 'gender':
|
346
|
-
from nkululeko.ap_gender import GenderPredictor
|
346
|
+
from nkululeko.autopredict.ap_gender import GenderPredictor
|
347
347
|
predictor = GenderPredictor(df)
|
348
348
|
df = predictor.predict(sample_selection)
|
349
349
|
elif target == 'age':
|
350
|
-
from nkululeko.ap_age import AgePredictor
|
350
|
+
from nkululeko.autopredict.ap_age import AgePredictor
|
351
351
|
predictor = AgePredictor(df)
|
352
352
|
df = predictor.predict(sample_selection)
|
353
353
|
elif target == 'snr':
|
354
|
-
from nkululeko.ap_sdr import SNRPredictor
|
354
|
+
from nkululeko.autopredict.ap_sdr import SNRPredictor
|
355
355
|
predictor = SNRPredictor(df)
|
356
356
|
df = predictor.predict(sample_selection)
|
357
357
|
elif target == 'mos':
|
358
|
-
from nkululeko.ap_mos import MOSPredictor
|
358
|
+
from nkululeko.autopredict.ap_mos import MOSPredictor
|
359
359
|
predictor = MOSPredictor(df)
|
360
360
|
df = predictor.predict(sample_selection)
|
361
361
|
elif target == 'pesq':
|
362
|
-
from nkululeko.ap_pesq import PESQPredictor
|
362
|
+
from nkululeko.autopredict.ap_pesq import PESQPredictor
|
363
363
|
predictor = PESQPredictor(df)
|
364
364
|
df = predictor.predict(sample_selection)
|
365
365
|
elif target == 'sdr':
|
366
|
-
from nkululeko.ap_sdr import SDRPredictor
|
366
|
+
from nkululeko.autopredict.ap_sdr import SDRPredictor
|
367
367
|
predictor = SDRPredictor(df)
|
368
368
|
df = predictor.predict(sample_selection)
|
369
369
|
elif target == 'stoi':
|
370
|
-
from nkululeko.ap_stoi import STOIPredictor
|
370
|
+
from nkululeko.autopredict.ap_stoi import STOIPredictor
|
371
371
|
predictor = STOIPredictor(df)
|
372
372
|
df = predictor.predict(sample_selection)
|
373
373
|
elif target == 'arousal':
|
374
|
-
from nkululeko.ap_arousal import ArousalPredictor
|
374
|
+
from nkululeko.autopredict.ap_arousal import ArousalPredictor
|
375
375
|
predictor = ArousalPredictor(df)
|
376
376
|
df = predictor.predict(sample_selection)
|
377
377
|
elif target == 'valence':
|
378
|
-
from nkululeko.ap_valence import ValencePredictor
|
378
|
+
from nkululeko.autopredict.ap_valence import ValencePredictor
|
379
379
|
predictor = ValencePredictor(df)
|
380
380
|
df = predictor.predict(sample_selection)
|
381
381
|
elif target == 'dominance':
|
382
|
-
from nkululeko.ap_dominance import DominancePredictor
|
382
|
+
from nkululeko.autopredict.ap_dominance import DominancePredictor
|
383
383
|
predictor = DominancePredictor(df)
|
384
384
|
df = predictor.predict(sample_selection)
|
385
385
|
else:
|
@@ -390,7 +390,7 @@ class Experiment:
|
|
390
390
|
"""
|
391
391
|
Random-splice the selected samples
|
392
392
|
"""
|
393
|
-
from nkululeko.randomsplicer import Randomsplicer
|
393
|
+
from nkululeko.augmenting.randomsplicer import Randomsplicer
|
394
394
|
sample_selection = self.util.config_val('DATA', 'random_splice', 'train')
|
395
395
|
if sample_selection=='all':
|
396
396
|
df = pd.concat([self.df_train, self.df_test])
|
@@ -552,5 +552,5 @@ class Experiment:
|
|
552
552
|
f = open(filename, 'wb')
|
553
553
|
pickle.dump(self.__dict__, f)
|
554
554
|
f.close()
|
555
|
-
except (AttributeError, TypeError) as error:
|
556
|
-
self.util.
|
555
|
+
except (AttributeError, TypeError, RuntimeError) as error:
|
556
|
+
self.util.warn(f'Save experiment: Can\'t pickle local object: {error}')
|
nkululeko/feature_extractor.py
CHANGED
@@ -5,8 +5,9 @@ Helper class to encapsulate feature extraction methods
|
|
5
5
|
|
6
6
|
"""
|
7
7
|
import pandas as pd
|
8
|
-
|
9
|
-
from nkululeko.feats_opensmile import Opensmileset
|
8
|
+
|
9
|
+
from nkululeko.feat_extract.feats_opensmile import Opensmileset
|
10
|
+
from nkululeko.util import Util
|
10
11
|
|
11
12
|
|
12
13
|
class FeatureExtractor:
|
@@ -16,80 +17,122 @@ class FeatureExtractor:
|
|
16
17
|
data_df (pandas.DataFrame): dataframe with audiofile paths as index
|
17
18
|
feats_types (array of strings): designations of acoustic feature extractors to be used
|
18
19
|
data_name (string): names of databases that are extracted (for the caching)
|
19
|
-
feats_designation (string): the type of split (train/test), also is used for the cache name.
|
20
|
+
feats_designation (string): the type of split (train/test), also is used for the cache name.
|
20
21
|
Returns:
|
21
|
-
df (pandas.DataFrame): dataframe with same index as data_df and acoustic features in columns
|
22
|
+
df (pandas.DataFrame): dataframe with same index as data_df and acoustic features in columns
|
22
23
|
"""
|
23
|
-
df = None # pandas dataframe to store the features (and indexed with the data from the sets)
|
24
|
-
data_df = None # dataframe to get audio paths
|
25
24
|
|
26
|
-
#
|
25
|
+
# pandas dataframe to store the features (and indexed with the data from the sets)
|
26
|
+
df = None
|
27
|
+
data_df = None # dataframe to get audio paths
|
28
|
+
|
29
|
+
# def __init__
|
27
30
|
def __init__(self, data_df, feats_types, data_name, feats_designation):
|
28
31
|
self.data_df = data_df
|
29
32
|
self.data_name = data_name
|
30
33
|
self.feats_types = feats_types
|
31
|
-
self.util = Util(
|
34
|
+
self.util = Util("feature_extractor")
|
32
35
|
self.feats_designation = feats_designation
|
33
|
-
|
36
|
+
|
34
37
|
def extract(self):
|
35
38
|
# feats_types = self.util.config_val_list('FEATS', 'type', ['os'])
|
36
39
|
self.featExtractor = None
|
37
|
-
self.feats= pd.DataFrame()
|
40
|
+
self.feats = pd.DataFrame()
|
38
41
|
_scale = True
|
39
42
|
for feats_type in self.feats_types:
|
40
|
-
store_name = f
|
41
|
-
if feats_type==
|
42
|
-
self.featExtractor = Opensmileset(
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
elif feats_type
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
43
|
+
store_name = f"{self.data_name}_{feats_type}"
|
44
|
+
if feats_type == "os":
|
45
|
+
self.featExtractor = Opensmileset(
|
46
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
47
|
+
)
|
48
|
+
elif feats_type == "trill":
|
49
|
+
from nkululeko.feat_extract.feats_trill import TRILLset
|
50
|
+
self.featExtractor = TRILLset(
|
51
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
52
|
+
)
|
53
|
+
elif feats_type == "wav2vec":
|
54
|
+
from nkululeko.feat_extract.feats_wav2vec2 import Wav2vec2
|
55
|
+
self.featExtractor = Wav2vec2(
|
56
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
57
|
+
)
|
58
|
+
elif feats_type in ("hubert", "hubert_ft", "hubert_large",
|
59
|
+
"hubert_xlarge", "hubert_xlarge_ft"):
|
60
|
+
from nkululeko.feat_extract.feats_hubert import Hubert
|
61
|
+
self.featExtractor = Hubert(
|
62
|
+
f"{store_name}_{self.feats_designation}", self.data_df,
|
63
|
+
feats_type
|
64
|
+
)
|
65
|
+
|
66
|
+
elif feats_type == "audmodel":
|
67
|
+
from nkululeko.feat_extract.feats_audmodel import AudModelSet
|
68
|
+
self.featExtractor = AudModelSet(
|
69
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
70
|
+
)
|
71
|
+
elif feats_type == "auddim":
|
72
|
+
from nkululeko.feat_extract.feats_audmodel_dim import \
|
73
|
+
AudModelDimSet
|
74
|
+
self.featExtractor = AudModelDimSet(
|
75
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
76
|
+
)
|
77
|
+
elif feats_type == "agender":
|
78
|
+
from nkululeko.feat_extract.feats_agender import \
|
79
|
+
AudModelAgenderSet
|
80
|
+
self.featExtractor = AudModelAgenderSet(
|
81
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
82
|
+
)
|
83
|
+
elif feats_type == "agender_agender":
|
84
|
+
from nkululeko.feat_extract.feats_agender_agender import \
|
85
|
+
AgenderAgenderSet
|
86
|
+
self.featExtractor = AgenderAgenderSet(
|
87
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
88
|
+
)
|
89
|
+
elif feats_type == "snr":
|
90
|
+
from nkululeko.feat_extract.feats_snr import SNRSet
|
91
|
+
self.featExtractor = SNRSet(
|
92
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
93
|
+
)
|
94
|
+
elif feats_type == "mos":
|
95
|
+
from nkululeko.feat_extract.feats_mos import MOSSet
|
96
|
+
self.featExtractor = MOSSet(
|
97
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
98
|
+
)
|
99
|
+
elif feats_type == "squim":
|
100
|
+
from nkululeko.feat_extract.feats_squim import SQUIMSet
|
101
|
+
self.featExtractor = SQUIMSet(
|
102
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
103
|
+
)
|
104
|
+
elif feats_type == "clap":
|
105
|
+
from nkululeko.feat_extract.feats_clap import Clap
|
106
|
+
self.featExtractor = Clap(
|
107
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
108
|
+
)
|
109
|
+
elif feats_type == "praat":
|
110
|
+
from nkululeko.feat_extract.feats_praat import Praatset
|
111
|
+
self.featExtractor = Praatset(
|
112
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
113
|
+
)
|
114
|
+
elif feats_type == "mld":
|
115
|
+
from nkululeko.feat_extract.feats_mld import MLD_set
|
116
|
+
self.featExtractor = MLD_set(
|
117
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
118
|
+
)
|
119
|
+
elif feats_type == "import":
|
120
|
+
from nkululeko.feat_extract.feats_import import Importset
|
121
|
+
self.featExtractor = Importset(
|
122
|
+
f"{store_name}_{self.feats_designation}", self.data_df
|
123
|
+
)
|
82
124
|
else:
|
83
|
-
self.util.error(f
|
125
|
+
self.util.error(f"unknown feats_type: {feats_type}")
|
84
126
|
|
85
127
|
self.featExtractor.extract()
|
86
128
|
self.featExtractor.filter()
|
87
129
|
# remove samples that were not extracted by MLD
|
88
|
-
#self.df_test = self.df_test.loc[self.df_test.index.intersection(featExtractor_test.df.index)]
|
89
|
-
#self.df_train = self.df_train.loc[self.df_train.index.intersection(featExtractor_train.df.index)]
|
90
|
-
self.util.debug(
|
91
|
-
|
130
|
+
# self.df_test = self.df_test.loc[self.df_test.index.intersection(featExtractor_test.df.index)]
|
131
|
+
# self.df_train = self.df_train.loc[self.df_train.index.intersection(featExtractor_train.df.index)]
|
132
|
+
self.util.debug(
|
133
|
+
f"{feats_type}: shape : {self.featExtractor.df.shape}")
|
134
|
+
self.feats = pd.concat([self.feats, self.featExtractor.df], axis=1)
|
92
135
|
return self.feats
|
93
136
|
|
94
137
|
def extract_sample(self, signal, sr):
|
95
|
-
return self.featExtractor.extract_sample(signal, sr)
|
138
|
+
return self.featExtractor.extract_sample(signal, sr)
|
nkululeko/modelrunner.py
CHANGED
@@ -59,44 +59,44 @@ class Modelrunner:
|
|
59
59
|
|
60
60
|
def _select_model(self, model_type):
|
61
61
|
if model_type=='svm':
|
62
|
-
from nkululeko.model_svm import SVM_model
|
62
|
+
from nkululeko.models.model_svm import SVM_model
|
63
63
|
self.model = SVM_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
|
64
64
|
elif model_type=='svr':
|
65
|
-
from nkululeko.model_svr import SVR_model
|
65
|
+
from nkululeko.models.model_svr import SVR_model
|
66
66
|
self.model = SVR_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
|
67
67
|
elif model_type=='xgb':
|
68
|
-
from nkululeko.model_xgb import XGB_model
|
68
|
+
from nkululeko.models.model_xgb import XGB_model
|
69
69
|
self.model = XGB_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
|
70
70
|
elif model_type=='xgr':
|
71
|
-
from nkululeko.model_xgr import XGR_model
|
71
|
+
from nkululeko.models.model_xgr import XGR_model
|
72
72
|
self.model = XGR_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
|
73
73
|
elif model_type=='bayes':
|
74
|
-
from nkululeko.model_bayes import Bayes_model
|
74
|
+
from nkululeko.models.model_bayes import Bayes_model
|
75
75
|
self.model = Bayes_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
|
76
76
|
elif model_type=='gmm':
|
77
|
-
from nkululeko.model_gmm import GMM_model
|
77
|
+
from nkululeko.models.model_gmm import GMM_model
|
78
78
|
self.model = GMM_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
|
79
79
|
elif model_type=='knn':
|
80
|
-
from nkululeko.model_knn import KNN_model
|
80
|
+
from nkululeko.models.model_knn import KNN_model
|
81
81
|
self.model = KNN_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
|
82
82
|
elif model_type=='knn_reg':
|
83
|
-
from nkululeko.model_knn_reg import KNN_reg_model
|
83
|
+
from nkululeko.models.model_knn_reg import KNN_reg_model
|
84
84
|
self.model = KNN_reg_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
|
85
85
|
elif model_type=='tree':
|
86
|
-
from nkululeko.model_tree import Tree_model
|
86
|
+
from nkululeko.models.model_tree import Tree_model
|
87
87
|
self.model = Tree_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
|
88
88
|
elif model_type=='tree_reg':
|
89
|
-
from nkululeko.model_tree_reg import Tree_reg_model
|
89
|
+
from nkululeko.models.model_tree_reg import Tree_reg_model
|
90
90
|
self.model = Tree_reg_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
|
91
91
|
elif model_type=='cnn':
|
92
|
-
from nkululeko.model_cnn import CNN_model
|
93
|
-
from nkululeko.model_cnn import CNN_model
|
92
|
+
from nkululeko.models.model_cnn import CNN_model
|
93
|
+
from nkululeko.models.model_cnn import CNN_model
|
94
94
|
self.model = CNN_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
|
95
95
|
elif model_type=='mlp':
|
96
|
-
from nkululeko.model_mlp import MLP_model
|
96
|
+
from nkululeko.models.model_mlp import MLP_model
|
97
97
|
self.model = MLP_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
|
98
98
|
elif model_type=='mlp_reg':
|
99
|
-
from nkululeko.model_mlp_regression import MLP_Reg_model
|
99
|
+
from nkululeko.models.model_mlp_regression import MLP_Reg_model
|
100
100
|
self.model = MLP_Reg_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
|
101
101
|
else:
|
102
102
|
self.util.error(f'unknown model type: \'{model_type}\'')
|
nkululeko/plots.py
CHANGED
@@ -95,6 +95,17 @@ class Plots():
|
|
95
95
|
else:
|
96
96
|
self.util.error(f'plot value counts: the plot distribution descriptor for {att} has more than 2 values')
|
97
97
|
|
98
|
+
def plot_durations(self, df, filename, sample_selection):
|
99
|
+
fig_dir = self.util.get_path('fig_dir')+'../' # one up because of the runs
|
100
|
+
ax = sns.histplot(df, x='duration', hue='class_label', kde=True)
|
101
|
+
ax.set_title(f'{sample_selection} {df.shape[0]}')
|
102
|
+
ax.set_xlabel(f'duration')
|
103
|
+
ax.set_ylabel(f'number of samples')
|
104
|
+
fig = ax.figure
|
105
|
+
plt.tight_layout()
|
106
|
+
plt.savefig(f'{fig_dir}{filename}_{sample_selection}.{self.format}')
|
107
|
+
plt.close(fig)
|
108
|
+
fig.clear()
|
98
109
|
|
99
110
|
def describe_df(self, name, df, target, filename):
|
100
111
|
"""Make a stacked barplot of samples and speakers per sex and target values. speaker, gender and target columns must be present"""
|
nkululeko/segment.py
CHANGED
@@ -7,25 +7,8 @@ from nkululeko.util import Util
|
|
7
7
|
from nkululeko.constants import VERSION
|
8
8
|
import argparse
|
9
9
|
import os
|
10
|
-
import torch
|
11
|
-
import audformat
|
12
|
-
from audformat.utils import to_filewise_index
|
13
|
-
from audformat import segmented_index
|
14
10
|
import pandas as pd
|
15
11
|
|
16
|
-
# initialize the VAD model
|
17
|
-
SAMPLING_RATE = 16000
|
18
|
-
torch.set_num_threads(1)
|
19
|
-
vad_model, vad_utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
|
20
|
-
model='silero_vad',
|
21
|
-
force_reload=False,
|
22
|
-
onnx=False)
|
23
|
-
(get_speech_timestamps,
|
24
|
-
save_audio,
|
25
|
-
read_audio,
|
26
|
-
VADIterator,
|
27
|
-
collect_chunks) = vad_utils
|
28
|
-
|
29
12
|
def main(src_dir):
|
30
13
|
parser = argparse.ArgumentParser(description='Call the nkululeko framework.')
|
31
14
|
parser.add_argument('--config', default='exp.ini', help='The base configuration')
|
@@ -60,7 +43,7 @@ def main(src_dir):
|
|
60
43
|
util.debug(f'train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}')
|
61
44
|
|
62
45
|
# segment
|
63
|
-
segment_target = util.config_val('
|
46
|
+
segment_target = util.config_val('SEGMENT', 'target', '_seg')
|
64
47
|
# this if a specific dataset is to be segmented
|
65
48
|
# segment_db = util.config_val('DATA', 'segment', False)
|
66
49
|
# if segment_db:
|
@@ -72,7 +55,8 @@ def main(src_dir):
|
|
72
55
|
# name = f'{dataset}{segment_target}'
|
73
56
|
# df_seg.to_csv(f'{expr.data_dir}/{name}.csv')
|
74
57
|
|
75
|
-
|
58
|
+
segmenter = util.config_val('SEGMENT', 'method', 'silero')
|
59
|
+
sample_selection = util.config_val('SEGMENT', 'sample_selection', 'all')
|
76
60
|
if sample_selection=='all':
|
77
61
|
df = pd.concat([expr.df_train, expr.df_test])
|
78
62
|
elif sample_selection=='train':
|
@@ -81,26 +65,38 @@ def main(src_dir):
|
|
81
65
|
df = expr.df_test
|
82
66
|
else:
|
83
67
|
util.error(f'unknown segmentation selection specifier {sample_selection}, should be [all | train | test]')
|
68
|
+
# if "duration" not in df.columns:
|
69
|
+
# df = df.drop(columns=['duration'], inplace=True)
|
70
|
+
util.debug(f'segmenting {sample_selection}: {df.shape[0]} samples with {segmenter}')
|
71
|
+
if segmenter=='silero':
|
72
|
+
from nkululeko.segmenting.seg_silero import Silero_segmenter
|
73
|
+
segmenter = Silero_segmenter()
|
74
|
+
df_seg = segmenter.segment_dataframe(df)
|
75
|
+
|
76
|
+
else:
|
77
|
+
util.error(f'unkown segmenter: {segmenter}')
|
84
78
|
|
85
|
-
if "duration" not in df.columns:
|
86
|
-
df = df.drop(columns=['duration'], inplace=True)
|
87
|
-
util.debug(f'segmenting train and test set: {df.shape[0]} samples')
|
88
|
-
df_seg = segment_dataframe(df)
|
89
79
|
def calc_dur(x):
|
80
|
+
from datetime import datetime
|
90
81
|
starts = x[1]
|
91
82
|
ends = x[2]
|
92
83
|
return (ends - starts).total_seconds()
|
93
84
|
df_seg['duration'] = df_seg.index.to_series().map(lambda x:calc_dur(x))
|
85
|
+
if "duration" not in df.columns:
|
86
|
+
df['duration'] = df.index.to_series().map(lambda x:calc_dur(x))
|
87
|
+
num_before = df.shape[0]
|
88
|
+
num_after = df_seg.shape[0]
|
94
89
|
dataname = '_'.join(expr.datasets.keys())
|
95
90
|
name = f'{dataname}{segment_target}'
|
96
91
|
df_seg.to_csv(f'{expr.data_dir}/{name}.csv')
|
92
|
+
from nkululeko.plots import Plots
|
93
|
+
plots = Plots()
|
94
|
+
plots.plot_durations(df, 'original_durations', sample_selection)
|
95
|
+
plots.plot_durations(df_seg, 'segmented_durations', sample_selection)
|
97
96
|
print('')
|
98
|
-
util.debug(f'saved {name}.csv to {expr.data_dir}, {
|
97
|
+
util.debug(f'saved {name}.csv to {expr.data_dir}, {num_after} samples (was {num_before})')
|
99
98
|
print('DONE')
|
100
99
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
100
|
def get_segmentation(file):
|
105
101
|
# print(f'segmenting {file[0]}')
|
106
102
|
print('.', end='')
|
nkululeko/test_predictor.py
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
import nkululeko.glob_conf as glob_conf
|
6
6
|
from nkululeko.util import Util
|
7
7
|
import pandas as pd
|
8
|
-
from nkululeko.dataset import Dataset
|
8
|
+
from nkululeko.data.dataset import Dataset
|
9
9
|
from nkululeko.feature_extractor import FeatureExtractor
|
10
10
|
from nkululeko.scaler import Scaler
|
11
11
|
import numpy as np
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.59.1
|
3
|
+
Version: 0.61.0
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -253,6 +253,18 @@ Nkululeko can be used under the [MIT license](https://choosealicense.com/license
|
|
253
253
|
Changelog
|
254
254
|
=========
|
255
255
|
|
256
|
+
Version 0.61.0
|
257
|
+
--------------
|
258
|
+
* added HUBERT embeddings
|
259
|
+
|
260
|
+
Version 0.60.0
|
261
|
+
--------------
|
262
|
+
* some bugfixes
|
263
|
+
* new package structure
|
264
|
+
* fixed wav2vec2 bugs
|
265
|
+
* removed "cross_data" strategy
|
266
|
+
|
267
|
+
|
256
268
|
Version 0.59.1
|
257
269
|
--------------
|
258
270
|
* bugfix, after fresh install, it seems some libraries have changed
|
@@ -0,0 +1,31 @@
|
|
1
|
+
nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
2
|
+
nkululeko/augment.py,sha256=ebv5QebGD8wLzXInvusjn4kFlET6-yXkYoF132BrubQ,1750
|
3
|
+
nkululeko/balancer.py,sha256=64ftZN68sMDfkvuovCDHpAHmSJgCO6Kdk9bwmpSisec,12
|
4
|
+
nkululeko/cacheddataset.py,sha256=bSJ_SDg7TxL89YL_pJXp-sFvdUXJtHuBTd5KSTE4AkQ,955
|
5
|
+
nkululeko/constants.py,sha256=xSkfM3CWCYGD40D6GhHHCrxrw0JNK-372YdiRKMknWs,18
|
6
|
+
nkululeko/demo.py,sha256=nGP3fUDXuW1ZF12AzMpzRWXct0rdqYRJVNgA9B_QWwA,1821
|
7
|
+
nkululeko/demo_predictor.py,sha256=VVxE2lf5lTkAP5qElG5U2bK6SdDzQ2Jmf0Vn_yHpSro,2302
|
8
|
+
nkululeko/experiment.py,sha256=9qStgy31svY4bBVZOkuJ0JFjEQ1sIT2ibIdJ6IVlfTI,25063
|
9
|
+
nkululeko/explore.py,sha256=aemOk5XYw7axQEJQfdABEUxN3img0NYSb8l6W-nDzZY,2090
|
10
|
+
nkululeko/feature_extractor.py,sha256=2LqPIiDAoaBRhjcKik2hjBEBVBsLLxx8blQvTD43TRg,6324
|
11
|
+
nkululeko/file_checker.py,sha256=Nw05SIp7Ez1U9ZeFhNGz0XivwKr43hHg1WsfzKsrFPQ,3510
|
12
|
+
nkululeko/filter_data.py,sha256=g7giEShbA-dr2ekVycW5WurFG-UaopJvDZWylKNZtpM,6717
|
13
|
+
nkululeko/glob_conf.py,sha256=2Tl0NZQeVeydDO8T2tuJC2lCv99wvaTVRX9Dy1Mq_L4,237
|
14
|
+
nkululeko/modelrunner.py,sha256=zVDi2-UyjtmU0_Ltf4lnPcECVtukuDVuZaj4pydqOBY,5478
|
15
|
+
nkululeko/nkululeko.py,sha256=O2Zw7u-Mb7VP9MPxAlhdTkXV2lW2kETIuSJp7mfj_Tc,1671
|
16
|
+
nkululeko/plots.py,sha256=hoOFLbWXpV5jGDWHEpy345_4vpaGKGMAv2JwvpNUxkw,11454
|
17
|
+
nkululeko/predict.py,sha256=3ei4wn2by0p9Vkv7cllMcszmEjSM2vX0T6x_5rlgT28,1851
|
18
|
+
nkululeko/reporter.py,sha256=359aeQWt0ZGLseaJnOfafYG8BrwumiM2Q58DWiaoyWQ,10177
|
19
|
+
nkululeko/result.py,sha256=Ey5FPsAyfnQVtzO_J6_4hkOAZ191YWmF_vXxlgNjCdc,406
|
20
|
+
nkululeko/runmanager.py,sha256=ll04dEu5Y1nOi8QOtmSiw3oxzcXeASdQsg0t-vxCny8,6765
|
21
|
+
nkululeko/scaler.py,sha256=6NQHbSQZO9HIfhYNlliuDRywjaEH_FVKHRskTJ2xr90,3021
|
22
|
+
nkululeko/segment.py,sha256=GGyovnZ75Sqd8TgBH5fi3fjRkVw_ygqBQD46Yn6GVQ4,4660
|
23
|
+
nkululeko/syllable_nuclei.py,sha256=vK9dj5deqRnyEmlZmhFtKPzqKVGNCgTnWaG8UDITKNg,9913
|
24
|
+
nkululeko/test.py,sha256=BbHGliDChAXqMe2oA579dJpyZSlPGAm5997lX_POboQ,1372
|
25
|
+
nkululeko/test_predictor.py,sha256=QwdAVPHNew9w5PD_sPFhhWVDTYRAbUE6fkAp58X8Hjg,2410
|
26
|
+
nkululeko/util.py,sha256=gZrNTF4C7hKkEMCC_hoNkEAhAViWzWebP8LsHRew7s4,9731
|
27
|
+
nkululeko-0.61.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
28
|
+
nkululeko-0.61.0.dist-info/METADATA,sha256=LXJjW9KpGkPum60eGuYW__gl5QKXVRhnm6RySrKo2b8,21680
|
29
|
+
nkululeko-0.61.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
30
|
+
nkululeko-0.61.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
31
|
+
nkululeko-0.61.0.dist-info/RECORD,,
|
nkululeko/ap_age.py
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
""""
|
2
|
-
A predictor for age.
|
3
|
-
Currently based on audEERING's agender model.
|
4
|
-
"""
|
5
|
-
from nkululeko.util import Util
|
6
|
-
from nkululeko.feature_extractor import FeatureExtractor
|
7
|
-
import ast
|
8
|
-
import nkululeko.glob_conf as glob_conf
|
9
|
-
class AgePredictor:
|
10
|
-
"""
|
11
|
-
AgePredictor
|
12
|
-
predicting age with the audEERING agender model
|
13
|
-
|
14
|
-
"""
|
15
|
-
def __init__(self, df):
|
16
|
-
self.df = df
|
17
|
-
self.util = Util('agePredictor')
|
18
|
-
|
19
|
-
|
20
|
-
def predict(self, split_selection):
|
21
|
-
self.util.debug(f'predicting age for {split_selection} samples')
|
22
|
-
feats_name = "_".join(ast.literal_eval(glob_conf.config['DATA']['databases']))
|
23
|
-
self.feature_extractor = FeatureExtractor(self.df, ['agender_agender'], feats_name, split_selection)
|
24
|
-
agender_df = self.feature_extractor.extract()
|
25
|
-
pred_age = agender_df.age * 100
|
26
|
-
# pred_gender = agender_df.drop('age', axis=1).idxmax(axis=1)
|
27
|
-
return_df = self.df.copy()
|
28
|
-
# return_df['gender_pred'] = pred_gender
|
29
|
-
return_df['age_pred'] = pred_age.astype('int')
|
30
|
-
return return_df
|
31
|
-
|