nkululeko 0.59.1__py3-none-any.whl → 0.61.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +1 -1
- nkululeko/experiment.py +43 -43
- nkululeko/feature_extractor.py +101 -58
- nkululeko/modelrunner.py +14 -14
- nkululeko/plots.py +11 -0
- nkululeko/segment.py +23 -27
- nkululeko/test_predictor.py +1 -1
- {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/METADATA +13 -1
- nkululeko-0.61.0.dist-info/RECORD +31 -0
- {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/WHEEL +1 -1
- nkululeko/ap_age.py +0 -31
- nkululeko/ap_arousal.py +0 -30
- nkululeko/ap_dominance.py +0 -29
- nkululeko/ap_gender.py +0 -29
- nkululeko/ap_mos.py +0 -35
- nkululeko/ap_pesq.py +0 -35
- nkululeko/ap_sdr.py +0 -36
- nkululeko/ap_snr.py +0 -35
- nkululeko/ap_stoi.py +0 -34
- nkululeko/ap_valence.py +0 -30
- nkululeko/augmenter.py +0 -64
- nkululeko/dataset.py +0 -415
- nkululeko/dataset_csv.py +0 -49
- nkululeko/dataset_ravdess.py +0 -19
- nkululeko/estimate_snr.py +0 -89
- nkululeko/feats_agender.py +0 -63
- nkululeko/feats_agender_agender.py +0 -65
- nkululeko/feats_analyser.py +0 -87
- nkululeko/feats_audmodel.py +0 -63
- nkululeko/feats_audmodel_dim.py +0 -63
- nkululeko/feats_clap.py +0 -74
- nkululeko/feats_import.py +0 -44
- nkululeko/feats_mld.py +0 -47
- nkululeko/feats_mos.py +0 -92
- nkululeko/feats_opensmile.py +0 -84
- nkululeko/feats_oxbow.py +0 -87
- nkululeko/feats_praat.py +0 -72
- nkululeko/feats_snr.py +0 -63
- nkululeko/feats_squim.py +0 -99
- nkululeko/feats_trill.py +0 -74
- nkululeko/feats_wav2vec2.py +0 -94
- nkululeko/featureset.py +0 -41
- nkululeko/feinberg_praat.py +0 -430
- nkululeko/loss_ccc.py +0 -28
- nkululeko/loss_softf1loss.py +0 -40
- nkululeko/model.py +0 -256
- nkululeko/model_bayes.py +0 -14
- nkululeko/model_cnn.py +0 -118
- nkululeko/model_gmm.py +0 -16
- nkululeko/model_knn.py +0 -16
- nkululeko/model_knn_reg.py +0 -16
- nkululeko/model_mlp.py +0 -175
- nkululeko/model_mlp_regression.py +0 -197
- nkululeko/model_svm.py +0 -18
- nkululeko/model_svr.py +0 -18
- nkululeko/model_tree.py +0 -14
- nkululeko/model_tree_reg.py +0 -14
- nkululeko/model_xgb.py +0 -12
- nkululeko/model_xgr.py +0 -12
- nkululeko/randomsplicer.py +0 -76
- nkululeko/randomsplicing.py +0 -74
- nkululeko-0.59.1.dist-info/RECORD +0 -82
- {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/LICENSE +0 -0
- {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/top_level.txt +0 -0
nkululeko/dataset.py
DELETED
@@ -1,415 +0,0 @@
|
|
1
|
-
# dataset.py
|
2
|
-
import audformat
|
3
|
-
#import audb
|
4
|
-
import pandas as pd
|
5
|
-
import ast
|
6
|
-
import os
|
7
|
-
from random import sample
|
8
|
-
from nkululeko.util import Util
|
9
|
-
from nkululeko.plots import Plots
|
10
|
-
import nkululeko.glob_conf as glob_conf
|
11
|
-
import os.path
|
12
|
-
from audformat.utils import duration
|
13
|
-
import nkululeko.filter_data as filter
|
14
|
-
from nkululeko.filter_data import DataFilter
|
15
|
-
|
16
|
-
class Dataset:
    """Class to represent datasets.

    Holds the complete dataframe of one database, its train and test
    splits, and the flags recording which meta data (target, speaker,
    gender, age) were found while loading.
    """

    name = ''  # An identifier for the dataset
    config = None  # The configuration
    db = None  # The database object
    df = None  # The whole dataframe
    df_train = None  # The training split
    df_test = None  # The evaluation split

    # configuration keys that may each hold a list of table names,
    # in the order in which they are collected by _get_tables()
    _TABLE_KEYS = ('target_tables', 'files_tables', 'test_tables', 'train_tables')

    def __init__(self, name):
        """Constructor setting up name and configuration"""
        self.name = name
        self.target = glob_conf.config['DATA']['target']
        self.util = Util('dataset')
        self.plot = Plots()
        self.limit = int(self.util.config_val_data(self.name, 'limit', 0))
        self.start_fresh = self._literal(self.util.config_val('DATA', 'no_reuse', 'False'))
        self.is_labeled = False
        self.got_speaker = False
        self.got_gender = False
        self.got_age = False

    @staticmethod
    def _literal(value):
        """Parse a configuration entry that holds a Python literal.

        Strings are parsed with ast.literal_eval (no code execution),
        anything else (e.g. a boolean default) is returned unchanged.
        """
        if isinstance(value, str):
            return ast.literal_eval(value)
        return value

    def _get_tables(self):
        """Return the names of all tables configured for this dataset."""
        tables = []
        for key in self._TABLE_KEYS:
            entry = self.util.config_val_data(self.name, key, False)
            if entry:
                tables += ast.literal_eval(entry)
        return tables

    def _load_db(self):
        """Load the audformat database and return its root folder."""
        root = self.util.config_val_data(self.name, '', '')
        self.util.debug(f'{self.name}: loading from {root}')
        try:
            self.db = audformat.Database.load(root)
        except FileNotFoundError:
            self.util.error(f'{self.name}: no database found at {root}')
        return root

    def load(self):
        """Load the dataframe with files, speakers and task labels"""
        store = self.util.get_path('store')
        store_file = f'{store}{self.name}.pkl'
        self.root = self._load_db()
        if not self.start_fresh and os.path.isfile(store_file):
            # a dataframe from an earlier run exists: reuse it
            self.util.debug(f'{self.name}: reusing previously stored file {store_file}')
            self.df = pd.read_pickle(store_file)
            self.is_labeled = self.target in self.df
            self.got_gender = 'gender' in self.df
            self.got_age = 'age' in self.df
            self.got_speaker = 'speaker' in self.df
            self.util.copy_flags(self, self.df)
            self.util.debug(f'{self.name}: loaded with {self.df.shape[0]} '
                            f'samples: got targets: {self.is_labeled}, got speakers: {self.got_speaker}, '
                            f'got sexes: {self.got_gender}')
            return
        tables = self._get_tables()
        self.util.debug(f'{self.name}: loading tables: {tables}')
        # map the audio file paths
        self.db.map_files(lambda x: os.path.join(self.root, x))
        # the dataframes (potentially more than one) with at least the file names
        df_files = self.util.config_val_data(self.name, 'files_tables', '[\'files\']')
        df_files_tables = ast.literal_eval(df_files)
        # The label for the target column
        self.col_label = self.util.config_val_data(self.name, 'label', self.target)
        df, self.is_labeled, self.got_speaker, self.got_gender, self.got_age = \
            self._get_df_for_lists(self.db, df_files_tables)
        if not (self.is_labeled and self.got_speaker and self.got_gender and self.got_age):
            # There might be a separate table with the targets, e.g. emotion or age
            df_targets = self.util.config_val_data(self.name, 'target_tables', f'[\'{self.target}\']')
            df_target_tables = ast.literal_eval(df_targets)
            try:
                df_target, got_target2, got_speaker2, got_gender2, got_age2 = \
                    self._get_df_for_lists(self.db, df_target_tables)
            except audformat.core.errors.BadKeyError:
                # best effort: there is no such table, keep what we have
                pass
            else:
                self.is_labeled = got_target2 or self.is_labeled
                self.got_speaker = got_speaker2 or self.got_speaker
                self.got_gender = got_gender2 or self.got_gender
                self.got_age = got_age2 or self.got_age
                if got_target2:
                    df[self.target] = df_target[self.target]
                if got_speaker2:
                    df['speaker'] = df_target['speaker']
                if got_gender2:
                    df['gender'] = df_target['gender']
                if got_age2:
                    df['age'] = df_target['age']

        if self.is_labeled:
            # remember the target in case they get labelencoded later
            df['class_label'] = df[self.target]

        self.df = df
        self.util.debug(f'Loaded database {self.name} with {df.shape[0]} '
                        f'samples: got targets: {self.is_labeled}, got speakers: {self.got_speaker}, '
                        f'got sexes: {self.got_gender}, got age: {self.got_age}')

    def prepare(self):
        """Add durations, apply the configured filters and store the dataframe."""
        # ensure segmented index
        self.df = self.util.make_segmented_index(self.df)
        self.util.copy_flags(self, self.df)
        # add duration
        if 'duration' not in self.df:
            start = self.df.index.get_level_values(1)
            end = self.df.index.get_level_values(2)
            self.df['duration'] = (end - start).total_seconds()

        # Perform some filtering if desired: keep only rows that have a value
        # in the 'required' column
        required = self.util.config_val_data(self.name, 'required', 'False')
        try:
            required = self._literal(required)
        except (ValueError, SyntaxError):
            # not a Python literal: take the entry as a bare column name
            pass
        if required:
            pre = self.df.shape[0]
            self.df = self.df[self.df[required].notna()]
            post = self.df.shape[0]
            self.util.debug(f'{self.name}: kept {post} samples with {required} (from {pre}, filtered {pre-post})')

        datafilter = DataFilter(self.df)
        self.df = datafilter.all_filters(data_name=self.name)

        self.util.debug(f'{self.name}: loaded data with {self.df.shape[0]} '
                        f'samples: got targets: {self.is_labeled}, got speakers: {self.got_speaker}, '
                        f'got sexes: {self.got_gender}')

        if self.got_speaker and self.util.config_val_data(self.name, 'rename_speakers', False):
            # we might need to prepend the database name to all speakers in case
            # other databases have the same speaker names
            self.df.speaker = self.df.speaker.apply(lambda x: self.name + x)

        # store the dataframe
        store = self.util.get_path('store')
        store_file = f'{store}{self.name}.pkl'
        self.df.to_pickle(store_file)

    def _get_df_for_lists(self, db, df_files):
        """Collect target, speaker, gender and age from a list of tables.

        Args:
            db: the database object holding the tables
            df_files: names of the tables to read

        Returns:
            tuple (df, is_labeled, got_speaker, got_gender, got_age): the
            concatenated dataframe and one flag per kind of meta data telling
            whether it was found in at least one of the tables.
        """
        flags = {'is_labeled': False, 'got_speaker': False,
                 'got_gender': False, 'got_age': False}
        # (flag, column to write, column to read) for values stored directly
        # as table columns
        direct = (
            ('is_labeled', self.target, self.col_label),
            ('got_speaker', 'speaker', 'speaker'),
            ('got_gender', 'gender', 'gender'),
            ('got_age', 'age', 'age'),
        )
        # (flag, column to write, field to read) for values that are part of
        # the speaker description; these are tried after the direct columns
        via_speaker = (
            ('got_gender', 'gender', 'gender'),
            ('got_age', 'age', 'age'),
            ('is_labeled', self.target, self.target),
        )
        df = pd.DataFrame()
        for table in df_files:
            source_df = db.tables[table].df
            # create a dataframe with the index (the filenames)
            df_local = pd.DataFrame(index=source_df.index)
            for flag, column, source in direct:
                try:
                    df_local[column] = source_df[source]
                    flags[flag] = True
                except (KeyError, ValueError, audformat.errors.BadKeyError):
                    # this table simply does not carry the value
                    pass
            for flag, column, field in via_speaker:
                try:
                    df_local[column] = db[table]['speaker'].get(map=field)
                    flags[flag] = True
                except (ValueError, audformat.errors.BadKeyError):
                    # no speaker description with this field
                    pass
            df = pd.concat([df, df_local])
        return (df, flags['is_labeled'], flags['got_speaker'],
                flags['got_gender'], flags['got_age'])

    def split(self):
        """Split the database into train and development set.

        The strategy is read from the 'split_strategy' entry of the dataset:
        'database' (default), 'train', 'test', 'specified', 'speaker_split',
        'random' or 'reuse'. Unless a fresh start is requested, previously
        stored splits are reused (not for 'speaker_split').
        """
        store = self.util.get_path('store')
        storage_test = f'{store}{self.name}_testdf.pkl'
        storage_train = f'{store}{self.name}_traindf.pkl'
        split_strategy = self.util.config_val_data(self.name, 'split_strategy', 'database')
        self.util.debug(f'splitting database {self.name} with strategy {split_strategy}')
        if split_strategy != 'speaker_split' and not self.start_fresh:
            # check if the splits have been computed previously (not for speaker split)
            have_train = os.path.isfile(storage_train)
            have_test = os.path.isfile(storage_test)
            if have_train or have_test:
                self.df_test = pd.DataFrame()
                self.df_train = pd.DataFrame()
                if have_test:
                    self.util.debug(f'splits: reusing previously stored test file {storage_test}')
                    self.df_test = pd.read_pickle(storage_test)
                if have_train:
                    self.util.debug(f'splits: reusing previously stored train file {storage_train}')
                    self.df_train = pd.read_pickle(storage_train)
                return
        if split_strategy == 'database':
            # use the splits from the database
            testdf = self.db.tables[self.target + '.test'].df
            traindf = self.db.tables[self.target + '.train'].df
            # use only the train and test samples that were not perhaps
            # filtered out by an earlier processing step
            self.df_test = self.df.loc[self.df.index.intersection(testdf.index)]
            self.df_train = self.df.loc[self.df.index.intersection(traindf.index)]
        elif split_strategy == 'train':
            self.df_train = self.df
            self.df_test = pd.DataFrame()
        elif split_strategy == 'test':
            self.df_test = self.df
            self.df_train = pd.DataFrame()
        elif split_strategy == 'specified':
            self._split_specified()
        elif split_strategy == 'speaker_split':
            self.split_speakers()
        elif split_strategy == 'random':
            self.random_split()
        elif split_strategy == 'reuse':
            self.util.debug(f'{self.name}: trying to reuse data splits')
            self.df_test = pd.read_pickle(storage_test)
            self.df_train = pd.read_pickle(storage_train)
        else:
            # without this branch the splits would stay None and the code
            # below would fail with an unrelated AttributeError
            self.util.error(f'{self.name}: unknown split strategy: {split_strategy}')
            return

        if self.df_test.shape[0] > 0:
            self.df_test = self.finish_up(self.df_test, storage_test)
        if self.df_train.shape[0] > 0:
            self.df_train = self.finish_up(self.df_train, storage_train)

        self.util.debug(f'{self.name}: {self.df_test.shape[0]} samples in test and '
                        f'{self.df_train.shape[0]} samples in train')

    def _split_specified(self):
        """Build the splits from the tables named in 'test_tables' and 'train_tables'."""
        traindf, testdf = pd.DataFrame(), pd.DataFrame()
        # try to load some dataframes for testing
        entry_test_tables = self.util.config_val_data(self.name, 'test_tables', False)
        if entry_test_tables:
            for test_table in ast.literal_eval(entry_test_tables):
                testdf = pd.concat([testdf, self.db.tables[test_table].df])
        entry_train_tables = self.util.config_val_data(self.name, 'train_tables', False)
        if entry_train_tables:
            for train_table in ast.literal_eval(entry_train_tables):
                traindf = pd.concat([traindf, self.db.tables[train_table].df])
        # use only the train and test samples that were not perhaps filtered
        # out by an earlier processing step
        testdf = testdf.set_index(audformat.utils.to_segmented_index(testdf.index, allow_nat=False))
        traindf = traindf.set_index(audformat.utils.to_segmented_index(traindf.index, allow_nat=False))
        self.df_test = self.df.loc[self.df.index.intersection(testdf.index)]
        self.df_train = self.df.loc[self.df.index.intersection(traindf.index)]
        # it might be necessary to copy the target values
        try:
            self.df_test[self.target] = testdf[self.target]
        except KeyError:
            pass  # if the dataframe is empty
        try:
            self.df_train[self.target] = traindf[self.target]
        except KeyError:
            pass  # if the dataframe is empty

    def finish_up(self, df, storage):
        """Flag a split as labeled (or not), store it at `storage` and return it."""
        # remember the splits for future use
        df.is_labeled = self.is_labeled
        # NOTE(review): the test split is flagged on every call, also when `df`
        # is the train split; kept as is because the reason is not visible here
        self.df_test.is_labeled = self.is_labeled
        df.to_pickle(storage)
        return df

    def _set_random_split(self, df, in_test):
        """Assign the rows selected by `in_test` to test, all others to train."""
        self.df_test = df[in_test]
        self.df_train = df[~df.index.isin(self.df_test.index)]
        self.util.debug(f'{self.name}: [{self.df_train.shape[0]}/{self.df_test.shape[0]}] samples in train/test')
        # because this generates new train/test sample quantities, the feature
        # extraction has to be done again
        glob_conf.config['FEATS']['needs_feature_extraction'] = 'True'

    def split_speakers(self):
        """One way to split train and eval sets: Specify percentage of evaluation speakers"""
        test_percent = int(self.util.config_val_data(self.name, 'testsplit', 50))
        df = self.df
        s_num = df.speaker.nunique()
        test_num = int(s_num * (test_percent / 100))
        test_spkrs = sample(list(df.speaker.unique()), test_num)
        self._set_random_split(df, df.speaker.isin(test_spkrs))

    def random_split(self):
        """One way to split train and eval sets: Specify percentage of random samples"""
        test_percent = int(self.util.config_val_data(self.name, 'testsplit', 50))
        df = self.df
        s_num = len(df)
        test_num = int(s_num * (test_percent / 100))
        test_smpls = sample(list(df.index), test_num)
        self._set_random_split(df, df.index.isin(test_smpls))

    def _add_labels(self, df):
        """Copy the meta data flags of this dataset onto the dataframe."""
        df.is_labeled = self.is_labeled
        df.got_gender = self.got_gender
        df.got_age = self.got_age
        df.got_speaker = self.got_speaker
        return df

    def _plot_value_counts(self, frames):
        """Plot the target distribution for each (dataframe, plot name) pair, if configured."""
        if not self.util.config_val_data(self.name, 'value_counts', False):
            return
        if not self.got_gender or not self.got_speaker:
            self.util.error('can\'t plot value counts if no speaker or gender is given')
        else:
            for df, plot_name in frames:
                self.plot.describe_df(self.name, df, self.target, plot_name)

    def prepare_labels(self):
        """Map and bin the labels of the whole data or of the splits.

        With strategy 'cross_data' (or when only tests are run) the whole
        dataframe is processed, with 'train_test' the two splits are.
        """
        strategy = self.util.config_val('DATA', 'strategy', 'train_test')
        only_tests = self._literal(self.util.config_val('DATA', 'tests', 'False'))
        if strategy == 'cross_data' or only_tests:
            self.df = self.map_labels(self.df)
            # Bin target values if they are continuous but a classification
            # experiment should be done
            self.map_continuous_classification(self.df)
            self.df = self._add_labels(self.df)
            self._plot_value_counts([(self.df, f'{self.name}_distplot')])
        elif strategy == 'train_test':
            self.df_train = self.map_labels(self.df_train)
            self.df_test = self.map_labels(self.df_test)
            self.map_continuous_classification(self.df_train)
            self.map_continuous_classification(self.df_test)
            self.df_train = self._add_labels(self.df_train)
            self.df_test = self._add_labels(self.df_test)
            self._plot_value_counts([
                (self.df_train, f'{self.name}_train_distplot'),
                (self.df_test, f'{self.name}_test_distplot'),
            ])

    def map_labels(self, df):
        """Rename the labels and remove the ones that are not needed.

        The dataframe is returned untouched if it is empty, if the experiment
        is not a classification or if the target is continuous.
        """
        pd.options.mode.chained_assignment = None
        if df.shape[0] == 0 or not self.util.exp_is_classification() \
                or self.check_continuous_classification():
            return df
        target = self.target
        # see if a special mapping should be used
        mappings = self.util.config_val_data(self.name, 'mapping', False)
        if mappings:
            mapping = ast.literal_eval(mappings)
            df[target] = df[target].map(mapping)
            self.util.debug(f'{self.name}: mapped {mapping}')
        # remove labels that are not in the labels list
        labels = self.util.config_val('DATA', 'labels', False)
        if labels:
            labels = ast.literal_eval(labels)
            df = df[df[target].isin(labels)]
        # remember in case they get encoded later
        df['class_label'] = df[target]
        return df

    def check_continuous_classification(self):
        """Tell whether a continuous target is used in a classification experiment."""
        datatype = self.util.config_val('DATA', 'type', 'False')
        return bool(self.util.exp_is_classification() and datatype == 'continuous')

    def map_continuous_classification(self, df):
        """Map labels to bins for continuous data that should be classified"""
        if self.check_continuous_classification():
            self.util.debug(f'{self.name}: binning continuous variable to categories')
            cat_vals = self.util.continuous_to_categorical(df[self.target])
            df[self.target] = cat_vals
            labels = ast.literal_eval(glob_conf.config['DATA']['labels'])
            df['class_label'] = df[self.target]
            # replace the bin numbers by the names of the labels
            for i, l in enumerate(labels):
                df['class_label'] = df['class_label'].replace(i, str(l))
|
nkululeko/dataset_csv.py
DELETED
@@ -1,49 +0,0 @@
|
|
1
|
-
# dataset_csv.py
|
2
|
-
import pandas as pd
|
3
|
-
import os
|
4
|
-
from nkululeko.dataset import Dataset
|
5
|
-
import os.path
|
6
|
-
import audformat.utils
|
7
|
-
|
8
|
-
class Dataset_CSV(Dataset):
    """Class to represent datasets stored as a csv file"""

    def load(self):
        """Load the dataframe with files, speakers and task labels.

        The csv file is read from the dataset entry of the configuration; a
        relative path is prefixed with the experiment root. If
        'absolute_path' is configured as False, the folder of the csv file is
        prepended to every file name of the index.
        """
        import ast  # local import: this module does not import ast at the top

        self.got_target, self.got_speaker, self.got_gender = False, False, False
        root = self.util.config_val_data(self.name, '', '')
        # the default is a real boolean, a configured value is a string
        absolute_path = self.util.config_val_data(self.name, 'absolute_path', True)
        if isinstance(absolute_path, str):
            absolute_path = ast.literal_eval(absolute_path)
        self.util.debug(f'loading {self.name}')
        if not os.path.isabs(root):
            exp_root = self.util.config_val('EXP', 'root', '')
            root = exp_root + root
        df = audformat.utils.read_csv(root)
        if not absolute_path:
            # add the root folder to the relative paths of the files
            data_dir = os.path.dirname(root)

            def with_folder(file_name):
                return data_dir + '/' + file_name

            if audformat.index_type(df.index) == 'segmented':
                files = df.index.levels[0].map(with_folder)
                # `level` has to be passed by keyword for recent pandas versions
                df = df.set_index(df.index.set_levels(files, level=0))
            else:
                df = df.set_index(df.index.to_series().apply(with_folder))
        self.df = df
        self.db = None
        # only claim to be labeled if the target column is really there
        self.got_target = self.target in df.columns
        self.is_labeled = self.got_target
        no_reuse = self.util.config_val('DATA', 'no_reuse', 'False')
        self.start_fresh = ast.literal_eval(no_reuse) if isinstance(no_reuse, str) else no_reuse
        if self.is_labeled and 'class_label' not in self.df.columns:
            self.df['class_label'] = self.df[self.target]
        if 'gender' in df.columns:
            self.got_gender = True
        if 'age' in df.columns:
            self.got_age = True
        if 'speaker' in df.columns:
            self.got_speaker = True
            ns = df['speaker'].nunique()
            self.util.debug(f'num of speakers: {ns}')

        self.util.debug(f'Loaded database {self.name} with {df.shape[0]} '
                        f'samples: got targets: {self.got_target}, got speakers: {self.got_speaker}, '
                        f'got sexes: {self.got_gender}')

    def prepare(self):
        """Prepare the data exactly like the base class does."""
        super().prepare()
|
nkululeko/dataset_ravdess.py
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
# dataset_ravdess.py
|
2
|
-
from nkululeko.dataset import Dataset
|
3
|
-
|
4
|
-
class Ravdess(Dataset):
    """Class to represent the ravdess database without its song recordings"""
    name = 'ravdess'  # The name

    def __init__(self):
        """Constructor setting the name"""
        Dataset.__init__(self, self.name)

    def load(self):
        """Load the data like the base class, then drop all files with 'song' in their name."""
        Dataset.load(self)
        df = self.df
        prev = df.shape[0]
        # the file names are the first index level; this also works for a
        # plain (single level) index
        file_names = df.index.get_level_values(0)
        df = df[~file_names.str.contains('song')]
        now = df.shape[0]
        self.util.debug(f'removed {prev-now} songs from ravdess dataframe')
        self.df = df
|
nkululeko/estimate_snr.py
DELETED
@@ -1,89 +0,0 @@
|
|
1
|
-
# estimate_snr.py
|
2
|
-
import numpy as np
|
3
|
-
import audiofile
|
4
|
-
import matplotlib.pyplot as plt
|
5
|
-
from scipy.signal.windows import hamming
|
6
|
-
import argparse
|
7
|
-
|
8
|
-
|
9
|
-
class SNREstimator:
    """Estimate SNR from audio signal using log energy and energy thresholds

    The signal is cut into overlapping, Hamming-windowed frames. The SNR is
    the ratio between the mean energy of the frames at or above the third
    quartile and the mean energy of the frames at or below the first
    quartile of the log energies.

    Args:
        input_data (ndarray): Input audio signal
        sample_rate (int): Sampling rate of input audio signal
        window_size (int): Window size in samples
        hop_size (int): Hop size in samples

    Returns:
        object: SNREstimator object
        estimated_snr (float): Estimated SNR in dB, extracted from SNREstimator.estimate_snr()

    Usage:
        >>> input_data, sample_rate = audiofile.read('input.wav')
        >>> snr_estimator = SNREstimator(input_data, sample_rate, window_size=320, hop_size=160)
        >>> estimated_snr, log_energies, energy_threshold_low, energy_threshold_high = snr_estimator.estimate_snr()
    """

    def __init__(self, input_data, sample_rate, window_size=320, hop_size=160):
        self.audio_data = input_data
        self.sample_rate = sample_rate
        self.frame_length = window_size
        self.hop_length = hop_size

    def frame_audio(self, signal):
        """Cut the signal into full-length frames; empty list if the signal is too short."""
        num_frames = 1 + (len(signal) - self.frame_length) // self.hop_length
        frames = []
        for i in range(num_frames):
            start = i * self.hop_length
            frames.append(signal[start: start + self.frame_length])
        return frames

    def calculate_log_energy(self, frame):
        """Return the natural logarithm of the energy of a frame."""
        energy = np.sum(frame ** 2)
        # a silent frame has zero energy: clamp to the smallest positive
        # float so that the logarithm stays finite
        return np.log(np.maximum(energy, np.finfo(float).tiny))

    def calculate_snr(self, energy_high, energy_low):
        """Return the ratio of two energies in dB."""
        return 10 * np.log10(energy_high / energy_low)

    def estimate_snr(self):
        """Estimate the SNR of the audio signal given to the constructor.

        Returns:
            tuple: estimated SNR in dB, list of log energies per frame, low
            (25th percentile) and high (75th percentile) energy thresholds.

        Raises:
            ValueError: if the signal is shorter than one window.
        """
        frames = self.frame_audio(self.audio_data)
        if not frames:
            raise ValueError(
                f'signal with {len(self.audio_data)} samples is shorter than '
                f'the window size of {self.frame_length} samples'
            )
        # the window is the same for all frames, compute it only once
        window = hamming(self.frame_length)
        log_energies = [self.calculate_log_energy(frame * window) for frame in frames]

        energy_threshold_low = np.percentile(log_energies, 25)  # First quartile
        energy_threshold_high = np.percentile(log_energies, 75)  # Third quartile

        energies = np.asarray(log_energies)
        mean_low_energy = np.mean(energies[energies <= energy_threshold_low])
        mean_high_energy = np.mean(energies[energies >= energy_threshold_high])

        estimated_snr = self.calculate_snr(np.exp(mean_high_energy), np.exp(mean_low_energy))
        return estimated_snr, log_energies, energy_threshold_low, energy_threshold_high

    def plot_energy(self, log_energies, energy_threshold_low, energy_threshold_high):
        """Show the log energy per frame together with both thresholds."""
        plt.figure(figsize=(10, 6))
        plt.plot(log_energies, label='Log Energy')
        plt.axhline(y=energy_threshold_low, color='r', linestyle='--', label='Low Energy Threshold (25th Percentile)')
        plt.axhline(y=energy_threshold_high, color='g', linestyle='--', label='High Energy Threshold (75th Percentile)')
        plt.xlabel('Frame')
        plt.ylabel('Log Energy')
        plt.title('Log Energy and Energy Thresholds')
        plt.legend()
        plt.show()
|
70
|
-
|
71
|
-
def main():
    """Command line entry point: estimate and print the SNR of an audio file.

    With -p the log energies and both thresholds are plotted as well.
    """
    cli = argparse.ArgumentParser(
        description='Estimate SNR from audio signal',
        usage='python3 estimate_snr.py -i <input_file> -ws <window_size> -hs <hop_size> -p',
    )
    cli.add_argument(
        '-i', '--input',
        required=True,
        help='Input audio file in WAV format',
    )
    cli.add_argument(
        '-ws', '--window_size',
        type=int,
        default=int(0.02 * 16000),
        help='Window size in samples (default: 320)',
    )
    cli.add_argument(
        '-hs', '--hop_size',
        type=int,
        default=int(0.01 * 16000),
        help='Hop size in samples (default: 160)',
    )
    cli.add_argument(
        '-p', '--plot',
        action='store_true',
        help='Plot log energy and energy thresholds',
    )
    options = cli.parse_args()

    audio, rate = audiofile.read(options.input)
    estimator = SNREstimator(audio, rate, options.window_size, options.hop_size)
    result = estimator.estimate_snr()
    snr_db, energies, threshold_low, threshold_high = result

    print("Estimated SNR:", snr_db)

    if not options.plot:
        return
    estimator.plot_energy(energies, threshold_low, threshold_high)


if __name__ == '__main__':
    main()
|
nkululeko/feats_agender.py
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
# feats_agender.py
|
2
|
-
from nkululeko.featureset import Featureset
|
3
|
-
import os
|
4
|
-
import pandas as pd
|
5
|
-
import audeer
|
6
|
-
import nkululeko.glob_conf as glob_conf
|
7
|
-
import audonnx
|
8
|
-
import numpy as np
|
9
|
-
import audinterface
|
10
|
-
|
11
|
-
class AudModelAgenderSet(Featureset):
    """
    Embeddings from the wav2vec2-based model finetuned on agender data, described in the paper
    "Speech-based Age and Gender Prediction with Transformers"
    https://arxiv.org/abs/2306.16962
    """

    def __init__(self, name, data_df):
        """Load the model, downloading and unpacking it first if its folder is missing."""
        super().__init__(name, data_df)
        model_url = 'https://zenodo.org/record/7761387/files/w2v2-L-robust-6-age-gender.25c844af-1.1.1.zip'
        model_root = self.util.config_val('FEATS', 'agender.model', './audmodel_agender/')
        if not os.path.isdir(model_root):
            cache_root = audeer.mkdir('cache')
            model_root = audeer.mkdir(model_root)
            archive_path = audeer.download_url(model_url, cache_root, verbose=True)
            audeer.extract_archive(archive_path, model_root)
        device = self.util.config_val('MODEL', 'device', 'cpu')
        self.model = audonnx.load(model_root, device=device)

    def extract(self):
        """Extract the features based on the initialized dataset or re-open them when found on disk."""
        import ast  # local import: this module does not import ast at the top

        store = self.util.get_path('store')
        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
        storage = f'{store}{self.name}.{store_format}'
        # both entries are the strings 'True' or 'False'
        extract = ast.literal_eval(self.util.config_val('FEATS', 'needs_feature_extraction', 'False'))
        no_reuse = ast.literal_eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
        if not (no_reuse or extract) and os.path.isfile(storage):
            self.util.debug('reusing extracted audmodel features.')
            self.df = self.util.get_store(storage, store_format)
            return

        self.util.debug('extracting agender model embeddings, this might take a while...')
        hidden_states = audinterface.Feature(
            self.model.labels('hidden_states'),
            process_func=self.model,
            process_func_args={
                'outputs': 'hidden_states',
            },
            sampling_rate=16000,
            resample=True,
            num_workers=5,
            verbose=True,
        )
        self.df = hidden_states.process_index(self.data_df.index)
        self.util.write_store(self.df, storage, store_format)
        try:
            # NOTE(review): the flag is read from the FEATS section above but
            # reset in the DATA section here; left unchanged because resetting
            # FEATS could make a later feature set reuse outdated features
            # -- confirm the intention
            glob_conf.config['DATA']['needs_feature_extraction'] = 'False'
        except KeyError:
            pass

    def extract_sample(self, signal, sr):
        """Return the flattened hidden states of the model for one signal."""
        result = self.model(signal, sr)
        return np.asarray(result['hidden_states'].flatten())
|