nkululeko 0.59.1__py3-none-any.whl → 0.61.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. nkululeko/constants.py +1 -1
  2. nkululeko/experiment.py +43 -43
  3. nkululeko/feature_extractor.py +101 -58
  4. nkululeko/modelrunner.py +14 -14
  5. nkululeko/plots.py +11 -0
  6. nkululeko/segment.py +23 -27
  7. nkululeko/test_predictor.py +1 -1
  8. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/METADATA +13 -1
  9. nkululeko-0.61.0.dist-info/RECORD +31 -0
  10. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/WHEEL +1 -1
  11. nkululeko/ap_age.py +0 -31
  12. nkululeko/ap_arousal.py +0 -30
  13. nkululeko/ap_dominance.py +0 -29
  14. nkululeko/ap_gender.py +0 -29
  15. nkululeko/ap_mos.py +0 -35
  16. nkululeko/ap_pesq.py +0 -35
  17. nkululeko/ap_sdr.py +0 -36
  18. nkululeko/ap_snr.py +0 -35
  19. nkululeko/ap_stoi.py +0 -34
  20. nkululeko/ap_valence.py +0 -30
  21. nkululeko/augmenter.py +0 -64
  22. nkululeko/dataset.py +0 -415
  23. nkululeko/dataset_csv.py +0 -49
  24. nkululeko/dataset_ravdess.py +0 -19
  25. nkululeko/estimate_snr.py +0 -89
  26. nkululeko/feats_agender.py +0 -63
  27. nkululeko/feats_agender_agender.py +0 -65
  28. nkululeko/feats_analyser.py +0 -87
  29. nkululeko/feats_audmodel.py +0 -63
  30. nkululeko/feats_audmodel_dim.py +0 -63
  31. nkululeko/feats_clap.py +0 -74
  32. nkululeko/feats_import.py +0 -44
  33. nkululeko/feats_mld.py +0 -47
  34. nkululeko/feats_mos.py +0 -92
  35. nkululeko/feats_opensmile.py +0 -84
  36. nkululeko/feats_oxbow.py +0 -87
  37. nkululeko/feats_praat.py +0 -72
  38. nkululeko/feats_snr.py +0 -63
  39. nkululeko/feats_squim.py +0 -99
  40. nkululeko/feats_trill.py +0 -74
  41. nkululeko/feats_wav2vec2.py +0 -94
  42. nkululeko/featureset.py +0 -41
  43. nkululeko/feinberg_praat.py +0 -430
  44. nkululeko/loss_ccc.py +0 -28
  45. nkululeko/loss_softf1loss.py +0 -40
  46. nkululeko/model.py +0 -256
  47. nkululeko/model_bayes.py +0 -14
  48. nkululeko/model_cnn.py +0 -118
  49. nkululeko/model_gmm.py +0 -16
  50. nkululeko/model_knn.py +0 -16
  51. nkululeko/model_knn_reg.py +0 -16
  52. nkululeko/model_mlp.py +0 -175
  53. nkululeko/model_mlp_regression.py +0 -197
  54. nkululeko/model_svm.py +0 -18
  55. nkululeko/model_svr.py +0 -18
  56. nkululeko/model_tree.py +0 -14
  57. nkululeko/model_tree_reg.py +0 -14
  58. nkululeko/model_xgb.py +0 -12
  59. nkululeko/model_xgr.py +0 -12
  60. nkululeko/randomsplicer.py +0 -76
  61. nkululeko/randomsplicing.py +0 -74
  62. nkululeko-0.59.1.dist-info/RECORD +0 -82
  63. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/LICENSE +0 -0
  64. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/top_level.txt +0 -0
nkululeko/dataset.py DELETED
@@ -1,415 +0,0 @@
1
- # dataset.py
2
- import audformat
3
- #import audb
4
- import pandas as pd
5
- import ast
6
- import os
7
- from random import sample
8
- from nkululeko.util import Util
9
- from nkululeko.plots import Plots
10
- import nkululeko.glob_conf as glob_conf
11
- import os.path
12
- from audformat.utils import duration
13
- import nkululeko.filter_data as filter
14
- from nkululeko.filter_data import DataFilter
15
-
16
class Dataset:
    """Represent a dataset in audformat and its train/test splits.

    The data is read from an audformat database (see ``_load_db``); the
    resulting dataframe is cached as a pickle in the experiment store so
    repeated runs can reuse it (unless ``no_reuse`` is configured).
    """

    name = ''  # An identifier for the dataset
    config = None  # The configuration
    db = None  # The database object
    df = None  # The whole dataframe
    df_train = None  # The training split
    df_test = None  # The evaluation split

    def __init__(self, name):
        """Constructor setting up name and configuration."""
        self.name = name
        self.target = glob_conf.config['DATA']['target']
        self.util = Util('dataset')
        self.plot = Plots()
        self.limit = int(self.util.config_val_data(self.name, 'limit', 0))
        # NOTE(review): config values are passed through eval(); the config
        # file is trusted input — never feed it untrusted data.
        self.start_fresh = eval(self.util.config_val('DATA', 'no_reuse', 'False'))
        self.is_labeled, self.got_speaker, self.got_gender, self.got_age = \
            False, False, False, False

    def _get_tables(self):
        """Return the names of all tables configured for this dataset."""
        tables = []
        # the four config entries that may each list table names
        for entry in ('target_tables', 'files_tables', 'test_tables',
                      'train_tables'):
            value = self.util.config_val_data(self.name, entry, False)
            if value:
                tables += ast.literal_eval(value)
        return tables

    def _load_db(self):
        """Load the audformat database and return its root path."""
        root = self.util.config_val_data(self.name, '', '')
        self.util.debug(f'{self.name}: loading from {root}')
        try:
            self.db = audformat.Database.load(root)
        except FileNotFoundError:
            self.util.error(f'{self.name}: no database found at {root}')
        return root

    def load(self):
        """Load the dataframe with files, speakers and task labels."""
        # path of the cached dataframe
        store = self.util.get_path('store')
        store_file = f'{store}{self.name}.pkl'
        self.root = self._load_db()
        if not self.start_fresh and os.path.isfile(store_file):
            # reuse the cached dataframe and restore the flags from it
            self.util.debug(
                f'{self.name}: reusing previously stored file {store_file}')
            self.df = pd.read_pickle(store_file)
            self.is_labeled = self.target in self.df
            self.got_gender = 'gender' in self.df
            self.got_age = 'age' in self.df
            self.got_speaker = 'speaker' in self.df
            self.util.copy_flags(self, self.df)
            self.util.debug(
                f'{self.name}: loaded with {self.df.shape[0]} '
                f'samples: got targets: {self.is_labeled}, '
                f'got speakers: {self.got_speaker}, '
                f'got sexes: {self.got_gender}')
            return
        tables = self._get_tables()
        self.util.debug(f'{self.name}: loading tables: {tables}')
        # map the audio file paths to absolute paths
        self.db.map_files(lambda x: os.path.join(self.root, x))
        # the dataframes (potentially more than one) with at least the
        # file names
        df_files = self.util.config_val_data(
            self.name, 'files_tables', "['files']")
        df_files_tables = ast.literal_eval(df_files)
        # the label for the target column
        self.col_label = self.util.config_val_data(
            self.name, 'label', self.target)
        (df, self.is_labeled, self.got_speaker, self.got_gender,
         self.got_age) = self._get_df_for_lists(self.db, df_files_tables)
        if not (self.is_labeled and self.got_speaker and self.got_gender
                and self.got_age):
            try:
                # there might be a separate table with the targets,
                # e.g. emotion or age
                df_targets = self.util.config_val_data(
                    self.name, 'target_tables', f"['{self.target}']")
                df_target_tables = ast.literal_eval(df_targets)
                (df_target, got_target2, got_speaker2, got_gender2,
                 got_age2) = self._get_df_for_lists(self.db, df_target_tables)
                self.is_labeled = got_target2 or self.is_labeled
                self.got_speaker = got_speaker2 or self.got_speaker
                self.got_gender = got_gender2 or self.got_gender
                self.got_age = got_age2 or self.got_age
                if got_target2:
                    df[self.target] = df_target[self.target]
                if got_speaker2:
                    df['speaker'] = df_target['speaker']
                if got_gender2:
                    df['gender'] = df_target['gender']
                if got_age2:
                    df['age'] = df_target['age']
            except audformat.core.errors.BadKeyError:
                pass

        if self.is_labeled:
            # remember the target in case it gets label-encoded later
            df['class_label'] = df[self.target]

        self.df = df
        self.util.debug(
            f'Loaded database {self.name} with {df.shape[0]} '
            f'samples: got targets: {self.is_labeled}, '
            f'got speakers: {self.got_speaker}, '
            f'got sexes: {self.got_gender}, got age: {self.got_age}')

    def prepare(self):
        """Post-process the loaded dataframe: segmented index, duration
        column, filtering, and cache the result in the store."""
        # ensure a segmented index
        self.df = self.util.make_segmented_index(self.df)
        self.util.copy_flags(self, self.df)
        # add a duration column if not already present
        if 'duration' not in self.df:
            start = self.df.index.get_level_values(1)
            end = self.df.index.get_level_values(2)
            self.df['duration'] = (end - start).total_seconds()

        # perform some filtering if desired
        required = eval(self.util.config_val_data(self.name, 'required', 'False'))
        if required:
            pre = self.df.shape[0]
            self.df = self.df[self.df[required].notna()]
            post = self.df.shape[0]
            self.util.debug(
                f'{self.name}: kept {post} samples with {required} '
                f'(from {pre}, filtered {pre-post})')

        datafilter = DataFilter(self.df)
        self.df = datafilter.all_filters(data_name=self.name)

        self.util.debug(
            f'{self.name}: loaded data with {self.df.shape[0]} '
            f'samples: got targets: {self.is_labeled}, '
            f'got speakers: {self.got_speaker}, '
            f'got sexes: {self.got_gender}')

        if self.got_speaker and self.util.config_val_data(
                self.name, 'rename_speakers', False):
            # we might need to prepend the database name to all speakers in
            # case other databases have the same speaker names
            self.df.speaker = self.df.speaker.apply(lambda x: self.name + x)

        # cache the dataframe
        store = self.util.get_path('store')
        store_file = f'{store}{self.name}.pkl'
        self.df.to_pickle(store_file)

    def _get_df_for_lists(self, db, df_files):
        """Collect the file index plus target, speaker, gender and age
        columns from the given list of table names.

        Returns the concatenated dataframe and four booleans stating which
        of target/speaker/gender/age could be found in any of the tables.
        """
        is_labeled, got_speaker, got_gender, got_age = False, False, False, False
        df = pd.DataFrame()
        for table in df_files:
            source_df = db.tables[table].df
            # create a dataframe with the index (the file names)
            df_local = pd.DataFrame(index=source_df.index)
            try:
                # try to get the target values
                df_local[self.target] = source_df[self.col_label]
                is_labeled = True
            except (KeyError, ValueError, audformat.errors.BadKeyError):
                pass
            try:
                # try to get the speaker values
                df_local['speaker'] = source_df['speaker']
                got_speaker = True
            except (KeyError, ValueError, audformat.errors.BadKeyError):
                pass
            try:
                # try to get the gender values
                df_local['gender'] = source_df['gender']
                got_gender = True
            except (KeyError, ValueError, audformat.errors.BadKeyError):
                pass
            try:
                # try to get the age values
                df_local['age'] = source_df['age']
                got_age = True
            except (KeyError, ValueError, audformat.errors.BadKeyError):
                pass
            try:
                # the gender might also be part of the speaker description
                df_local['gender'] = db[table]['speaker'].get(map='gender')
                got_gender = True
            except (ValueError, audformat.errors.BadKeyError):
                pass
            try:
                # the age might also be part of the speaker description
                df_local['age'] = db[table]['speaker'].get(map='age')
                got_age = True
            except (ValueError, audformat.errors.BadKeyError):
                pass
            try:
                # same for the target, e.g. "age"
                df_local[self.target] = db[table]['speaker'].get(map=self.target)
                is_labeled = True
            except (ValueError, audformat.core.errors.BadKeyError):
                pass
            df = pd.concat([df, df_local])
        return df, is_labeled, got_speaker, got_gender, got_age

    def split(self):
        """Split the database into train and development set."""
        store = self.util.get_path('store')
        storage_test = f'{store}{self.name}_testdf.pkl'
        storage_train = f'{store}{self.name}_traindf.pkl'
        split_strategy = self.util.config_val_data(
            self.name, 'split_strategy', 'database')
        self.util.debug(
            f'splitting database {self.name} with strategy {split_strategy}')
        # split_strategy is one of: 'database' (default), 'train', 'test',
        # 'specified', 'speaker_split', 'random', 'reuse'
        if split_strategy != 'speaker_split' and not self.start_fresh:
            # check if the splits have been computed previously
            # (not for speaker split, which should be re-randomized each run)
            if os.path.isfile(storage_train) and os.path.isfile(storage_test):
                self.util.debug(
                    f'splits: reusing previously stored test file {storage_test}')
                self.df_test = pd.read_pickle(storage_test)
                self.util.debug(
                    f'splits: reusing previously stored train file {storage_train}')
                self.df_train = pd.read_pickle(storage_train)
                return
            elif os.path.isfile(storage_train):
                self.util.debug(
                    f'splits: reusing previously stored train file {storage_train}')
                self.df_train = pd.read_pickle(storage_train)
                self.df_test = pd.DataFrame()
                return
            elif os.path.isfile(storage_test):
                self.util.debug(
                    f'splits: reusing previously stored test file {storage_test}')
                self.df_test = pd.read_pickle(storage_test)
                self.df_train = pd.DataFrame()
                return
        if split_strategy == 'database':
            # use the splits from the database
            testdf = self.db.tables[self.target + '.test'].df
            traindf = self.db.tables[self.target + '.train'].df
            # use only the train and test samples that were not perhaps
            # filtered out by an earlier processing step
            self.df_test = self.df.loc[self.df.index.intersection(testdf.index)]
            self.df_train = self.df.loc[self.df.index.intersection(traindf.index)]
        elif split_strategy == 'train':
            self.df_train = self.df
            self.df_test = pd.DataFrame()
        elif split_strategy == 'test':
            self.df_test = self.df
            self.df_train = pd.DataFrame()
        elif split_strategy == 'specified':
            traindf, testdf = pd.DataFrame(), pd.DataFrame()
            # try to load some dataframes for testing
            entry_test_tables = self.util.config_val_data(
                self.name, 'test_tables', False)
            if entry_test_tables:
                test_tables = ast.literal_eval(entry_test_tables)
                for test_table in test_tables:
                    testdf = pd.concat([testdf, self.db.tables[test_table].df])
            entry_train_tables = self.util.config_val_data(
                self.name, 'train_tables', False)
            if entry_train_tables:
                train_tables = ast.literal_eval(entry_train_tables)
                for train_table in train_tables:
                    traindf = pd.concat(
                        [traindf, self.db.tables[train_table].df])
            # use only the train and test samples that were not perhaps
            # filtered out by an earlier processing step
            testdf = testdf.set_index(
                audformat.utils.to_segmented_index(
                    testdf.index, allow_nat=False))
            traindf = traindf.set_index(
                audformat.utils.to_segmented_index(
                    traindf.index, allow_nat=False))
            self.df_test = self.df.loc[self.df.index.intersection(testdf.index)]
            self.df_train = self.df.loc[self.df.index.intersection(traindf.index)]
            # it might be necessary to copy the target values
            try:
                self.df_test[self.target] = testdf[self.target]
            except KeyError:
                pass  # if the dataframe is empty
            try:
                self.df_train[self.target] = traindf[self.target]
            except KeyError:
                pass  # if the dataframe is empty
        elif split_strategy == 'speaker_split':
            self.split_speakers()
        elif split_strategy == 'random':
            self.random_split()
        elif split_strategy == 'reuse':
            self.util.debug(f'{self.name}: trying to reuse data splits')
            self.df_test = pd.read_pickle(storage_test)
            self.df_train = pd.read_pickle(storage_train)

        if self.df_test.shape[0] > 0:
            self.df_test = self.finish_up(self.df_test, storage_test)
        if self.df_train.shape[0] > 0:
            self.df_train = self.finish_up(self.df_train, storage_train)

        self.util.debug(
            f'{self.name}: {self.df_test.shape[0]} samples in test and '
            f'{self.df_train.shape[0]} samples in train')

    def finish_up(self, df, storage):
        """Mark the given split as (un)labeled and persist it for reuse."""
        # FIX: only annotate the dataframe that is being finished; the old
        # code additionally clobbered self.df_test.is_labeled even when
        # finishing the train split.
        # NOTE(review): attaching attributes to a DataFrame is fragile —
        # pandas does not guarantee they survive copies/operations.
        df.is_labeled = self.is_labeled
        df.to_pickle(storage)
        return df

    def split_speakers(self):
        """One way to split train and eval sets: specify the percentage
        of speakers reserved for evaluation."""
        test_percent = int(self.util.config_val_data(self.name, 'testsplit', 50))
        df = self.df
        s_num = df.speaker.nunique()
        test_num = int(s_num * (test_percent / 100))
        # sample whole speakers so no speaker appears in both splits
        test_spkrs = sample(list(df.speaker.unique()), test_num)
        self.df_test = df[df.speaker.isin(test_spkrs)]
        self.df_train = df[~df.index.isin(self.df_test.index)]
        self.util.debug(
            f'{self.name}: [{self.df_train.shape[0]}/{self.df_test.shape[0]}]'
            f' samples in train/test')
        # because this generates new train/test sample quantities, the
        # feature extraction has to be done again
        glob_conf.config['FEATS']['needs_feature_extraction'] = 'True'

    def random_split(self):
        """One way to split train and eval sets: specify the percentage
        of random samples reserved for evaluation."""
        test_percent = int(self.util.config_val_data(self.name, 'testsplit', 50))
        df = self.df
        s_num = len(df)
        test_num = int(s_num * (test_percent / 100))
        test_smpls = sample(list(df.index), test_num)
        self.df_test = df[df.index.isin(test_smpls)]
        self.df_train = df[~df.index.isin(self.df_test.index)]
        self.util.debug(
            f'{self.name}: [{self.df_train.shape[0]}/{self.df_test.shape[0]}]'
            f' samples in train/test')
        # because this generates new train/test sample quantities, the
        # feature extraction has to be done again
        glob_conf.config['FEATS']['needs_feature_extraction'] = 'True'

    def _add_labels(self, df):
        """Copy the dataset flags onto the dataframe (as attributes)."""
        df.is_labeled = self.is_labeled
        df.got_gender = self.got_gender
        df.got_age = self.got_age
        df.got_speaker = self.got_speaker
        return df

    def prepare_labels(self):
        """Map, bin and annotate the target labels on the split dataframes."""
        strategy = self.util.config_val('DATA', 'strategy', 'train_test')
        only_tests = eval(self.util.config_val('DATA', 'tests', 'False'))
        if strategy == 'cross_data' or only_tests:
            self.df = self.map_labels(self.df)
            # bin target values if they are continuous but a classification
            # experiment should be done
            self.map_continuous_classification(self.df)
            self.df = self._add_labels(self.df)
            if self.util.config_val_data(self.name, 'value_counts', False):
                if not self.got_gender or not self.got_speaker:
                    self.util.error(
                        "can't plot value counts if no speaker or gender is given")
                else:
                    self.plot.describe_df(
                        self.name, self.df, self.target,
                        f'{self.name}_distplot')
        elif strategy == 'train_test':
            self.df_train = self.map_labels(self.df_train)
            self.df_test = self.map_labels(self.df_test)
            self.map_continuous_classification(self.df_train)
            self.map_continuous_classification(self.df_test)
            self.df_train = self._add_labels(self.df_train)
            self.df_test = self._add_labels(self.df_test)
            if self.util.config_val_data(self.name, 'value_counts', False):
                if not self.got_gender or not self.got_speaker:
                    self.util.error(
                        "can't plot value counts if no speaker or gender is given")
                else:
                    self.plot.describe_df(
                        self.name, self.df_train, self.target,
                        f'{self.name}_train_distplot')
                    self.plot.describe_df(
                        self.name, self.df_test, self.target,
                        f'{self.name}_test_distplot')

    def map_labels(self, df):
        """Rename the labels per the configured mapping and drop samples
        whose label is not in the configured label list."""
        pd.options.mode.chained_assignment = None
        if df.shape[0] == 0 or not self.util.exp_is_classification() \
                or self.check_continuous_classification():
            return df
        target = glob_conf.config['DATA']['target']
        # see if a special mapping should be used
        mappings = self.util.config_val_data(self.name, 'mapping', False)
        if mappings:
            mapping = ast.literal_eval(mappings)
            df[target] = df[target].map(mapping)
            self.util.debug(f'{self.name}: mapped {mapping}')
        # remove labels that are not in the labels list
        labels = self.util.config_val('DATA', 'labels', False)
        if labels:
            labels = ast.literal_eval(labels)
            df = df[df[target].isin(labels)]
        # FIX: the old else-branch computed df[target].unique().codes,
        # which crashes on non-categorical targets (ndarray has no .codes)
        # and whose result was never used — removed.
        # remember the labels in case they get encoded later
        df['class_label'] = df[target]
        return df

    def check_continuous_classification(self):
        """Return True if continuous data should be binned for a
        classification experiment."""
        datatype = self.util.config_val('DATA', 'type', 'False')
        return self.util.exp_is_classification() and datatype == 'continuous'

    def map_continuous_classification(self, df):
        """Map labels to bins for continuous data that should be classified."""
        if self.check_continuous_classification():
            self.util.debug(
                f'{self.name}: binning continuous variable to categories')
            cat_vals = self.util.continuous_to_categorical(df[self.target])
            df[self.target] = cat_vals
            labels = ast.literal_eval(glob_conf.config['DATA']['labels'])
            df['class_label'] = df[self.target]
            # replace the bin indices with the configured label names
            for i, l in enumerate(labels):
                df['class_label'] = df['class_label'].replace(i, str(l))
nkululeko/dataset_csv.py DELETED
@@ -1,49 +0,0 @@
1
- # dataset_csv.py
2
- import pandas as pd
3
- import os
4
- from nkululeko.dataset import Dataset
5
- import os.path
6
- import audformat.utils
7
-
8
class Dataset_CSV(Dataset):
    """Class to represent datasets stored as a csv file."""

    def load(self):
        """Load the dataframe with files, speakers and task labels."""
        self.got_target, self.got_speaker, self.got_gender = False, False, False
        root = self.util.config_val_data(self.name, '', '')
        # FIX: the default must be the *string* 'True' — the old code passed
        # the boolean True, and eval(True) raises TypeError whenever the
        # 'absolute_path' key is absent from the config.
        absolute_path = eval(
            self.util.config_val_data(self.name, 'absolute_path', 'True'))
        self.util.debug(f'loading {self.name}')
        if not os.path.isabs(root):
            # resolve the csv path relative to the experiment root
            exp_root = self.util.config_val('EXP', 'root', '')
            root = exp_root + root
        df = audformat.utils.read_csv(root)
        if not absolute_path:
            # add the root folder to the relative paths of the files
            if audformat.index_type(df.index) == 'segmented':
                # NOTE(review): positional `level` argument to set_levels is
                # deprecated in newer pandas — confirm the pinned version.
                df = df.set_index(
                    df.index.set_levels(
                        df.index.levels[0].map(
                            lambda x: os.path.dirname(root) + '/' + x), 0))
            else:
                df = df.set_index(df.index.to_series().apply(
                    lambda x: os.path.dirname(root) + '/' + x))
        self.df = df
        self.db = None
        self.got_target = True
        self.is_labeled = self.got_target
        self.start_fresh = eval(self.util.config_val('DATA', 'no_reuse', 'False'))
        if self.is_labeled and 'class_label' not in self.df.columns:
            # remember the target in case it gets label-encoded later
            self.df['class_label'] = self.df[self.target]
        if 'gender' in df.columns:
            self.got_gender = True
        if 'age' in df.columns:
            self.got_age = True
        if 'speaker' in df.columns:
            self.got_speaker = True
            ns = df['speaker'].nunique()
            self.util.debug(f'num of speakers: {ns}')

        self.util.debug(
            f'Loaded database {self.name} with {df.shape[0]} '
            f'samples: got targets: {self.got_target}, '
            f'got speakers: {self.got_speaker}, '
            f'got sexes: {self.got_gender}')

    def prepare(self):
        """Post-process via the generic Dataset preparation."""
        super().prepare()
@@ -1,19 +0,0 @@
1
- # dataset.py
2
- from nkululeko.dataset import Dataset
3
-
4
class Ravdess(Dataset):
    """Class to represent the RAVDESS database (speech files only).

    FIX: the previous docstring said "Berlin EmoDB" — a copy-paste error;
    this class handles RAVDESS and strips its sung recordings.
    """
    name = 'ravdess'  # The name

    def __init__(self):
        """Constructor setting the name."""
        Dataset.__init__(self, self.name)

    def load(self):
        """Load via the parent class, then drop the sung portions."""
        Dataset.load(self)
        df = self.df
        prev = df.shape[0]
        # RAVDESS contains both speech and song recordings; keep speech only.
        # NOTE(review): assumes 'song' appears in the file path of sung
        # recordings and the index is a plain file index — confirm for
        # segmented indices.
        df = df[~df.index.str.contains('song')]
        now = df.shape[0]
        self.util.debug(f'removed {prev-now} songs from ravdess dataframe')
        self.df = df
nkululeko/estimate_snr.py DELETED
@@ -1,89 +0,0 @@
1
- # estimate.snr
2
- import numpy as np
3
- import audiofile
4
- import matplotlib.pyplot as plt
5
- from scipy.signal.windows import hamming
6
- import argparse
7
-
8
-
9
class SNREstimator:
    """Estimate SNR from audio signal using log energy and energy thresholds

    Args:
        input_data (ndarray): Input audio signal
        sample_rate (int): Sampling rate of input audio signal
        window_size (int): Window size in samples
        hop_size (int): Hop size in samples

    Returns:
        object: SNREstimator object
        estimated_snr (float): Estimated SNR in dB, extracted from
            SNREstimator.estimate_snr()

    Raises:
        ValueError: from estimate_snr() if the signal is shorter than one
            analysis window.

    Usage:
        >>> input_data, sample_rate = audiofile.read('input.wav')
        >>> snr_estimator = SNREstimator(input_data, sample_rate, window_size=320, hop_size=160)
        >>> estimated_snr, log_energies, energy_threshold_low, energy_threshold_high = snr_estimator.estimate_snr()
    """

    def __init__(self, input_data, sample_rate, window_size=320, hop_size=160):
        self.audio_data = input_data
        self.sample_rate = sample_rate
        self.frame_length = window_size
        self.hop_length = hop_size

    def frame_audio(self, signal):
        """Cut the signal into overlapping frames of frame_length samples."""
        num_frames = 1 + (len(signal) - self.frame_length) // self.hop_length
        # FIX: a signal shorter than one window used to yield a negative
        # frame count; clamp to zero so an empty list is returned instead.
        num_frames = max(num_frames, 0)
        frames = [signal[i * self.hop_length: (i * self.hop_length) + self.frame_length]
                  for i in range(num_frames)]
        return frames

    def calculate_log_energy(self, frame):
        """Return the natural log of the frame's energy.

        NOTE(review): an all-zero frame gives log(0) = -inf (with a numpy
        warning); callers exponentiate the result again, so this cancels
        out for non-silent signals.
        """
        energy = np.sum(frame ** 2)
        return np.log(energy)

    def calculate_snr(self, energy_high, energy_low):
        """Return the ratio of the two energies in dB."""
        return 10 * np.log10(energy_high / energy_low)

    def estimate_snr(self):
        """Estimate SNR as the dB ratio of mean high- vs low-energy frames.

        High/low frames are those above the 75th / below the 25th
        percentile of the per-frame log energies.
        """
        frames = self.frame_audio(self.audio_data)
        # FIX: previously a too-short signal crashed inside np.percentile
        # with an opaque error; fail早 with a clear message instead.
        if not frames:
            raise ValueError(
                f'signal too short: need at least {self.frame_length} samples')
        log_energies = [self.calculate_log_energy(frame * hamming(self.frame_length))
                        for frame in frames]

        energy_threshold_low = np.percentile(log_energies, 25)  # First quartile
        energy_threshold_high = np.percentile(log_energies, 75)  # Third quartile

        low_energy_frames = [log_energy for log_energy in log_energies
                             if log_energy <= energy_threshold_low]
        high_energy_frames = [log_energy for log_energy in log_energies
                              if log_energy >= energy_threshold_high]

        mean_low_energy = np.mean(low_energy_frames)
        mean_high_energy = np.mean(high_energy_frames)

        estimated_snr = self.calculate_snr(
            np.exp(mean_high_energy), np.exp(mean_low_energy))
        return estimated_snr, log_energies, energy_threshold_low, energy_threshold_high

    def plot_energy(self, log_energies, energy_threshold_low, energy_threshold_high):
        """Plot the per-frame log energies and the two thresholds."""
        plt.figure(figsize=(10, 6))
        plt.plot(log_energies, label='Log Energy')
        plt.axhline(y=energy_threshold_low, color='r', linestyle='--',
                    label='Low Energy Threshold (25th Percentile)')
        plt.axhline(y=energy_threshold_high, color='g', linestyle='--',
                    label='High Energy Threshold (75th Percentile)')
        plt.xlabel('Frame')
        plt.ylabel('Log Energy')
        plt.title('Log Energy and Energy Thresholds')
        plt.legend()
        plt.show()
70
-
71
def main():
    """Command-line entry point: read a WAV file, print its estimated SNR
    and optionally plot the per-frame energies."""
    parser = argparse.ArgumentParser(
        description='Estimate SNR from audio signal',
        usage='python3 estimate_snr.py -i <input_file> -ws <window_size> -hs <hop_size> -p')
    parser.add_argument('-i', '--input', required=True,
                        help='Input audio file in WAV format')
    parser.add_argument('-ws', '--window_size', type=int,
                        default=int(0.02 * 16000),
                        help='Window size in samples (default: 320)')
    parser.add_argument('-hs', '--hop_size', type=int,
                        default=int(0.01 * 16000),
                        help='Hop size in samples (default: 160)')
    parser.add_argument('-p', '--plot', action='store_true',
                        help='Plot log energy and energy thresholds')
    args = parser.parse_args()

    samples, rate = audiofile.read(args.input)
    estimator = SNREstimator(samples, rate, args.window_size, args.hop_size)
    snr_value, energies, thr_low, thr_high = estimator.estimate_snr()

    print("Estimated SNR:", snr_value)

    if args.plot:
        estimator.plot_energy(energies, thr_low, thr_high)


if __name__ == '__main__':
    main()
@@ -1,63 +0,0 @@
1
- # feats_agender.py
2
- from nkululeko.featureset import Featureset
3
- import os
4
- import pandas as pd
5
- import audeer
6
- import nkululeko.glob_conf as glob_conf
7
- import audonnx
8
- import numpy as np
9
- import audinterface
10
-
11
class AudModelAgenderSet(Featureset):
    """
    Embeddings from the wav2vec2. based model finetuned on agender data,
    described in the paper
    "Speech-based Age and Gender Prediction with Transformers"
    https://arxiv.org/abs/2306.16962
    """

    def __init__(self, name, data_df):
        """Download the onnx model on first use and load it."""
        super().__init__(name, data_df)
        model_url = 'https://zenodo.org/record/7761387/files/w2v2-L-robust-6-age-gender.25c844af-1.1.1.zip'
        model_root = self.util.config_val('FEATS', 'agender.model', './audmodel_agender/')
        if not os.path.isdir(model_root):
            # fetch and unpack the model archive only once
            cache_root = audeer.mkdir('cache')
            model_root = audeer.mkdir(model_root)
            archive_path = audeer.download_url(model_url, cache_root, verbose=True)
            audeer.extract_archive(archive_path, model_root)
        device = self.util.config_val('MODEL', 'device', 'cpu')
        self.model = audonnx.load(model_root, device=device)

    def extract(self):
        """Extract the features based on the initialized dataset or re-open them when found on disk."""
        store = self.util.get_path('store')
        store_format = self.util.config_val('FEATS', 'store_format', 'pkl')
        storage = f'{store}{self.name}.{store_format}'
        extract = eval(self.util.config_val('FEATS', 'needs_feature_extraction', 'False'))
        no_reuse = eval(self.util.config_val('FEATS', 'no_reuse', 'False'))
        if no_reuse or extract or not os.path.isfile(storage):
            self.util.debug('extracting agender model embeddings, this might take a while...')
            hidden_states = audinterface.Feature(
                self.model.labels('hidden_states'),
                process_func=self.model,
                process_func_args={
                    'outputs': 'hidden_states',
                },
                sampling_rate=16000,
                resample=True,
                num_workers=5,
                verbose=True,
            )
            self.df = hidden_states.process_index(self.data_df.index)
            self.util.write_store(self.df, storage, store_format)
            try:
                # FIX: reset the flag in the same config section it is read
                # from above ('FEATS'); it was previously reset under 'DATA',
                # so the extraction flag was never actually cleared.
                glob_conf.config['FEATS']['needs_feature_extraction'] = 'False'
            except KeyError:
                pass
        else:
            self.util.debug('reusing extracted audmodel features.')
            self.df = self.util.get_store(storage, store_format)

    def extract_sample(self, signal, sr):
        """Return the flattened hidden-state embedding for a single signal."""
        result = self.model(signal, sr)
        return np.asarray(result['hidden_states'].flatten())
- return np.asarray(result['hidden_states'].flatten())