nkululeko 0.59.1__py3-none-any.whl → 0.61.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. nkululeko/constants.py +1 -1
  2. nkululeko/experiment.py +43 -43
  3. nkululeko/feature_extractor.py +101 -58
  4. nkululeko/modelrunner.py +14 -14
  5. nkululeko/plots.py +11 -0
  6. nkululeko/segment.py +23 -27
  7. nkululeko/test_predictor.py +1 -1
  8. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/METADATA +13 -1
  9. nkululeko-0.61.0.dist-info/RECORD +31 -0
  10. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/WHEEL +1 -1
  11. nkululeko/ap_age.py +0 -31
  12. nkululeko/ap_arousal.py +0 -30
  13. nkululeko/ap_dominance.py +0 -29
  14. nkululeko/ap_gender.py +0 -29
  15. nkululeko/ap_mos.py +0 -35
  16. nkululeko/ap_pesq.py +0 -35
  17. nkululeko/ap_sdr.py +0 -36
  18. nkululeko/ap_snr.py +0 -35
  19. nkululeko/ap_stoi.py +0 -34
  20. nkululeko/ap_valence.py +0 -30
  21. nkululeko/augmenter.py +0 -64
  22. nkululeko/dataset.py +0 -415
  23. nkululeko/dataset_csv.py +0 -49
  24. nkululeko/dataset_ravdess.py +0 -19
  25. nkululeko/estimate_snr.py +0 -89
  26. nkululeko/feats_agender.py +0 -63
  27. nkululeko/feats_agender_agender.py +0 -65
  28. nkululeko/feats_analyser.py +0 -87
  29. nkululeko/feats_audmodel.py +0 -63
  30. nkululeko/feats_audmodel_dim.py +0 -63
  31. nkululeko/feats_clap.py +0 -74
  32. nkululeko/feats_import.py +0 -44
  33. nkululeko/feats_mld.py +0 -47
  34. nkululeko/feats_mos.py +0 -92
  35. nkululeko/feats_opensmile.py +0 -84
  36. nkululeko/feats_oxbow.py +0 -87
  37. nkululeko/feats_praat.py +0 -72
  38. nkululeko/feats_snr.py +0 -63
  39. nkululeko/feats_squim.py +0 -99
  40. nkululeko/feats_trill.py +0 -74
  41. nkululeko/feats_wav2vec2.py +0 -94
  42. nkululeko/featureset.py +0 -41
  43. nkululeko/feinberg_praat.py +0 -430
  44. nkululeko/loss_ccc.py +0 -28
  45. nkululeko/loss_softf1loss.py +0 -40
  46. nkululeko/model.py +0 -256
  47. nkululeko/model_bayes.py +0 -14
  48. nkululeko/model_cnn.py +0 -118
  49. nkululeko/model_gmm.py +0 -16
  50. nkululeko/model_knn.py +0 -16
  51. nkululeko/model_knn_reg.py +0 -16
  52. nkululeko/model_mlp.py +0 -175
  53. nkululeko/model_mlp_regression.py +0 -197
  54. nkululeko/model_svm.py +0 -18
  55. nkululeko/model_svr.py +0 -18
  56. nkululeko/model_tree.py +0 -14
  57. nkululeko/model_tree_reg.py +0 -14
  58. nkululeko/model_xgb.py +0 -12
  59. nkululeko/model_xgr.py +0 -12
  60. nkululeko/randomsplicer.py +0 -76
  61. nkululeko/randomsplicing.py +0 -74
  62. nkululeko-0.59.1.dist-info/RECORD +0 -82
  63. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/LICENSE +0 -0
  64. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/top_level.txt +0 -0
nkululeko/constants.py CHANGED
@@ -1 +1 @@
1
- VERSION = '0.59.1'
1
+ VERSION = '0.61.0'
nkululeko/experiment.py CHANGED
@@ -2,13 +2,13 @@ import random
2
2
  import os
3
3
  import time
4
4
  import numpy as np
5
- from nkululeko.dataset import Dataset
6
- from nkululeko.dataset_csv import Dataset_CSV
7
- from nkululeko.dataset_ravdess import Ravdess
5
+ from nkululeko.data.dataset import Dataset
6
+ from nkululeko.data.dataset_csv import Dataset_CSV
7
+ from nkululeko.data.dataset_ravdess import Ravdess
8
8
  from nkululeko.filter_data import filter_min_dur
9
9
  from nkululeko.runmanager import Runmanager
10
10
  from nkululeko.test_predictor import Test_predictor
11
- from nkululeko.feats_analyser import FeatureAnalyser
11
+ from nkululeko.feat_extract.feats_analyser import FeatureAnalyser
12
12
  from nkululeko.util import Util
13
13
  from nkululeko.feature_extractor import FeatureExtractor
14
14
  from nkululeko.plots import Plots
@@ -148,32 +148,32 @@ class Experiment:
148
148
  self.df_train = self._import_csv(storage_train)
149
149
  else:
150
150
  self.df_train, self.df_test = pd.DataFrame(), pd.DataFrame()
151
- strategy = self.util.config_val('DATA', 'strategy', 'traintest')
152
- # some datasets against others in their entierty
153
- if strategy == 'cross_data':
154
- train_dbs = ast.literal_eval(glob_conf.config['DATA']['trains'])
155
- test_dbs = ast.literal_eval(glob_conf.config['DATA']['tests'])
156
- for dn in train_dbs:
157
- d = self.datasets[dn]
158
- d.prepare_labels()
159
- self.df_train = self.df_train.append(self.util.make_segmented_index(d.df))
160
- self.util.copy_flags(d, self.df_train)
161
- for dn in test_dbs:
162
- d = self.datasets[dn]
163
- d.prepare_labels()
164
- self.df_test = self.df_test.append(self.util.make_segmented_index(d.df))
165
- self.util.copy_flags(d, self.df_test)
166
- elif strategy == 'traintest':
151
+ # strategy = self.util.config_val('DATA', 'strategy', 'traintest')
152
+ # # some datasets against others in their entierty
153
+ # if strategy == 'cross_data':
154
+ # train_dbs = ast.literal_eval(glob_conf.config['DATA']['trains'])
155
+ # test_dbs = ast.literal_eval(glob_conf.config['DATA']['tests'])
156
+ # for dn in train_dbs:
157
+ # d = self.datasets[dn]
158
+ # d.prepare_labels()
159
+ # self.df_train = self.df_train.append(self.util.make_segmented_index(d.df))
160
+ # self.util.copy_flags(d, self.df_train)
161
+ # for dn in test_dbs:
162
+ # d = self.datasets[dn]
163
+ # d.prepare_labels()
164
+ # self.df_test = self.df_test.append(self.util.make_segmented_index(d.df))
165
+ # self.util.copy_flags(d, self.df_test)
166
+ # elif strategy == 'traintest':
167
167
  # default: train vs. test combined from all datasets
168
- for d in self.datasets.values():
169
- d.split()
170
- d.prepare_labels()
171
- self.df_train = pd.concat([self.df_train, d.df_train])
172
- self.util.copy_flags(d, self.df_train)
173
- self.df_test = pd.concat([self.df_test, d.df_test])
174
- self.util.copy_flags(d, self.df_test)
175
- else:
176
- self.util.error(f'unknown strategy: {strategy}')
168
+ for d in self.datasets.values():
169
+ d.split()
170
+ d.prepare_labels()
171
+ self.df_train = pd.concat([self.df_train, d.df_train])
172
+ self.util.copy_flags(d, self.df_train)
173
+ self.df_test = pd.concat([self.df_test, d.df_test])
174
+ self.util.copy_flags(d, self.df_test)
175
+ # else:
176
+ # self.util.error(f'unknown strategy: {strategy}')
177
177
  # save the file lists to disk for later reuse
178
178
  store = self.util.get_path('store')
179
179
  storage_test = f'{store}testdf.csv'
@@ -313,7 +313,7 @@ class Experiment:
313
313
  """
314
314
  Augment the selected samples
315
315
  """
316
- from nkululeko.augmenter import Augmenter
316
+ from nkululeko.augmenting.augmenter import Augmenter
317
317
  sample_selection = self.util.config_val('DATA', 'augment', 'train')
318
318
  if sample_selection=='all':
319
319
  df = pd.concat([self.df_train, self.df_test])
@@ -343,43 +343,43 @@ class Experiment:
343
343
  targets = self.util.config_val_list('PREDICT', 'targets', ['gender'])
344
344
  for target in targets:
345
345
  if target == 'gender':
346
- from nkululeko.ap_gender import GenderPredictor
346
+ from nkululeko.autopredict.ap_gender import GenderPredictor
347
347
  predictor = GenderPredictor(df)
348
348
  df = predictor.predict(sample_selection)
349
349
  elif target == 'age':
350
- from nkululeko.ap_age import AgePredictor
350
+ from nkululeko.autopredict.ap_age import AgePredictor
351
351
  predictor = AgePredictor(df)
352
352
  df = predictor.predict(sample_selection)
353
353
  elif target == 'snr':
354
- from nkululeko.ap_sdr import SNRPredictor
354
+ from nkululeko.autopredict.ap_sdr import SNRPredictor
355
355
  predictor = SNRPredictor(df)
356
356
  df = predictor.predict(sample_selection)
357
357
  elif target == 'mos':
358
- from nkululeko.ap_mos import MOSPredictor
358
+ from nkululeko.autopredict.ap_mos import MOSPredictor
359
359
  predictor = MOSPredictor(df)
360
360
  df = predictor.predict(sample_selection)
361
361
  elif target == 'pesq':
362
- from nkululeko.ap_pesq import PESQPredictor
362
+ from nkululeko.autopredict.ap_pesq import PESQPredictor
363
363
  predictor = PESQPredictor(df)
364
364
  df = predictor.predict(sample_selection)
365
365
  elif target == 'sdr':
366
- from nkululeko.ap_sdr import SDRPredictor
366
+ from nkululeko.autopredict.ap_sdr import SDRPredictor
367
367
  predictor = SDRPredictor(df)
368
368
  df = predictor.predict(sample_selection)
369
369
  elif target == 'stoi':
370
- from nkululeko.ap_stoi import STOIPredictor
370
+ from nkululeko.autopredict.ap_stoi import STOIPredictor
371
371
  predictor = STOIPredictor(df)
372
372
  df = predictor.predict(sample_selection)
373
373
  elif target == 'arousal':
374
- from nkululeko.ap_arousal import ArousalPredictor
374
+ from nkululeko.autopredict.ap_arousal import ArousalPredictor
375
375
  predictor = ArousalPredictor(df)
376
376
  df = predictor.predict(sample_selection)
377
377
  elif target == 'valence':
378
- from nkululeko.ap_valence import ValencePredictor
378
+ from nkululeko.autopredict.ap_valence import ValencePredictor
379
379
  predictor = ValencePredictor(df)
380
380
  df = predictor.predict(sample_selection)
381
381
  elif target == 'dominance':
382
- from nkululeko.ap_dominance import DominancePredictor
382
+ from nkululeko.autopredict.ap_dominance import DominancePredictor
383
383
  predictor = DominancePredictor(df)
384
384
  df = predictor.predict(sample_selection)
385
385
  else:
@@ -390,7 +390,7 @@ class Experiment:
390
390
  """
391
391
  Random-splice the selected samples
392
392
  """
393
- from nkululeko.randomsplicer import Randomsplicer
393
+ from nkululeko.augmenting.randomsplicer import Randomsplicer
394
394
  sample_selection = self.util.config_val('DATA', 'random_splice', 'train')
395
395
  if sample_selection=='all':
396
396
  df = pd.concat([self.df_train, self.df_test])
@@ -552,5 +552,5 @@ class Experiment:
552
552
  f = open(filename, 'wb')
553
553
  pickle.dump(self.__dict__, f)
554
554
  f.close()
555
- except (AttributeError, TypeError) as error:
556
- self.util.debug(f'Save experiment: Can\'t pickle local object: {error}')
555
+ except (AttributeError, TypeError, RuntimeError) as error:
556
+ self.util.warn(f'Save experiment: Can\'t pickle local object: {error}')
nkululeko/feature_extractor.py CHANGED
@@ -5,8 +5,9 @@ Helper class to encapsulate feature extraction methods
5
5
 
6
6
  """
7
7
  import pandas as pd
8
- from nkululeko.util import Util
9
- from nkululeko.feats_opensmile import Opensmileset
8
+
9
+ from nkululeko.feat_extract.feats_opensmile import Opensmileset
10
+ from nkululeko.util import Util
10
11
 
11
12
 
12
13
  class FeatureExtractor:
@@ -16,80 +17,122 @@ class FeatureExtractor:
16
17
  data_df (pandas.DataFrame): dataframe with audiofile paths as index
17
18
  feats_types (array of strings): designations of acoustic feature extractors to be used
18
19
  data_name (string): names of databases that are extracted (for the caching)
19
- feats_designation (string): the type of split (train/test), also is used for the cache name.
20
+ feats_designation (string): the type of split (train/test), also is used for the cache name.
20
21
  Returns:
21
- df (pandas.DataFrame): dataframe with same index as data_df and acoustic features in columns
22
+ df (pandas.DataFrame): dataframe with same index as data_df and acoustic features in columns
22
23
  """
23
- df = None # pandas dataframe to store the features (and indexed with the data from the sets)
24
- data_df = None # dataframe to get audio paths
25
24
 
26
- # def __init__
25
+ # pandas dataframe to store the features (and indexed with the data from the sets)
26
+ df = None
27
+ data_df = None # dataframe to get audio paths
28
+
29
+ # def __init__
27
30
  def __init__(self, data_df, feats_types, data_name, feats_designation):
28
31
  self.data_df = data_df
29
32
  self.data_name = data_name
30
33
  self.feats_types = feats_types
31
- self.util = Util('feature_extractor')
34
+ self.util = Util("feature_extractor")
32
35
  self.feats_designation = feats_designation
33
-
36
+
34
37
  def extract(self):
35
38
  # feats_types = self.util.config_val_list('FEATS', 'type', ['os'])
36
39
  self.featExtractor = None
37
- self.feats= pd.DataFrame()
40
+ self.feats = pd.DataFrame()
38
41
  _scale = True
39
42
  for feats_type in self.feats_types:
40
- store_name = f'{self.data_name}_{feats_type}'
41
- if feats_type=='os':
42
- self.featExtractor = Opensmileset(f'{store_name}_{self.feats_designation}', self.data_df)
43
- elif feats_type=='trill':
44
- from nkululeko.feats_trill import TRILLset
45
- self.featExtractor = TRILLset(f'{store_name}_{self.feats_designation}', self.data_df)
46
- elif feats_type=='wav2vec':
47
- from nkululeko.feats_wav2vec2 import Wav2vec2
48
- self.featExtractor = Wav2vec2(f'{store_name}_{self.feats_designation}', self.data_df)
49
- elif feats_type=='audmodel':
50
- from nkululeko.feats_audmodel import AudModelSet
51
- self.featExtractor = AudModelSet(f'{store_name}_{self.feats_designation}', self.data_df)
52
- elif feats_type=='auddim':
53
- from nkululeko.feats_audmodel_dim import AudModelDimSet
54
- self.featExtractor = AudModelDimSet(f'{store_name}_{self.feats_designation}', self.data_df)
55
- elif feats_type=='agender':
56
- from nkululeko.feats_agender import AudModelAgenderSet
57
- self.featExtractor = AudModelAgenderSet(f'{store_name}_{self.feats_designation}', self.data_df)
58
- elif feats_type=='agender_agender':
59
- from nkululeko.feats_agender_agender import AgenderAgenderSet
60
- self.featExtractor = AgenderAgenderSet(f'{store_name}_{self.feats_designation}', self.data_df)
61
- elif feats_type=='snr':
62
- from nkululeko.feats_snr import SNRSet
63
- self.featExtractor = SNRSet(f'{store_name}_{self.feats_designation}', self.data_df)
64
- elif feats_type=='mos':
65
- from nkululeko.feats_mos import MOSSet
66
- self.featExtractor = MOSSet(f'{store_name}_{self.feats_designation}', self.data_df)
67
- elif feats_type=='squim':
68
- from nkululeko.feats_squim import SQUIMSet
69
- self.featExtractor = SQUIMSet(f'{store_name}_{self.feats_designation}', self.data_df)
70
- elif feats_type=='clap':
71
- from nkululeko.feats_clap import Clap
72
- self.featExtractor = Clap(f'{store_name}_{self.feats_designation}', self.data_df)
73
- elif feats_type=='praat':
74
- from nkululeko.feats_praat import Praatset
75
- self.featExtractor = Praatset(f'{store_name}_{self.feats_designation}', self.data_df)
76
- elif feats_type=='mld':
77
- from nkululeko.feats_mld import MLD_set
78
- self.featExtractor = MLD_set(f'{store_name}_{self.feats_designation}', self.data_df)
79
- elif feats_type=='import':
80
- from nkululeko.feats_import import Importset
81
- self.featExtractor = Importset(f'{store_name}_{self.feats_designation}', self.data_df)
43
+ store_name = f"{self.data_name}_{feats_type}"
44
+ if feats_type == "os":
45
+ self.featExtractor = Opensmileset(
46
+ f"{store_name}_{self.feats_designation}", self.data_df
47
+ )
48
+ elif feats_type == "trill":
49
+ from nkululeko.feat_extract.feats_trill import TRILLset
50
+ self.featExtractor = TRILLset(
51
+ f"{store_name}_{self.feats_designation}", self.data_df
52
+ )
53
+ elif feats_type == "wav2vec":
54
+ from nkululeko.feat_extract.feats_wav2vec2 import Wav2vec2
55
+ self.featExtractor = Wav2vec2(
56
+ f"{store_name}_{self.feats_designation}", self.data_df
57
+ )
58
+ elif feats_type in ("hubert", "hubert_ft", "hubert_large",
59
+ "hubert_xlarge", "hubert_xlarge_ft"):
60
+ from nkululeko.feat_extract.feats_hubert import Hubert
61
+ self.featExtractor = Hubert(
62
+ f"{store_name}_{self.feats_designation}", self.data_df,
63
+ feats_type
64
+ )
65
+
66
+ elif feats_type == "audmodel":
67
+ from nkululeko.feat_extract.feats_audmodel import AudModelSet
68
+ self.featExtractor = AudModelSet(
69
+ f"{store_name}_{self.feats_designation}", self.data_df
70
+ )
71
+ elif feats_type == "auddim":
72
+ from nkululeko.feat_extract.feats_audmodel_dim import \
73
+ AudModelDimSet
74
+ self.featExtractor = AudModelDimSet(
75
+ f"{store_name}_{self.feats_designation}", self.data_df
76
+ )
77
+ elif feats_type == "agender":
78
+ from nkululeko.feat_extract.feats_agender import \
79
+ AudModelAgenderSet
80
+ self.featExtractor = AudModelAgenderSet(
81
+ f"{store_name}_{self.feats_designation}", self.data_df
82
+ )
83
+ elif feats_type == "agender_agender":
84
+ from nkululeko.feat_extract.feats_agender_agender import \
85
+ AgenderAgenderSet
86
+ self.featExtractor = AgenderAgenderSet(
87
+ f"{store_name}_{self.feats_designation}", self.data_df
88
+ )
89
+ elif feats_type == "snr":
90
+ from nkululeko.feat_extract.feats_snr import SNRSet
91
+ self.featExtractor = SNRSet(
92
+ f"{store_name}_{self.feats_designation}", self.data_df
93
+ )
94
+ elif feats_type == "mos":
95
+ from nkululeko.feat_extract.feats_mos import MOSSet
96
+ self.featExtractor = MOSSet(
97
+ f"{store_name}_{self.feats_designation}", self.data_df
98
+ )
99
+ elif feats_type == "squim":
100
+ from nkululeko.feat_extract.feats_squim import SQUIMSet
101
+ self.featExtractor = SQUIMSet(
102
+ f"{store_name}_{self.feats_designation}", self.data_df
103
+ )
104
+ elif feats_type == "clap":
105
+ from nkululeko.feat_extract.feats_clap import Clap
106
+ self.featExtractor = Clap(
107
+ f"{store_name}_{self.feats_designation}", self.data_df
108
+ )
109
+ elif feats_type == "praat":
110
+ from nkululeko.feat_extract.feats_praat import Praatset
111
+ self.featExtractor = Praatset(
112
+ f"{store_name}_{self.feats_designation}", self.data_df
113
+ )
114
+ elif feats_type == "mld":
115
+ from nkululeko.feat_extract.feats_mld import MLD_set
116
+ self.featExtractor = MLD_set(
117
+ f"{store_name}_{self.feats_designation}", self.data_df
118
+ )
119
+ elif feats_type == "import":
120
+ from nkululeko.feat_extract.feats_import import Importset
121
+ self.featExtractor = Importset(
122
+ f"{store_name}_{self.feats_designation}", self.data_df
123
+ )
82
124
  else:
83
- self.util.error(f'unknown feats_type: {feats_type}')
125
+ self.util.error(f"unknown feats_type: {feats_type}")
84
126
 
85
127
  self.featExtractor.extract()
86
128
  self.featExtractor.filter()
87
129
  # remove samples that were not extracted by MLD
88
- #self.df_test = self.df_test.loc[self.df_test.index.intersection(featExtractor_test.df.index)]
89
- #self.df_train = self.df_train.loc[self.df_train.index.intersection(featExtractor_train.df.index)]
90
- self.util.debug(f'{feats_type}: shape : {self.featExtractor.df.shape}')
91
- self.feats = pd.concat([self.feats, self.featExtractor.df], axis = 1)
130
+ # self.df_test = self.df_test.loc[self.df_test.index.intersection(featExtractor_test.df.index)]
131
+ # self.df_train = self.df_train.loc[self.df_train.index.intersection(featExtractor_train.df.index)]
132
+ self.util.debug(
133
+ f"{feats_type}: shape : {self.featExtractor.df.shape}")
134
+ self.feats = pd.concat([self.feats, self.featExtractor.df], axis=1)
92
135
  return self.feats
93
136
 
94
137
  def extract_sample(self, signal, sr):
95
- return self.featExtractor.extract_sample(signal, sr)
138
+ return self.featExtractor.extract_sample(signal, sr)
nkululeko/modelrunner.py CHANGED
@@ -59,44 +59,44 @@ class Modelrunner:
59
59
 
60
60
  def _select_model(self, model_type):
61
61
  if model_type=='svm':
62
- from nkululeko.model_svm import SVM_model
62
+ from nkululeko.models.model_svm import SVM_model
63
63
  self.model = SVM_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
64
64
  elif model_type=='svr':
65
- from nkululeko.model_svr import SVR_model
65
+ from nkululeko.models.model_svr import SVR_model
66
66
  self.model = SVR_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
67
67
  elif model_type=='xgb':
68
- from nkululeko.model_xgb import XGB_model
68
+ from nkululeko.models.model_xgb import XGB_model
69
69
  self.model = XGB_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
70
70
  elif model_type=='xgr':
71
- from nkululeko.model_xgr import XGR_model
71
+ from nkululeko.models.model_xgr import XGR_model
72
72
  self.model = XGR_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
73
73
  elif model_type=='bayes':
74
- from nkululeko.model_bayes import Bayes_model
74
+ from nkululeko.models.model_bayes import Bayes_model
75
75
  self.model = Bayes_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
76
76
  elif model_type=='gmm':
77
- from nkululeko.model_gmm import GMM_model
77
+ from nkululeko.models.model_gmm import GMM_model
78
78
  self.model = GMM_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
79
79
  elif model_type=='knn':
80
- from nkululeko.model_knn import KNN_model
80
+ from nkululeko.models.model_knn import KNN_model
81
81
  self.model = KNN_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
82
82
  elif model_type=='knn_reg':
83
- from nkululeko.model_knn_reg import KNN_reg_model
83
+ from nkululeko.models.model_knn_reg import KNN_reg_model
84
84
  self.model = KNN_reg_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
85
85
  elif model_type=='tree':
86
- from nkululeko.model_tree import Tree_model
86
+ from nkululeko.models.model_tree import Tree_model
87
87
  self.model = Tree_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
88
88
  elif model_type=='tree_reg':
89
- from nkululeko.model_tree_reg import Tree_reg_model
89
+ from nkululeko.models.model_tree_reg import Tree_reg_model
90
90
  self.model = Tree_reg_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
91
91
  elif model_type=='cnn':
92
- from nkululeko.model_cnn import CNN_model
93
- from nkululeko.model_cnn import CNN_model
92
+ from nkululeko.models.model_cnn import CNN_model
93
+ from nkululeko.models.model_cnn import CNN_model
94
94
  self.model = CNN_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
95
95
  elif model_type=='mlp':
96
- from nkululeko.model_mlp import MLP_model
96
+ from nkululeko.models.model_mlp import MLP_model
97
97
  self.model = MLP_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
98
98
  elif model_type=='mlp_reg':
99
- from nkululeko.model_mlp_regression import MLP_Reg_model
99
+ from nkululeko.models.model_mlp_regression import MLP_Reg_model
100
100
  self.model = MLP_Reg_model(self.df_train, self.df_test, self.feats_train, self.feats_test)
101
101
  else:
102
102
  self.util.error(f'unknown model type: \'{model_type}\'')
nkululeko/plots.py CHANGED
@@ -95,6 +95,17 @@ class Plots():
95
95
  else:
96
96
  self.util.error(f'plot value counts: the plot distribution descriptor for {att} has more than 2 values')
97
97
 
98
+ def plot_durations(self, df, filename, sample_selection):
99
+ fig_dir = self.util.get_path('fig_dir')+'../' # one up because of the runs
100
+ ax = sns.histplot(df, x='duration', hue='class_label', kde=True)
101
+ ax.set_title(f'{sample_selection} {df.shape[0]}')
102
+ ax.set_xlabel(f'duration')
103
+ ax.set_ylabel(f'number of samples')
104
+ fig = ax.figure
105
+ plt.tight_layout()
106
+ plt.savefig(f'{fig_dir}{filename}_{sample_selection}.{self.format}')
107
+ plt.close(fig)
108
+ fig.clear()
98
109
 
99
110
  def describe_df(self, name, df, target, filename):
100
111
  """Make a stacked barplot of samples and speakers per sex and target values. speaker, gender and target columns must be present"""
nkululeko/segment.py CHANGED
@@ -7,25 +7,8 @@ from nkululeko.util import Util
7
7
  from nkululeko.constants import VERSION
8
8
  import argparse
9
9
  import os
10
- import torch
11
- import audformat
12
- from audformat.utils import to_filewise_index
13
- from audformat import segmented_index
14
10
  import pandas as pd
15
11
 
16
- # initialize the VAD model
17
- SAMPLING_RATE = 16000
18
- torch.set_num_threads(1)
19
- vad_model, vad_utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
20
- model='silero_vad',
21
- force_reload=False,
22
- onnx=False)
23
- (get_speech_timestamps,
24
- save_audio,
25
- read_audio,
26
- VADIterator,
27
- collect_chunks) = vad_utils
28
-
29
12
  def main(src_dir):
30
13
  parser = argparse.ArgumentParser(description='Call the nkululeko framework.')
31
14
  parser.add_argument('--config', default='exp.ini', help='The base configuration')
@@ -60,7 +43,7 @@ def main(src_dir):
60
43
  util.debug(f'train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}')
61
44
 
62
45
  # segment
63
- segment_target = util.config_val('DATA', 'segment_target', '_seg')
46
+ segment_target = util.config_val('SEGMENT', 'target', '_seg')
64
47
  # this if a specific dataset is to be segmented
65
48
  # segment_db = util.config_val('DATA', 'segment', False)
66
49
  # if segment_db:
@@ -72,7 +55,8 @@ def main(src_dir):
72
55
  # name = f'{dataset}{segment_target}'
73
56
  # df_seg.to_csv(f'{expr.data_dir}/{name}.csv')
74
57
 
75
- sample_selection = util.config_val('DATA', 'segment', 'all')
58
+ segmenter = util.config_val('SEGMENT', 'method', 'silero')
59
+ sample_selection = util.config_val('SEGMENT', 'sample_selection', 'all')
76
60
  if sample_selection=='all':
77
61
  df = pd.concat([expr.df_train, expr.df_test])
78
62
  elif sample_selection=='train':
@@ -81,26 +65,38 @@ def main(src_dir):
81
65
  df = expr.df_test
82
66
  else:
83
67
  util.error(f'unknown segmentation selection specifier {sample_selection}, should be [all | train | test]')
68
+ # if "duration" not in df.columns:
69
+ # df = df.drop(columns=['duration'], inplace=True)
70
+ util.debug(f'segmenting {sample_selection}: {df.shape[0]} samples with {segmenter}')
71
+ if segmenter=='silero':
72
+ from nkululeko.segmenting.seg_silero import Silero_segmenter
73
+ segmenter = Silero_segmenter()
74
+ df_seg = segmenter.segment_dataframe(df)
75
+
76
+ else:
77
+ util.error(f'unkown segmenter: {segmenter}')
84
78
 
85
- if "duration" not in df.columns:
86
- df = df.drop(columns=['duration'], inplace=True)
87
- util.debug(f'segmenting train and test set: {df.shape[0]} samples')
88
- df_seg = segment_dataframe(df)
89
79
  def calc_dur(x):
80
+ from datetime import datetime
90
81
  starts = x[1]
91
82
  ends = x[2]
92
83
  return (ends - starts).total_seconds()
93
84
  df_seg['duration'] = df_seg.index.to_series().map(lambda x:calc_dur(x))
85
+ if "duration" not in df.columns:
86
+ df['duration'] = df.index.to_series().map(lambda x:calc_dur(x))
87
+ num_before = df.shape[0]
88
+ num_after = df_seg.shape[0]
94
89
  dataname = '_'.join(expr.datasets.keys())
95
90
  name = f'{dataname}{segment_target}'
96
91
  df_seg.to_csv(f'{expr.data_dir}/{name}.csv')
92
+ from nkululeko.plots import Plots
93
+ plots = Plots()
94
+ plots.plot_durations(df, 'original_durations', sample_selection)
95
+ plots.plot_durations(df_seg, 'segmented_durations', sample_selection)
97
96
  print('')
98
- util.debug(f'saved {name}.csv to {expr.data_dir}, {df_seg.shape[0]} samples')
97
+ util.debug(f'saved {name}.csv to {expr.data_dir}, {num_after} samples (was {num_before})')
99
98
  print('DONE')
100
99
 
101
-
102
-
103
-
104
100
  def get_segmentation(file):
105
101
  # print(f'segmenting {file[0]}')
106
102
  print('.', end='')
nkululeko/test_predictor.py CHANGED
@@ -5,7 +5,7 @@
5
5
  import nkululeko.glob_conf as glob_conf
6
6
  from nkululeko.util import Util
7
7
  import pandas as pd
8
- from nkululeko.dataset import Dataset
8
+ from nkululeko.data.dataset import Dataset
9
9
  from nkululeko.feature_extractor import FeatureExtractor
10
10
  from nkululeko.scaler import Scaler
11
11
  import numpy as np
{nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.59.1
3
+ Version: 0.61.0
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -253,6 +253,18 @@ Nkululeko can be used under the [MIT license](https://choosealicense.com/license
253
253
  Changelog
254
254
  =========
255
255
 
256
+ Version 0.61.0
257
+ --------------
258
+ * added HUBERT embeddings
259
+
260
+ Version 0.60.0
261
+ --------------
262
+ * some bugfixes
263
+ * new package structure
264
+ * fixed wav2vec2 bugs
265
+ * removed "cross_data" strategy
266
+
267
+
256
268
  Version 0.59.1
257
269
  --------------
258
270
  * bugfix, after fresh install, it seems some libraries have changed
nkululeko-0.61.0.dist-info/RECORD ADDED
@@ -0,0 +1,31 @@
1
+ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
+ nkululeko/augment.py,sha256=ebv5QebGD8wLzXInvusjn4kFlET6-yXkYoF132BrubQ,1750
3
+ nkululeko/balancer.py,sha256=64ftZN68sMDfkvuovCDHpAHmSJgCO6Kdk9bwmpSisec,12
4
+ nkululeko/cacheddataset.py,sha256=bSJ_SDg7TxL89YL_pJXp-sFvdUXJtHuBTd5KSTE4AkQ,955
5
+ nkululeko/constants.py,sha256=xSkfM3CWCYGD40D6GhHHCrxrw0JNK-372YdiRKMknWs,18
6
+ nkululeko/demo.py,sha256=nGP3fUDXuW1ZF12AzMpzRWXct0rdqYRJVNgA9B_QWwA,1821
7
+ nkululeko/demo_predictor.py,sha256=VVxE2lf5lTkAP5qElG5U2bK6SdDzQ2Jmf0Vn_yHpSro,2302
8
+ nkululeko/experiment.py,sha256=9qStgy31svY4bBVZOkuJ0JFjEQ1sIT2ibIdJ6IVlfTI,25063
9
+ nkululeko/explore.py,sha256=aemOk5XYw7axQEJQfdABEUxN3img0NYSb8l6W-nDzZY,2090
10
+ nkululeko/feature_extractor.py,sha256=2LqPIiDAoaBRhjcKik2hjBEBVBsLLxx8blQvTD43TRg,6324
11
+ nkululeko/file_checker.py,sha256=Nw05SIp7Ez1U9ZeFhNGz0XivwKr43hHg1WsfzKsrFPQ,3510
12
+ nkululeko/filter_data.py,sha256=g7giEShbA-dr2ekVycW5WurFG-UaopJvDZWylKNZtpM,6717
13
+ nkululeko/glob_conf.py,sha256=2Tl0NZQeVeydDO8T2tuJC2lCv99wvaTVRX9Dy1Mq_L4,237
14
+ nkululeko/modelrunner.py,sha256=zVDi2-UyjtmU0_Ltf4lnPcECVtukuDVuZaj4pydqOBY,5478
15
+ nkululeko/nkululeko.py,sha256=O2Zw7u-Mb7VP9MPxAlhdTkXV2lW2kETIuSJp7mfj_Tc,1671
16
+ nkululeko/plots.py,sha256=hoOFLbWXpV5jGDWHEpy345_4vpaGKGMAv2JwvpNUxkw,11454
17
+ nkululeko/predict.py,sha256=3ei4wn2by0p9Vkv7cllMcszmEjSM2vX0T6x_5rlgT28,1851
18
+ nkululeko/reporter.py,sha256=359aeQWt0ZGLseaJnOfafYG8BrwumiM2Q58DWiaoyWQ,10177
19
+ nkululeko/result.py,sha256=Ey5FPsAyfnQVtzO_J6_4hkOAZ191YWmF_vXxlgNjCdc,406
20
+ nkululeko/runmanager.py,sha256=ll04dEu5Y1nOi8QOtmSiw3oxzcXeASdQsg0t-vxCny8,6765
21
+ nkululeko/scaler.py,sha256=6NQHbSQZO9HIfhYNlliuDRywjaEH_FVKHRskTJ2xr90,3021
22
+ nkululeko/segment.py,sha256=GGyovnZ75Sqd8TgBH5fi3fjRkVw_ygqBQD46Yn6GVQ4,4660
23
+ nkululeko/syllable_nuclei.py,sha256=vK9dj5deqRnyEmlZmhFtKPzqKVGNCgTnWaG8UDITKNg,9913
24
+ nkululeko/test.py,sha256=BbHGliDChAXqMe2oA579dJpyZSlPGAm5997lX_POboQ,1372
25
+ nkululeko/test_predictor.py,sha256=QwdAVPHNew9w5PD_sPFhhWVDTYRAbUE6fkAp58X8Hjg,2410
26
+ nkululeko/util.py,sha256=gZrNTF4C7hKkEMCC_hoNkEAhAViWzWebP8LsHRew7s4,9731
27
+ nkululeko-0.61.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
28
+ nkululeko-0.61.0.dist-info/METADATA,sha256=LXJjW9KpGkPum60eGuYW__gl5QKXVRhnm6RySrKo2b8,21680
29
+ nkululeko-0.61.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
30
+ nkululeko-0.61.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
31
+ nkululeko-0.61.0.dist-info/RECORD,,
{nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/WHEEL CHANGED
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.1)
2
+ Generator: bdist_wheel (0.41.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
nkululeko/ap_age.py DELETED
@@ -1,31 +0,0 @@
1
- """"
2
- A predictor for age.
3
- Currently based on audEERING's agender model.
4
- """
5
- from nkululeko.util import Util
6
- from nkululeko.feature_extractor import FeatureExtractor
7
- import ast
8
- import nkululeko.glob_conf as glob_conf
9
- class AgePredictor:
10
- """
11
- AgePredictor
12
- predicting age with the audEERING agender model
13
-
14
- """
15
- def __init__(self, df):
16
- self.df = df
17
- self.util = Util('agePredictor')
18
-
19
-
20
- def predict(self, split_selection):
21
- self.util.debug(f'predicting age for {split_selection} samples')
22
- feats_name = "_".join(ast.literal_eval(glob_conf.config['DATA']['databases']))
23
- self.feature_extractor = FeatureExtractor(self.df, ['agender_agender'], feats_name, split_selection)
24
- agender_df = self.feature_extractor.extract()
25
- pred_age = agender_df.age * 100
26
- # pred_gender = agender_df.drop('age', axis=1).idxmax(axis=1)
27
- return_df = self.df.copy()
28
- # return_df['gender_pred'] = pred_gender
29
- return_df['age_pred'] = pred_age.astype('int')
30
- return return_df
31
-