nkululeko 0.45.0__py3-none-any.whl → 0.45.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1 +1 @@
1
- VERSION = '0.45.0'
1
+ VERSION = '0.45.2'
nkululeko/experiment.py CHANGED
@@ -247,17 +247,20 @@ class Experiment:
247
247
  def plot_distribution(self):
248
248
  """Plot the distribution of samples and speaker per target class and biological sex"""
249
249
  plot = Plots()
250
- if self.util.exp_is_classification():
251
- # self.df_train['labels'] = self.label_encoder.inverse_transform(self.df_train[self.target])
252
- # if self.df_test.is_labeled:
253
- # self.df_test['labels'] = self.label_encoder.inverse_transform(self.df_test[self.target])
254
- if self.df_test.shape[0] > 0:
255
- plot.describe_df('dev_set', self.df_test, self.target, f'test_distplot')
256
- plot.describe_df('train_set', self.df_train, self.target, f'train_distplot')
250
+ sample_selection = self.util.config_val('EXPL', 'sample_selection', 'all')
251
+ if sample_selection=='all':
252
+ df_labels = pd.concat([self.df_train, self.df_test])
253
+ self.util.copy_flags(self.df_train, df_labels)
254
+ elif sample_selection=='train':
255
+ df_labels = self.df_train
256
+ self.util.copy_flags(self.df_train, df_labels)
257
+ elif sample_selection=='test':
258
+ df_labels = self.df_test
259
+ self.util.copy_flags(self.df_test, df_labels)
257
260
  else:
258
- if self.df_test.shape[0] > 0:
259
- plot.describe_df('dev_set', self.df_test, self.target, f'test_distplot')
260
- plot.describe_df('train_set', self.df_train, self.target, f'train_distplot')
261
+ self.util.error(f'unkown sample selection specifier {sample_selection}, should be [all | train | test]')
262
+
263
+ plot.describe_df(f'{sample_selection}_set', df_labels, self.target, f'{sample_selection}_distplot')
261
264
 
262
265
  def extract_test_feats(self):
263
266
  self.feats_test = pd.DataFrame()
@@ -303,7 +306,7 @@ class Experiment:
303
306
  # self.df_train = self.df_train.append(df_train_aug)
304
307
 
305
308
 
306
- def analyse_features(self):
309
+ def analyse_features(self, needs_feats):
307
310
  """
308
311
  Do a feature exploration
309
312
 
@@ -311,7 +314,8 @@ class Experiment:
311
314
 
312
315
  if self.util.config_val('EXPL', 'value_counts', False):
313
316
  self.plot_distribution()
314
-
317
+ if not needs_feats:
318
+ return
315
319
  sample_selection = self.util.config_val('EXPL', 'sample_selection', 'False')
316
320
  if sample_selection=='all':
317
321
  df_feats = pd.concat([self.feats_train, self.feats_test])
@@ -325,7 +329,7 @@ class Experiment:
325
329
  elif sample_selection=='False':
326
330
  pass
327
331
  else:
328
- self.util.error(f'unkown feature_distribution specifier {sample_selection}, should be [all | train | test]')
332
+ self.util.error(f'unkown sample selection specifier {sample_selection}, should be [all | train | test]')
329
333
  if sample_selection in ('all', 'train', 'test'):
330
334
  feat_analyser = FeatureAnalyser(sample_selection, df_labels, df_feats)
331
335
  feat_analyser.analyse()
@@ -336,8 +340,8 @@ class Experiment:
336
340
  scatters = ast.literal_eval(glob_conf.config['EXPL']['scatter'])
337
341
  if self.util.exp_is_classification():
338
342
  plots = Plots()
339
- all_feats =self.feats_train.append(self.feats_test)
340
- all_labels = self.df_train['class_label'].append(self.df_test['class_label'])
343
+ all_feats = pd.concat([self.feats_train, self.feats_test])
344
+ all_labels = pd.concat([self.df_train['class_label'], self.df_test['class_label']])
341
345
  for scatter in scatters:
342
346
  plots.scatter_plot(all_feats, all_labels, scatter)
343
347
  else:
nkululeko/explore.py CHANGED
@@ -42,12 +42,13 @@ def main(src_dir):
42
42
  scatter = eval(util.config_val('EXPL', 'scatter', 'False'))
43
43
  model_type = util.config_val('EXPL', 'model', False)
44
44
  plot_tree = eval(util.config_val('EXPL', 'plot_tree', 'False'))
45
+ needs_feats = False
45
46
  if plot_feats or tsne or scatter or model_type or plot_tree:
46
47
  # these investigations need features to explore
47
48
  expr.extract_feats()
48
-
49
+ needs_feats = True
49
50
  # explore
50
- expr.analyse_features()
51
+ expr.analyse_features(needs_feats)
51
52
 
52
53
  print('DONE')
53
54
 
nkululeko/util.py CHANGED
@@ -226,4 +226,9 @@ class Util:
226
226
  elif format == 'csv':
227
227
  return audformat.utils.read_csv(name)
228
228
  else:
229
- self.error(f'unkown store format: {format}')
229
+ self.error(f'unkown store format: {format}')
230
+
231
+ def copy_flags(self, df_source, df_target):
232
+ df_target.is_labeled = df_source.is_labeled
233
+ df_target.got_gender = df_source.got_gender
234
+ df_target.got_speaker = df_source.got_speaker
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.45.0
3
+ Version: 0.45.2
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -148,7 +148,6 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
148
148
  * [Combine feature sets](http://blog.syntheticspeech.de/2022/06/30/how-to-combine-feature-sets-with-nkululeko/)
149
149
  * [Classifying continuous variables](http://blog.syntheticspeech.de/2022/01/26/nkululeko-classifying-continuous-variables/)
150
150
  * [Try out / demo a trained model](http://blog.syntheticspeech.de/2022/01/24/nkululeko-try-out-demo-a-trained-model/)
151
- * [Plot distributions of feature values](http://blog.syntheticspeech.de/2023/02/16/nkululeko-how-to-plot-distributions-of-feature-values/)
152
151
  * [Perform cross database experiments](http://blog.syntheticspeech.de/2021/10/05/nkululeko-perform-cross-database-experiments/)
153
152
  * [Meta parameter optimization](http://blog.syntheticspeech.de/2021/09/03/perform-optimization-with-nkululeko/)
154
153
  * [How to set up wav2vec embedding](http://blog.syntheticspeech.de/2021/12/03/how-to-set-up-wav2vec-embedding-for-nkululeko/)
@@ -242,6 +241,13 @@ Version 0.44.1
242
241
  * bugfixing: feature importance: https://github.com/felixbur/nkululeko/issues/23
243
242
  * bugfixing: loading csv database with filewise index https://github.com/felixbur/nkululeko/issues/24
244
243
 
244
+ Version 0.45.2
245
+ --------------
246
+ * added sample_selection for sample distribution plots
247
+
248
+ Version 0.45.1
249
+ --------------
250
+ * fixed dataframe.append bug
245
251
 
246
252
  Version 0.45.0
247
253
  --------------
@@ -3,14 +3,14 @@ nkululeko/augment.py,sha256=J-HxHKU7u1xZdIiVw_abYVYWOUmZI84HMQNQpcoYk9s,1385
3
3
  nkululeko/augmenter.py,sha256=whDW3mnda33Wl9TUk2yG_My6507PFCqCzBgy0027Q2c,2399
4
4
  nkululeko/balancer.py,sha256=64ftZN68sMDfkvuovCDHpAHmSJgCO6Kdk9bwmpSisec,12
5
5
  nkululeko/cacheddataset.py,sha256=bSJ_SDg7TxL89YL_pJXp-sFvdUXJtHuBTd5KSTE4AkQ,955
6
- nkululeko/constants.py,sha256=DvIzFa8FFrnFyfxPyiG3RFaXtX4c_fNT94jB-Kwoa-M,19
6
+ nkululeko/constants.py,sha256=UCi6ILbrogDPyhI2hfa7nLbmAUE0U_qAdhqXFjpYGkI,19
7
7
  nkululeko/dataset.py,sha256=BRjWXtg5FWfg-mhBQABStgSB8bUj67VAl9h1Zdd5U0Q,21627
8
8
  nkululeko/dataset_csv.py,sha256=vWhs72Nc5kDDbPw90EIdAlZpX9VUxreFp509gqmOB6M,1908
9
9
  nkululeko/dataset_ravdess.py,sha256=pTt98sr4_egdUCv2fWepkZTlkQ6x3A2YshO_n302DNg,537
10
10
  nkululeko/demo.py,sha256=Y5CscOdEuUC7V4QBDCem-Rvdc5CYd9zGEl80rqFPQ_M,1789
11
11
  nkululeko/demo_predictor.py,sha256=hYuvvKyW-DxbORAq1Y63owqhRgq-Bl8qPOymVISvO7M,2286
12
- nkululeko/experiment.py,sha256=NwoiyCXQI7RBVzM1-SikIf0LaxyaB6bZvdjJmAF7DEE,20413
13
- nkululeko/explore.py,sha256=EbNXDzsF6PY1lxljSTvox9LeHiOf4cei5gqppfxQuDQ,1873
12
+ nkululeko/experiment.py,sha256=It6uRIFl3ObAsJscVFS5II3mdVoZG3GC8e2hy_RwOUM,20481
13
+ nkululeko/explore.py,sha256=3mx0iuGbP1w3cteG0DM0QhKZnavOeW5OcV-vzfopVuU,1934
14
14
  nkululeko/feats_analyser.py,sha256=3ECjWSbsEarnoyvQf-dsW5ax7gbf7zukf89M_DTB15I,3726
15
15
  nkululeko/feats_audmodel.py,sha256=w-cV9Fgk_9wAV5u4ELzxvB9KM06IptPFneDenXXEmV0,2748
16
16
  nkululeko/feats_audmodel_dim.py,sha256=kug4wKRiXjceAXyT4XA8WJwXKIFNP-hTy0g5HAELVcQ,2739
@@ -52,9 +52,9 @@ nkululeko/runmanager.py,sha256=No6l8422bAI-GmPchA41KnTIFMFe4J3kdcBlj2tuaiU,6753
52
52
  nkululeko/scaler.py,sha256=ryYFGxlBqlAVGM5eaiRQTO4YYfEk8fKpAHePsaz0odk,3013
53
53
  nkululeko/test.py,sha256=4u0W18KqfpFY6jHqmTyGTGbC3StkyIOP3mbwe7kFUxU,1340
54
54
  nkululeko/test_predictor.py,sha256=4XGFa9AsHNtOkGdY0X23RxPpESyAlICqoXqR-YidHXA,2315
55
- nkululeko/util.py,sha256=AjVH94iYD9pimBXmZlzCDSJq2ATDsDlFsxyRRlD8n28,8174
56
- nkululeko-0.45.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
57
- nkululeko-0.45.0.dist-info/METADATA,sha256=8yAGOXZAz5bd_T07uWtOMlWgz64DEClInEhHJJGQBuA,17328
58
- nkululeko-0.45.0.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
59
- nkululeko-0.45.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
60
- nkululeko-0.45.0.dist-info/RECORD,,
55
+ nkululeko/util.py,sha256=06iTrqxStMpr0CU1D0neywkfZd-N5SMswOvg0OsKfNY,8381
56
+ nkululeko-0.45.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
57
+ nkululeko-0.45.2.dist-info/METADATA,sha256=mOe14oOubo5mE7xeZNyxdHQxOwxiNvbW9kNDJ76L4Ks,17334
58
+ nkululeko-0.45.2.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
59
+ nkululeko-0.45.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
60
+ nkululeko-0.45.2.dist-info/RECORD,,