nkululeko 0.85.1__py3-none-any.whl → 0.85.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.85.1"
1
+ VERSION="0.85.2"
2
2
  SAMPLING_RATE = 16000
nkululeko/data/dataset_csv.py CHANGED
@@ -21,7 +21,7 @@ class Dataset_CSV(Dataset):
21
21
  # exp_root = self.util.config_val("EXP", "root", "")
22
22
  # data_file = os.path.join(exp_root, data_file)
23
23
  root = os.path.dirname(data_file)
24
- audio_path = self.util.config_val_data(self.name, "audio_path", "")
24
+ audio_path = self.util.config_val_data(self.name, "audio_path", "./")
25
25
  df = pd.read_csv(data_file)
26
26
  # special treatment for segmented dataframes with only one column:
27
27
  if "start" in df.columns and len(df.columns) == 4:
@@ -49,7 +49,8 @@ class Dataset_CSV(Dataset):
49
49
  .map(lambda x: root + "/" + audio_path + "/" + x)
50
50
  .values
51
51
  )
52
- df = df.set_index(df.index.set_levels(file_index, level="file"))
52
+ df = df.set_index(df.index.set_levels(
53
+ file_index, level="file"))
53
54
  else:
54
55
  if not isinstance(df, pd.DataFrame):
55
56
  df = pd.DataFrame(df)
@@ -63,7 +64,8 @@ class Dataset_CSV(Dataset):
63
64
  self.db = None
64
65
  self.got_target = True
65
66
  self.is_labeled = self.got_target
66
- self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
67
+ self.start_fresh = eval(
68
+ self.util.config_val("DATA", "no_reuse", "False"))
67
69
  is_index = False
68
70
  try:
69
71
  if self.is_labeled and not "class_label" in self.df.columns:
@@ -90,7 +92,8 @@ class Dataset_CSV(Dataset):
90
92
  f" {self.got_gender}, got age: {self.got_age}"
91
93
  )
92
94
  self.util.debug(r_string)
93
- glob_conf.report.add_item(ReportItem("Data", "Loaded report", r_string))
95
+ glob_conf.report.add_item(ReportItem(
96
+ "Data", "Loaded report", r_string))
94
97
 
95
98
  def prepare(self):
96
99
  super().prepare()
nkululeko/experiment.py CHANGED
@@ -109,14 +109,15 @@ class Experiment:
109
109
  # print keys/column
110
110
  dbs = ",".join(list(self.datasets.keys()))
111
111
  labels = self.util.config_val("DATA", "labels", False)
112
+ auto_labels = list(
113
+ next(iter(self.datasets.values())).df[self.target].unique()
114
+ )
112
115
  if labels:
113
116
  self.labels = ast.literal_eval(labels)
114
117
  self.util.debug(f"Target labels (from config): {labels}")
115
118
  else:
116
- self.labels = list(
117
- next(iter(self.datasets.values())).df[self.target].unique()
118
- )
119
- self.util.debug(f"Target labels (from database): {labels}")
119
+ self.labels = auto_labels
120
+ self.util.debug(f"Target labels (from database): {auto_labels}")
120
121
  glob_conf.set_labels(self.labels)
121
122
  self.util.debug(f"loaded databases {dbs}")
122
123
 
@@ -159,7 +160,8 @@ class Experiment:
159
160
  data.split()
160
161
  data.prepare_labels()
161
162
  self.df_test = pd.concat(
162
- [self.df_test, self.util.make_segmented_index(data.df_test)]
163
+ [self.df_test, self.util.make_segmented_index(
164
+ data.df_test)]
163
165
  )
164
166
  self.df_test.is_labeled = data.is_labeled
165
167
  self.df_test.got_gender = self.got_gender
@@ -260,7 +262,8 @@ class Experiment:
260
262
  test_cats = self.df_test[self.target].unique()
261
263
  else:
262
264
  # if there is no target, copy a dummy label
263
- self.df_test = self._add_random_target(self.df_test).astype("str")
265
+ self.df_test = self._add_random_target(
266
+ self.df_test).astype("str")
264
267
  train_cats = self.df_train[self.target].unique()
265
268
  # print(f"df_train: {pd.DataFrame(self.df_train[self.target])}")
266
269
  # print(f"train_cats with target {self.target}: {train_cats}")
@@ -268,7 +271,8 @@ class Experiment:
268
271
  if type(test_cats) == np.ndarray:
269
272
  self.util.debug(f"Categories test (nd.array): {test_cats}")
270
273
  else:
271
- self.util.debug(f"Categories test (list): {list(test_cats)}")
274
+ self.util.debug(
275
+ f"Categories test (list): {list(test_cats)}")
272
276
  if type(train_cats) == np.ndarray:
273
277
  self.util.debug(f"Categories train (nd.array): {train_cats}")
274
278
  else:
@@ -291,7 +295,8 @@ class Experiment:
291
295
 
292
296
  target_factor = self.util.config_val("DATA", "target_divide_by", False)
293
297
  if target_factor:
294
- self.df_test[self.target] = self.df_test[self.target] / float(target_factor)
298
+ self.df_test[self.target] = self.df_test[self.target] / \
299
+ float(target_factor)
295
300
  self.df_train[self.target] = self.df_train[self.target] / float(
296
301
  target_factor
297
302
  )
@@ -314,14 +319,16 @@ class Experiment:
314
319
  def plot_distribution(self, df_labels):
315
320
  """Plot the distribution of samples and speaker per target class and biological sex"""
316
321
  plot = Plots()
317
- sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
322
+ sample_selection = self.util.config_val(
323
+ "EXPL", "sample_selection", "all")
318
324
  plot.plot_distributions(df_labels)
319
325
  if self.got_speaker:
320
326
  plot.plot_distributions_speaker(df_labels)
321
327
 
322
328
  def extract_test_feats(self):
323
329
  self.feats_test = pd.DataFrame()
324
- feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["tests"]))
330
+ feats_name = "_".join(ast.literal_eval(
331
+ glob_conf.config["DATA"]["tests"]))
325
332
  feats_types = self.util.config_val_list("FEATS", "type", ["os"])
326
333
  self.feature_extractor = FeatureExtractor(
327
334
  self.df_test, feats_types, feats_name, "test"
@@ -338,7 +345,8 @@ class Experiment:
338
345
 
339
346
  """
340
347
  df_train, df_test = self.df_train, self.df_test
341
- feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
348
+ feats_name = "_".join(ast.literal_eval(
349
+ glob_conf.config["DATA"]["databases"]))
342
350
  self.feats_test, self.feats_train = pd.DataFrame(), pd.DataFrame()
343
351
  feats_types = self.util.config_val_list("FEATS", "type", [])
344
352
  # for some models no features are needed
@@ -372,7 +380,8 @@ class Experiment:
372
380
  f"test feats ({self.feats_test.shape[0]}) != test labels"
373
381
  f" ({self.df_test.shape[0]})"
374
382
  )
375
- self.df_test = self.df_test[self.df_test.index.isin(self.feats_test.index)]
383
+ self.df_test = self.df_test[self.df_test.index.isin(
384
+ self.feats_test.index)]
376
385
  self.util.warn(f"mew test labels shape: {self.df_test.shape[0]}")
377
386
 
378
387
  self._check_scale()
@@ -383,7 +392,8 @@ class Experiment:
383
392
  """
384
393
  from nkululeko.augmenting.augmenter import Augmenter
385
394
 
386
- sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
395
+ sample_selection = self.util.config_val(
396
+ "AUGMENT", "sample_selection", "all")
387
397
  if sample_selection == "all":
388
398
  df = pd.concat([self.df_train, self.df_test])
389
399
  elif sample_selection == "train":
@@ -478,7 +488,8 @@ class Experiment:
478
488
  """
479
489
  from nkululeko.augmenting.randomsplicer import Randomsplicer
480
490
 
481
- sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
491
+ sample_selection = self.util.config_val(
492
+ "AUGMENT", "sample_selection", "all")
482
493
  if sample_selection == "all":
483
494
  df = pd.concat([self.df_train, self.df_test])
484
495
  elif sample_selection == "train":
@@ -499,7 +510,8 @@ class Experiment:
499
510
  plot_feats = eval(
500
511
  self.util.config_val("EXPL", "feature_distributions", "False")
501
512
  )
502
- sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
513
+ sample_selection = self.util.config_val(
514
+ "EXPL", "sample_selection", "all")
503
515
  # get the data labels
504
516
  if sample_selection == "all":
505
517
  df_labels = pd.concat([self.df_train, self.df_test])
@@ -562,7 +574,8 @@ class Experiment:
562
574
  for scat_target in scat_targets:
563
575
  if self.util.is_categorical(df_labels[scat_target]):
564
576
  for scatter in scatters:
565
- plots.scatter_plot(df_feats, df_labels, scat_target, scatter)
577
+ plots.scatter_plot(
578
+ df_feats, df_labels, scat_target, scatter)
566
579
  else:
567
580
  self.util.debug(
568
581
  f"{self.name}: binning continuous variable to categories"
@@ -651,7 +664,8 @@ class Experiment:
651
664
  preds = best.preds
652
665
  speakers = self.df_test.speaker.values
653
666
  print(f"{len(truths)} {len(preds)} {len(speakers) }")
654
- df = pd.DataFrame(data={"truth": truths, "pred": preds, "speaker": speakers})
667
+ df = pd.DataFrame(
668
+ data={"truth": truths, "pred": preds, "speaker": speakers})
655
669
  plot_name = "result_combined_per_speaker"
656
670
  self.util.debug(
657
671
  f"plotting speaker combination ({function}) confusion matrix to"
nkululeko-0.85.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.85.1
3
+ Version: 0.85.2
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -333,6 +333,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
333
333
  Changelog
334
334
  =========
335
335
 
336
+ Version 0.85.2
337
+ --------------
338
+ * added data, and automatic task label detection
339
+
336
340
  Version 0.85.1
337
341
  --------------
338
342
  * fixed bug in model_finetuned that label_num was constant 2
nkululeko-0.85.2.dist-info/RECORD CHANGED
@@ -2,11 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
3
3
  nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
4
4
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
5
- nkululeko/constants.py,sha256=WnTSXQjJmWE-IrXcNSEa5FFV_83-z0EOGXa9trq00uE,39
5
+ nkululeko/constants.py,sha256=l15EMSj8vmejkCKCzQ6jMrgj5PuNrcHIREXt9kbSw7U,39
6
6
  nkululeko/demo.py,sha256=8bl15Kitoesnz8oa8yrs52T6YCSOhWbbq9PnZ8Hj6D0,3232
7
7
  nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
8
8
  nkululeko/demo_predictor.py,sha256=es56xbT8ifkS_vnrlb5NTZT54gNmeUtNlA4zVA_gnN8,4757
9
- nkululeko/experiment.py,sha256=9Nw23b7sVOciH8IaOuAAKbY7otXYSsPrj_rQCA_U9cc,30465
9
+ nkululeko/experiment.py,sha256=ZsSWdasWUyIBF_4vxb4FxvHs42pytG7ErUOABA-WWTo,30722
10
10
  nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
11
11
  nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
12
12
  nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzwM,3976
@@ -46,7 +46,7 @@ nkululeko/autopredict/ap_valence.py,sha256=n-hctRKySzhmJtowuMOTUu0T_ld3uK5pnfOzW
46
46
  nkululeko/autopredict/estimate_snr.py,sha256=S-bpS0xFkwWc4Ch75UrjbS8y538lQ0U3g_iLRFXureY,5048
47
47
  nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
48
  nkululeko/data/dataset.py,sha256=JGzMD6HIvkFkYBekmbmslIKc5ADaCj06T-8gpqH_kFo,27650
49
- nkululeko/data/dataset_csv.py,sha256=uLa7jW4w2ft299NkpXZMD361kPHF8oSYoIZ_ucxhuOM,3884
49
+ nkululeko/data/dataset_csv.py,sha256=vTnjIc2UdSJT7foL-ltE9MWrZTCg0nplwKdEtMPxt2o,3933
50
50
  nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
52
52
  nkululeko/feat_extract/feats_agender_agender.py,sha256=tgH2BnwcxpvuLmOkrMbVdBSX0Onfz2MG12FsddalRKI,3424
@@ -106,8 +106,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
106
106
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
107
107
  nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
108
108
  nkululeko/utils/util.py,sha256=b1IHFucRNuF9Iyv5IJeK4AEg0Rga0xKG80UM5GWWdHA,13816
109
- nkululeko-0.85.1.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
110
- nkululeko-0.85.1.dist-info/METADATA,sha256=RonY9PdKyHjwYsZ3T9TgEs1JNnY1qbMdDr-Sp6kcCW8,36591
111
- nkululeko-0.85.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
112
- nkululeko-0.85.1.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
113
- nkululeko-0.85.1.dist-info/RECORD,,
109
+ nkululeko-0.85.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
110
+ nkululeko-0.85.2.dist-info/METADATA,sha256=RVGREhA1jakUtQ707C0ecklnUZwx4skVHV0UbPwEsn0,36671
111
+ nkululeko-0.85.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
112
+ nkululeko-0.85.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
113
+ nkululeko-0.85.2.dist-info/RECORD,,