nkululeko 0.86.7__py3-none-any.whl → 0.86.8__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.86.7"
1
+ VERSION="0.86.8"
2
2
  SAMPLING_RATE = 16000
nkululeko/experiment.py CHANGED
@@ -5,13 +5,13 @@ import pickle
5
5
  import random
6
6
  import time
7
7
 
8
+ import audeer
9
+ import audformat
8
10
  import numpy as np
9
11
  import pandas as pd
10
12
  from sklearn.preprocessing import LabelEncoder
11
13
 
12
- import audeer
13
- import audformat
14
-
14
+ import nkululeko.glob_conf as glob_conf
15
15
  from nkululeko.data.dataset import Dataset
16
16
  from nkululeko.data.dataset_csv import Dataset_CSV
17
17
  from nkululeko.demo_predictor import Demo_predictor
@@ -19,8 +19,6 @@ from nkululeko.feat_extract.feats_analyser import FeatureAnalyser
19
19
  from nkululeko.feature_extractor import FeatureExtractor
20
20
  from nkululeko.file_checker import FileChecker
21
21
  from nkululeko.filter_data import DataFilter
22
- from nkululeko.filter_data import filter_min_dur
23
- import nkululeko.glob_conf as glob_conf
24
22
  from nkululeko.plots import Plots
25
23
  from nkululeko.reporting.report import Report
26
24
  from nkululeko.runmanager import Runmanager
@@ -109,7 +107,8 @@ class Experiment:
109
107
  # print keys/column
110
108
  dbs = ",".join(list(self.datasets.keys()))
111
109
  labels = self.util.config_val("DATA", "labels", False)
112
- auto_labels = list(next(iter(self.datasets.values())).df[self.target].unique())
110
+ auto_labels = list(
111
+ next(iter(self.datasets.values())).df[self.target].unique())
113
112
  if labels:
114
113
  self.labels = ast.literal_eval(labels)
115
114
  self.util.debug(f"Using target labels (from config): {labels}")
@@ -159,7 +158,8 @@ class Experiment:
159
158
  data.split()
160
159
  data.prepare_labels()
161
160
  self.df_test = pd.concat(
162
- [self.df_test, self.util.make_segmented_index(data.df_test)]
161
+ [self.df_test, self.util.make_segmented_index(
162
+ data.df_test)]
163
163
  )
164
164
  self.df_test.is_labeled = data.is_labeled
165
165
  self.df_test.got_gender = self.got_gender
@@ -260,7 +260,8 @@ class Experiment:
260
260
  test_cats = self.df_test[self.target].unique()
261
261
  else:
262
262
  # if there is no target, copy a dummy label
263
- self.df_test = self._add_random_target(self.df_test).astype("str")
263
+ self.df_test = self._add_random_target(
264
+ self.df_test).astype("str")
264
265
  train_cats = self.df_train[self.target].unique()
265
266
  # print(f"df_train: {pd.DataFrame(self.df_train[self.target])}")
266
267
  # print(f"train_cats with target {self.target}: {train_cats}")
@@ -268,7 +269,8 @@ class Experiment:
268
269
  if type(test_cats) == np.ndarray:
269
270
  self.util.debug(f"Categories test (nd.array): {test_cats}")
270
271
  else:
271
- self.util.debug(f"Categories test (list): {list(test_cats)}")
272
+ self.util.debug(
273
+ f"Categories test (list): {list(test_cats)}")
272
274
  if type(train_cats) == np.ndarray:
273
275
  self.util.debug(f"Categories train (nd.array): {train_cats}")
274
276
  else:
@@ -291,7 +293,8 @@ class Experiment:
291
293
 
292
294
  target_factor = self.util.config_val("DATA", "target_divide_by", False)
293
295
  if target_factor:
294
- self.df_test[self.target] = self.df_test[self.target] / float(target_factor)
296
+ self.df_test[self.target] = self.df_test[self.target] / \
297
+ float(target_factor)
295
298
  self.df_train[self.target] = self.df_train[self.target] / float(
296
299
  target_factor
297
300
  )
@@ -314,14 +317,16 @@ class Experiment:
314
317
  def plot_distribution(self, df_labels):
315
318
  """Plot the distribution of samples and speaker per target class and biological sex"""
316
319
  plot = Plots()
317
- sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
320
+ sample_selection = self.util.config_val(
321
+ "EXPL", "sample_selection", "all")
318
322
  plot.plot_distributions(df_labels)
319
323
  if self.got_speaker:
320
324
  plot.plot_distributions_speaker(df_labels)
321
325
 
322
326
  def extract_test_feats(self):
323
327
  self.feats_test = pd.DataFrame()
324
- feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["tests"]))
328
+ feats_name = "_".join(ast.literal_eval(
329
+ glob_conf.config["DATA"]["tests"]))
325
330
  feats_types = self.util.config_val_list("FEATS", "type", ["os"])
326
331
  self.feature_extractor = FeatureExtractor(
327
332
  self.df_test, feats_types, feats_name, "test"
@@ -338,9 +343,17 @@ class Experiment:
338
343
 
339
344
  """
340
345
  df_train, df_test = self.df_train, self.df_test
341
- feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
346
+ feats_name = "_".join(ast.literal_eval(
347
+ glob_conf.config["DATA"]["databases"]))
342
348
  self.feats_test, self.feats_train = pd.DataFrame(), pd.DataFrame()
343
- feats_types = self.util.config_val_list("FEATS", "type", [])
349
+ feats_types = self.util.config_val("FEATS", "type", "os")
350
+ # Ensure feats_types is always a list of strings
351
+ if isinstance(feats_types, str):
352
+ if feats_types.startswith("[") and feats_types.endswith("]"):
353
+ feats_types = ast.literal_eval(feats_types)
354
+ else:
355
+ feats_types = [feats_types]
356
+ # print(f"feats_types: {feats_types}")
344
357
  # for some models no features are needed
345
358
  if len(feats_types) == 0:
346
359
  self.util.debug("no feature extractor specified.")
@@ -372,7 +385,8 @@ class Experiment:
372
385
  f"test feats ({self.feats_test.shape[0]}) != test labels"
373
386
  f" ({self.df_test.shape[0]})"
374
387
  )
375
- self.df_test = self.df_test[self.df_test.index.isin(self.feats_test.index)]
388
+ self.df_test = self.df_test[self.df_test.index.isin(
389
+ self.feats_test.index)]
376
390
  self.util.warn(f"new test labels shape: {self.df_test.shape[0]}")
377
391
 
378
392
  self._check_scale()
@@ -387,7 +401,8 @@ class Experiment:
387
401
  """Augment the selected samples."""
388
402
  from nkululeko.augmenting.augmenter import Augmenter
389
403
 
390
- sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
404
+ sample_selection = self.util.config_val(
405
+ "AUGMENT", "sample_selection", "all")
391
406
  if sample_selection == "all":
392
407
  df = pd.concat([self.df_train, self.df_test])
393
408
  elif sample_selection == "train":
@@ -482,7 +497,8 @@ class Experiment:
482
497
  """
483
498
  from nkululeko.augmenting.randomsplicer import Randomsplicer
484
499
 
485
- sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
500
+ sample_selection = self.util.config_val(
501
+ "AUGMENT", "sample_selection", "all")
486
502
  if sample_selection == "all":
487
503
  df = pd.concat([self.df_train, self.df_test])
488
504
  elif sample_selection == "train":
@@ -503,7 +519,8 @@ class Experiment:
503
519
  plot_feats = eval(
504
520
  self.util.config_val("EXPL", "feature_distributions", "False")
505
521
  )
506
- sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
522
+ sample_selection = self.util.config_val(
523
+ "EXPL", "sample_selection", "all")
507
524
  # get the data labels
508
525
  if sample_selection == "all":
509
526
  df_labels = pd.concat([self.df_train, self.df_test])
@@ -566,7 +583,8 @@ class Experiment:
566
583
  for scat_target in scat_targets:
567
584
  if self.util.is_categorical(df_labels[scat_target]):
568
585
  for scatter in scatters:
569
- plots.scatter_plot(df_feats, df_labels, scat_target, scatter)
586
+ plots.scatter_plot(
587
+ df_feats, df_labels, scat_target, scatter)
570
588
  else:
571
589
  self.util.debug(
572
590
  f"{self.name}: binning continuous variable to categories"
@@ -657,7 +675,8 @@ class Experiment:
657
675
  preds = best.preds
658
676
  speakers = self.df_test.speaker.values
659
677
  print(f"{len(truths)} {len(preds)} {len(speakers) }")
660
- df = pd.DataFrame(data={"truth": truths, "pred": preds, "speaker": speakers})
678
+ df = pd.DataFrame(
679
+ data={"truth": truths, "pred": preds, "speaker": speakers})
661
680
  plot_name = "result_combined_per_speaker"
662
681
  self.util.debug(
663
682
  f"plotting speaker combination ({function}) confusion matrix to"
@@ -733,7 +752,6 @@ class Experiment:
733
752
  if model.is_ann():
734
753
  print("converting to onnx from torch")
735
754
  else:
736
- from skl2onnx import to_onnx
737
755
 
738
756
  print("converting to onnx from sklearn")
739
757
  # save the rest
@@ -39,16 +39,20 @@ class FeatureExtractor:
39
39
  self.feats = pd.DataFrame()
40
40
  for feats_type in self.feats_types:
41
41
  store_name = f"{self.data_name}_{feats_type}"
42
- self.feat_extractor = self._get_feat_extractor(store_name, feats_type)
42
+ self.feat_extractor = self._get_feat_extractor(
43
+ store_name, feats_type)
43
44
  self.feat_extractor.extract()
44
45
  self.feat_extractor.filter()
45
- self.feats = pd.concat([self.feats, self.feat_extractor.df], axis=1)
46
+ self.feats = pd.concat(
47
+ [self.feats, self.feat_extractor.df], axis=1)
46
48
  return self.feats
47
49
 
48
50
  def extract_sample(self, signal, sr):
49
51
  return self.feat_extractor.extract_sample(signal, sr)
50
52
 
51
53
  def _get_feat_extractor(self, store_name, feats_type):
54
+ if isinstance(feats_type, list) and len(feats_type) == 1:
55
+ feats_type = feats_type[0]
52
56
  feat_extractor_class = self._get_feat_extractor_class(feats_type)
53
57
  if feat_extractor_class is None:
54
58
  self.util.error(f"unknown feats_type: {feats_type}")
@@ -103,13 +107,15 @@ class FeatureExtractor:
103
107
  prefix, _, ext = feats_type.partition("-")
104
108
  from importlib import import_module
105
109
 
106
- module = import_module(f"nkululeko.feat_extract.feats_{prefix.lower()}")
110
+ module = import_module(
111
+ f"nkululeko.feat_extract.feats_{prefix.lower()}")
107
112
  class_name = f"{prefix.capitalize()}"
108
113
  return getattr(module, class_name)
109
114
 
110
115
  def _get_feat_extractor_by_name(self, feats_type):
111
116
  from importlib import import_module
112
117
 
113
- module = import_module(f"nkululeko.feat_extract.feats_{feats_type.lower()}")
118
+ module = import_module(
119
+ f"nkululeko.feat_extract.feats_{feats_type.lower()}")
114
120
  class_name = f"{feats_type.capitalize()}Set"
115
121
  return getattr(module, class_name)
nkululeko/utils/util.py CHANGED
@@ -5,15 +5,15 @@ import os.path
5
5
  import pickle
6
6
  import sys
7
7
 
8
- import numpy as np
9
- import pandas as pd
10
-
11
8
  import audeer
12
9
  import audformat
10
+ import numpy as np
11
+ import pandas as pd
13
12
 
14
13
 
15
14
  class Util:
16
- # a list of words that need not to be warned upon if default values are used
15
+ # a list of words that need not to be warned upon if default values are
16
+ # used
17
17
  stopvals = [
18
18
  "all",
19
19
  False,
@@ -40,7 +40,8 @@ class Util:
40
40
  self.got_data_roots = self.config_val(
41
41
  "DATA", "root_folders", False)
42
42
  if self.got_data_roots:
43
- # if there is a global data rootfolder file, read from there
43
+ # if there is a global data rootfolder file, read from
44
+ # there
44
45
  if not os.path.isfile(self.got_data_roots):
45
46
  self.error(f"no such file: {self.got_data_roots}")
46
47
  self.data_roots = configparser.ConfigParser()
@@ -107,16 +108,17 @@ class Util:
107
108
  if self.got_data_roots:
108
109
  try:
109
110
  if len(key) > 0:
110
- return self.data_roots["DATA"][dataset + "." + key].strip("'\"")
111
+ return self.data_roots["DATA"][dataset +
112
+ "." + key].strip("'\"")
111
113
  else:
112
114
  return self.data_roots["DATA"][dataset].strip("'\"")
113
115
  except KeyError:
114
- if not default in self.stopvals:
116
+ if default not in self.stopvals:
115
117
  self.debug(
116
- f"value for {key} not found, using default:" f" {default}"
117
- )
118
+ f"value for {key} not found, using default:"
119
+ f" {default}")
118
120
  return default
119
- if not default in self.stopvals:
121
+ if default not in self.stopvals:
120
122
  self.debug(
121
123
  f"value for {key} not found, using default: {default}")
122
124
  return default
@@ -182,7 +184,7 @@ class Util:
182
184
 
183
185
  def get_feattype_name(self):
184
186
  """
185
- Get a string as name from all feature sets that are useed
187
+ Get a string as name from all feature sets that are used
186
188
  """
187
189
  return "_".join(ast.literal_eval(self.config["FEATS"]["type"]))
188
190
 
@@ -205,7 +207,12 @@ class Util:
205
207
  def get_model_description(self):
206
208
  mt = ""
207
209
  mt = f'{self.config["MODEL"]["type"]}'
208
- ft = "_".join(ast.literal_eval(self.config["FEATS"]["type"]))
210
+ # ft = "_".join(ast.literal_eval(self.config["FEATS"]["type"]))
211
+ ft_value = self.config["FEATS"]["type"]
212
+ if isinstance(ft_value, str) and ft_value.startswith("[") and ft_value.endswith("]"):
213
+ ft = "_".join(ast.literal_eval(ft_value))
214
+ else:
215
+ ft = ft_value
209
216
  ft += "_"
210
217
  layer_string = ""
211
218
  layer_s = self.config_val("MODEL", "layers", False)
@@ -230,9 +237,8 @@ class Util:
230
237
  ["FEATS", "wav2vec2.layer"],
231
238
  ]
232
239
  for option in options:
233
- return_string += self._get_value_descript(option[0], option[1]).replace(
234
- ".", "-"
235
- )
240
+ return_string += self._get_value_descript(
241
+ option[0], option[1]).replace(".", "-")
236
242
  return return_string
237
243
 
238
244
  def get_plot_name(self):
@@ -286,7 +292,7 @@ class Util:
286
292
  try:
287
293
  return ast.literal_eval(self.config[section][key])
288
294
  except KeyError:
289
- if not default in self.stopvals:
295
+ if default not in self.stopvals:
290
296
  self.debug(
291
297
  f"value for {key} not found, using default: {default}")
292
298
  return default
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.86.7
3
+ Version: 0.86.8
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -343,6 +343,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
343
343
  Changelog
344
344
  =========
345
345
 
346
+ Version 0.86.8
347
+ --------------
348
+ * handle single feature sets as strings in the config
349
+
346
350
  Version 0.86.7
347
351
  --------------
348
352
  * handles now audformat tables where the target is in a file index
@@ -2,14 +2,14 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
3
3
  nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
4
4
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
5
- nkululeko/constants.py,sha256=CscqJhC7nceHk2wmZd2bBFSeFExtr0HkXt99qpAZU4E,39
5
+ nkululeko/constants.py,sha256=FOK-XF_DHGNFHsO_OMLof3jwgrn2buWnPVfrHy5QBm8,39
6
6
  nkululeko/demo.py,sha256=WSKr-W5uJ9DQfemK923g7Hd5V3kgAn03Er0JX1Pa45I,5142
7
7
  nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
8
8
  nkululeko/demo_predictor.py,sha256=es56xbT8ifkS_vnrlb5NTZT54gNmeUtNlA4zVA_gnN8,4757
9
- nkululeko/experiment.py,sha256=5nF-eDf8OCp6KRIU7KnryWL5SLJQUtr2BueHhEdcKw0,31040
9
+ nkululeko/experiment.py,sha256=s9PIjm45dR9yzmHu_69JpBjX9qMVzi5wIgPfMR3F44A,31530
10
10
  nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
11
11
  nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
12
- nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzwM,3976
12
+ nkululeko/feature_extractor.py,sha256=rL-TybLmjZz5uxT9LNTORaDat9FKp_1qloxbyMrinyE,4141
13
13
  nkululeko/file_checker.py,sha256=LoLnL8aHpW-axMQ46qbqrManTs5otG9ShpEZuz9iRSk,3474
14
14
  nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
15
15
  nkululeko/glob_conf.py,sha256=KL9YJQTHvTztxo1vr25qRRgaPnx4NTg0XrdbovKGMmw,525
@@ -104,9 +104,9 @@ nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5Ev
104
104
  nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
105
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
106
106
  nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
107
- nkululeko/utils/util.py,sha256=ILpfNuaeq-hy1bUkRhVrzO2wG9z9Upaozs9EBoIaMG0,14123
108
- nkululeko-0.86.7.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
109
- nkululeko-0.86.7.dist-info/METADATA,sha256=t5cI43YRp3qmyJj03ACfgCbKoAuLYImDCLS1QkYbMQM,38024
110
- nkululeko-0.86.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
111
- nkululeko-0.86.7.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
112
- nkululeko-0.86.7.dist-info/RECORD,,
107
+ nkululeko/utils/util.py,sha256=ZCS02mE2c3_h9_q4hpsSm4XAooCranqRF_5pY-6055E,14432
108
+ nkululeko-0.86.8.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
109
+ nkululeko-0.86.8.dist-info/METADATA,sha256=5TQSWqzrN9E7XJGcVn5oPKGl6qy-RliYGEG2Ycl46qk,38109
110
+ nkululeko-0.86.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
111
+ nkululeko-0.86.8.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
112
+ nkululeko-0.86.8.dist-info/RECORD,,