nkululeko 0.79.5__py3-none-any.whl → 0.80.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.79.5"
1
+ VERSION="0.80.1"
2
2
  SAMPLING_RATE = 16000
@@ -0,0 +1,71 @@
1
+ # demo_feats.py
2
+ # Test some features extracted
3
+
4
+ from nkululeko.experiment import Experiment
5
+ import configparser
6
+ from nkululeko.utils.util import Util
7
+ from nkululeko.constants import VERSION
8
+ import argparse
9
+ import os
10
+
11
+
12
+ def main(src_dir):
13
+ parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
14
+ parser.add_argument("--config", default="exp.ini", help="The base configuration")
15
+ parser.add_argument(
16
+ "--file", help="A file that should be processed (16kHz mono wav)"
17
+ )
18
+ parser.add_argument(
19
+ "--list",
20
+ help=(
21
+ "A file with a list of files, one per line, that should be"
22
+ " processed (16kHz mono wav)"
23
+ ),
24
+ nargs="?",
25
+ default=None,
26
+ )
27
+ parser.add_argument(
28
+ "--outfile",
29
+ help=("A filename to store the results in CSV"),
30
+ nargs="?",
31
+ default=None,
32
+ )
33
+ args = parser.parse_args()
34
+ if args.config is not None:
35
+ config_file = args.config
36
+ else:
37
+ config_file = f"{src_dir}/exp.ini"
38
+
39
+ # test if the configuration file exists
40
+ if not os.path.isfile(config_file):
41
+ print(f"ERROR: no such file: {config_file}")
42
+ exit()
43
+
44
+ # load one configuration per experiment
45
+ config = configparser.ConfigParser()
46
+ config.read(config_file)
47
+
48
+ # create a new experiment
49
+ expr = Experiment(config)
50
+ module = "demo_feats"
51
+ expr.set_module(module)
52
+ util = Util(module)
53
+ util.debug(
54
+ f"running {expr.name} from config {config_file}, nkululeko version"
55
+ f" {VERSION}"
56
+ )
57
+
58
+ if args.file is None and args.list is None:
59
+ expr.demo_feats(None, False, args.outfile)
60
+ else:
61
+ if args.list is None:
62
+ expr.demo_feats(args.file, False, args.outfile)
63
+ else:
64
+ expr.demo_feats(args.list, True, args.outfile)
65
+
66
+ print("DONE")
67
+
68
+
69
+ if __name__ == "__main__":
70
+ cwd = os.path.dirname(os.path.abspath(__file__))
71
+ main(cwd) # use this if you want to state the config file path on command line
nkululeko/experiment.py CHANGED
@@ -357,7 +357,7 @@ class Experiment:
357
357
  self.df_train = self.df_train[
358
358
  self.df_train.index.isin(self.feats_train.index)
359
359
  ]
360
- self.util.warn(f"mew train labels shape: {self.df_train.shape[0]}")
360
+ self.util.warn(f"new train labels shape: {self.df_train.shape[0]}")
361
361
  if self.feats_test.shape[0] < self.df_test.shape[0]:
362
362
  self.util.warn(
363
363
  f"test feats ({self.feats_test.shape[0]}) != test labels"
@@ -1,10 +1,11 @@
1
1
  # feats_import.py
2
2
 
3
- from nkululeko.utils.util import Util
4
- from nkululeko.feat_extract.featureset import Featureset
5
3
  import os
6
- import pandas as pd
4
+ import ast
7
5
  import audformat
6
+ import pandas as pd
7
+ from nkululeko.utils.util import Util
8
+ from nkululeko.feat_extract.featureset import Featureset
8
9
 
9
10
 
10
11
  class Importset(Featureset):
@@ -14,32 +15,29 @@ class Importset(Featureset):
14
15
  super().__init__(name, data_df)
15
16
 
16
17
  def extract(self):
17
- """Import the features or load them from disk if present."""
18
- store = self.util.get_path("store")
19
- storage = f"{store}{self.name}.pkl"
20
- extract = eval(self.util.config_val("FEATS", "needs_feature_extraction", False))
21
- no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
22
- feat_import_file = self.util.config_val("FEATS", "import_file", False)
23
- if not os.path.isfile(feat_import_file):
24
- self.util.warn(f"no import file: {feat_import_file}")
25
- if extract or no_reuse or not os.path.isfile(storage):
26
- self.util.debug(f"importing features for {self.name}")
27
- # df = pd.read_csv(feat_import_file, sep=',', header=0,
28
- # index_col=['file', 'start', 'end'])
18
+ """Import the features."""
19
+ self.util.debug(f"importing features for {self.name}")
20
+ try:
21
+ feat_import_files = self.util.config_val("FEATS", "import_file", False)
22
+ feat_import_files = ast.literal_eval(feat_import_files)
23
+ except ValueError as e:
24
+ self.util.error(
25
+ "feature type == import needs import_file = ['file1', 'filex']"
26
+ )
27
+ except SyntaxError as se:
28
+ if type(feat_import_files) == str:
29
+ feat_import_files = [feat_import_files]
30
+ else:
31
+ self.util.error(f"import_file is wrong: {feat_import_files}")
32
+
33
+ feat_df = pd.DataFrame()
34
+ for feat_import_file in feat_import_files:
35
+ if not os.path.isfile(feat_import_file):
36
+ self.util.error(f"no import file: {feat_import_file}")
29
37
  df = audformat.utils.read_csv(feat_import_file)
30
- # scale features before use?
31
- # from sklearn.preprocessing import StandardScaler
32
- # scaler = StandardScaler()
33
- # scaled_features = scaler.fit_transform(df.values)
34
- # df = pd.DataFrame(scaled_features, index=df.index, columns=df.columns)
35
- # use only the rows from the data index
36
- # df = self.data_df.join(df).drop(columns=self.data_df.columns)
37
- df = df.loc[self.data_df.index]
38
- # df = pd.concat([self.data_df, df], axis=1, join="inner").drop(columns=self.data_df.columns)
39
- # in any case, store to disk for later use
40
- df.to_pickle(storage)
41
- # and assign to be the "official" feature set
42
- self.df = df
43
- else:
44
- self.util.debug("reusing imported features.")
45
- self.df = pd.read_pickle(storage)
38
+ df = df[df.index.isin(self.data_df.index)]
39
+ feat_df = pd.concat([feat_df, df])
40
+ if feat_df.shape[0] == 0:
41
+ self.util.error(f"Imported features for data set {self.name} not found!")
42
+ # and assign to be the "official" feature set
43
+ self.df = feat_df
nkululeko/multidb.py CHANGED
@@ -9,7 +9,6 @@ import matplotlib.pyplot as plt
9
9
  import matplotlib.cm as cm
10
10
  import numpy as np
11
11
  import os
12
- import audeer
13
12
  from nkululeko.experiment import Experiment
14
13
  import configparser
15
14
  from nkululeko.utils.util import Util
@@ -41,9 +40,9 @@ def main(src_dir):
41
40
  last_epochs = np.zeros(dim * dim).reshape([dim, dim])
42
41
  # check if some data should be added to training
43
42
  try:
44
- extra_train = config["CROSSDB"]["train_extra"]
43
+ extra_trains = config["CROSSDB"]["train_extra"]
45
44
  except KeyError:
46
- extra_train = False
45
+ extra_trains = False
47
46
 
48
47
  for i in range(dim):
49
48
  for j in range(dim):
@@ -54,9 +53,12 @@ def main(src_dir):
54
53
  if i == j:
55
54
  dataset = datasets[i]
56
55
  print(f"running {dataset}")
57
- if extra_train:
58
- config["DATA"]["databases"] = f"['{dataset}', '{extra_train}']"
59
- config["DATA"][f"{extra_train}.split_strategy"] = "train"
56
+ if extra_trains:
57
+ extra_trains_1 = extra_trains.removeprefix("[").removesuffix("]")
58
+ config["DATA"]["databases"] = f"['{dataset}', {extra_trains_1}]"
59
+ extra_trains_2 = ast.literal_eval(extra_trains)
60
+ for extra_train in extra_trains_2:
61
+ config["DATA"][f"{extra_train}.split_strategy"] = "train"
60
62
  else:
61
63
  config["DATA"]["databases"] = f"['{dataset}']"
62
64
  config["EXP"]["name"] = dataset
@@ -64,13 +66,16 @@ def main(src_dir):
64
66
  train = datasets[i]
65
67
  test = datasets[j]
66
68
  print(f"running train: {train}, test: {test}")
67
- if extra_train:
69
+ if extra_trains:
70
+ extra_trains_1 = extra_trains.removeprefix("[").removesuffix("]")
68
71
  config["DATA"][
69
72
  "databases"
70
- ] = f"['{train}', '{test}', '{extra_train}']"
73
+ ] = f"['{train}', '{test}', {extra_trains_1}]"
71
74
  config["DATA"][f"{test}.split_strategy"] = "test"
72
75
  config["DATA"][f"{train}.split_strategy"] = "train"
73
- config["DATA"][f"{extra_train}.split_strategy"] = "train"
76
+ extra_trains_2 = ast.literal_eval(extra_trains)
77
+ for extra_train in extra_trains_2:
78
+ config["DATA"][f"{extra_train}.split_strategy"] = "train"
74
79
  else:
75
80
  config["DATA"]["databases"] = f"['{train}', '{test}']"
76
81
  config["DATA"][f"{test}.split_strategy"] = "test"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.79.5
3
+ Version: 0.80.1
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -187,16 +187,22 @@ combine_per_speaker = mode
187
187
  Read the [Hello World example](#hello-world-example) for initial usage with Emo-DB dataset.
188
188
 
189
189
  Here is an overview of the interfaces/modules:
190
+
191
+ All of them take *--config <my_config.ini>* as an argument.
192
+
190
193
  * **nkululeko.nkululeko**: do machine learning experiments combining features and learners
191
- * **nkululeko.multidb**: do multiple experiments, comparing several databases cross and in itself
192
- * **nkululeko.demo**: demo the current best model on the command line
193
- * **nkululeko.test**: predict a series of files with the current best model
194
- * **nkululeko.explore**: perform data exploration
195
- * **nkululeko.augment**: augment the current training data
196
- * **nkululeko.aug_train**: augment the current training data and do a training including this data
197
- * **nkululeko.predict**: predict features like SNR, MOS, arousal/valence, age/gender, with DNN models
198
- * **nkululeko.segment**: segment a database based on VAD (voice activity detection)
199
- * **nkululeko.resample**: check on all sampling rates and change to 16kHz
194
+ * **nkululeko.multidb**: do [multiple experiments](http://blog.syntheticspeech.de/2024/01/02/nkululeko-compare-several-databases/), comparing several databases cross and in itself
195
+ * **nkululeko.demo**: [demo the current best model](http://blog.syntheticspeech.de/2022/09/01/nkululeko-how-to-evaluate-a-test-set-with-a-given-best-model/) on the command line
196
+ * *--list* (optional) list of input files
197
+ * *--file* (optional) name of input file
198
+ * *--outfile* (optional) name of CSV file for output
199
+ * **nkululeko.test**: predict a [given data set](http://blog.syntheticspeech.de/2022/09/01/nkululeko-how-to-evaluate-a-test-set-with-a-given-best-model/) with the current best model
200
+ * **nkululeko.explore**: perform [data exploration](http://blog.syntheticspeech.de/2023/05/11/nkululeko-how-to-visualize-your-data-distribution/)
201
+ * **nkululeko.augment**: [augment](http://blog.syntheticspeech.de/2023/03/13/nkululeko-how-to-augment-the-training-set/) the current training data
202
+ * **nkululeko.aug_train**: augment the current training data [and do a training](http://blog.syntheticspeech.de/2023/03/13/nkululeko-how-to-augment-the-training-set/) including this data
203
+ * **nkululeko.predict**: [predict features](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/) like SNR, MOS, arousal/valence, age/gender, with DNN models
204
+ * **nkululeko.segment**: [segment a database](http://blog.syntheticspeech.de/2023/07/14/nkululeko-segmenting-a-database/) based on VAD (voice activity detection)
205
+ * **nkululeko.resample**: check on all [sampling rates and change](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/) to 16kHz
200
206
 
201
207
  There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
202
208
  * [Introduction](http://blog.syntheticspeech.de/2021/08/04/machine-learning-experiment-framework/)
@@ -316,6 +322,16 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
316
322
  Changelog
317
323
  =========
318
324
 
325
+ Version 0.80.1
326
+ --------------
327
+ * added support for string value in import_features
328
+ + added support for multiple extra training databases when doing multi-db experiments
329
+
330
+ Version 0.80.0
331
+ --------------
332
+ * fixed bug no feature import
333
+ * add support for multiple import feature files
334
+
319
335
  Version 0.79.5
320
336
  --------------
321
337
  * fixed bug on demo without in- or output
@@ -323,7 +339,7 @@ Version 0.79.5
323
339
 
324
340
  Version 0.79.4
325
341
  --------------
326
- * added funcionality in demo for regression
342
+ * added functionality in demo for regression
327
343
 
328
344
  Version 0.79.3
329
345
  --------------
@@ -2,10 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
3
3
  nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
4
4
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
5
- nkululeko/constants.py,sha256=C00eVCYmrYEMiZCYO88sFFchP1FC_YVNsA0z_GS9OSA,39
5
+ nkululeko/constants.py,sha256=PjrmxRyljCQ53PzG5WIALMzgVNR8dJ4k70ZhEHPpQ98,39
6
6
  nkululeko/demo.py,sha256=l_rxmDCaVO3hchWSQYzh3dJ5U-6CHE0umbfxIsES5s0,2196
7
+ nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
7
8
  nkululeko/demo_predictor.py,sha256=o8bXdH2r_7K-9N033iL9i7vYJXS2baPxi8rYqFlhodk,3751
8
- nkululeko/experiment.py,sha256=pP8rLLWW_shVmZ3tPLHoSX1Vh5WVBQkw_aZua77iRNc,29542
9
+ nkululeko/experiment.py,sha256=972eUHFCF08J2HbtFtOzu7vRl2sU7x3fqjxTvY5htjQ,29542
9
10
  nkululeko/explore.py,sha256=5c89hGpjt5mRMN7w2Ajjnr2VjoFF0hOFs0O1BQruw80,2250
10
11
  nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
11
12
  nkululeko/feature_extractor.py,sha256=tKv1b1-o7xNMgBavTR8VY8_H5HKoJEnnosS-KcjmOEU,7281
@@ -13,7 +14,7 @@ nkululeko/file_checker.py,sha256=LoLnL8aHpW-axMQ46qbqrManTs5otG9ShpEZuz9iRSk,347
13
14
  nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
14
15
  nkululeko/glob_conf.py,sha256=iHiVSxDYgmYwdx6z0HuGUMSWrfZfufPHxHb60q2dLRY,453
15
16
  nkululeko/modelrunner.py,sha256=cU6FHbpI2mrG0BY7pn5UgFFpYh3u-v_GH7q73Kknhug,9337
16
- nkululeko/multidb.py,sha256=qJECRCCtIk97i8SY7E_vRY3nSmbBTDp_kylewUub044,5314
17
+ nkululeko/multidb.py,sha256=4ceCu9LFrMGlrcgtz4pWuOQb2KA3jR5uo3FjZgAEBD4,5732
17
18
  nkululeko/nkululeko.py,sha256=Ty8cdusXUec9BHml8Gsp1r7DXuvIBMFXUckMpzILBnQ,1966
18
19
  nkululeko/plots.py,sha256=K88ZRPFGX_r03BT742H06Dde20xZYdltv7dxjgUiAFA,23025
19
20
  nkululeko/predict.py,sha256=dRXX-sQVESa7cNi_56S6UkUOa_pV1g_K4xYtYVM1SJs,1876
@@ -54,7 +55,7 @@ nkululeko/feat_extract/feats_audmodel.py,sha256=ifXo4ItOGiSD8QUA64Ha-lf_4-_0MQuJ
54
55
  nkululeko/feat_extract/feats_audmodel_dim.py,sha256=HZtQc7_4lIbReUz41Ks-EewcKOmkMc0V98pHTCCIMto,2849
55
56
  nkululeko/feat_extract/feats_clap.py,sha256=v82mbjdjGDSKUUBggttw7jW0oka22fWAmfUf-4VmaDU,3379
56
57
  nkululeko/feat_extract/feats_hubert.py,sha256=uL-9mgQHuGPQi1nuUaw6aNU9DscsO89uJAmBdmnCegM,5205
57
- nkululeko/feat_extract/feats_import.py,sha256=SqTuNdbInOO_oXucSlwCTfNz6OUCNyJfUrGX_cS9Mn0,2054
58
+ nkululeko/feat_extract/feats_import.py,sha256=m7Yh1sj7C1yrDDbZAqS75oMMF5rAtO7XC_sdWdQN5Iw,1598
58
59
  nkululeko/feat_extract/feats_mld.py,sha256=Vvu7GZOkn7Vda8eIOXqHjg78zegkFe3vTUaCXyVM0eA,2021
59
60
  nkululeko/feat_extract/feats_mos.py,sha256=XZI7U99QcSuzd1v5pVAo0JwdcrXrRICUNt_K5G6eRPU,4149
60
61
  nkululeko/feat_extract/feats_opensmile.py,sha256=yDRGSiUQV3K3oLxVqq8Cxj5bkc-RiLzDYbAGKC9I5vc,4140
@@ -100,8 +101,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
100
101
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
101
102
  nkululeko/utils/stats.py,sha256=29otJpUp1VqbtDKmlLkPPzBmVfTFiHZ70rUdR4860rM,2788
102
103
  nkululeko/utils/util.py,sha256=Hn27x0f2rjSR-iae2h9_70J4SdXKJTduLFIH13w3db0,12363
103
- nkululeko-0.79.5.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
104
- nkululeko-0.79.5.dist-info/METADATA,sha256=sr4nNPwmT3-EJMnW1x-28aP3lq17n0rybKrqcpu7vA8,32490
105
- nkululeko-0.79.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
106
- nkululeko-0.79.5.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
107
- nkululeko-0.79.5.dist-info/RECORD,,
104
+ nkululeko-0.80.1.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
105
+ nkululeko-0.80.1.dist-info/METADATA,sha256=_kjligQaChrCbn5le4rfDAJrRKOZcAGiPNwNjrbSqA0,33856
106
+ nkululeko-0.80.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
107
+ nkululeko-0.80.1.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
108
+ nkululeko-0.80.1.dist-info/RECORD,,