nkululeko 0.89.2__py3-none-any.whl → 0.90.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/augment.py CHANGED
@@ -37,8 +37,8 @@ def doit(config_file):
37
37
 
38
38
  filename = util.config_val("AUGMENT", "result", "augmented.csv")
39
39
  filename = f"{expr.data_dir}/{filename}"
40
-
41
- if os.path.exists(filename):
40
+ no_reuse = eval(util.config_val("DATA", "no_reuse", "False"))
41
+ if os.path.exists(filename) and not no_reuse:
42
42
  util.debug("files already augmented")
43
43
  else:
44
44
  # load the data
@@ -41,14 +41,17 @@ class Randomsplicer:
41
41
  * top_db: top db level for silence to be recognized (default: 12)
42
42
  """
43
43
 
44
- p_reverse = 0.3
45
- top_db = 12
44
+ p_reverse = float(self.util.config_val("AUGMENT", "p_reverse", "0.3"))
45
+ top_db = float(self.util.config_val("AUGMENT", "top_db", "12"))
46
46
 
47
47
  files = self.df.index.get_level_values(0).values
48
48
  store = self.util.get_path("store")
49
49
  filepath = f"{store}randomspliced/"
50
50
  audeer.mkdir(filepath)
51
- self.util.debug(f"random splicing {sample_selection} samples to {filepath}")
51
+ self.util.debug(
52
+ f"random splicing {sample_selection} samples to {filepath}, "
53
+ + f"p_reverse = {p_reverse}, top_db = {top_db}",
54
+ )
52
55
  newpath = ""
53
56
  index_map = {}
54
57
  for i, f in enumerate(tqdm(files)):
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.89.2"
1
+ VERSION="0.90.0"
2
2
  SAMPLING_RATE = 16000
nkululeko/data/dataset.py CHANGED
@@ -30,8 +30,8 @@ class Dataset:
30
30
  def __init__(self, name):
31
31
  """Constructor setting up name and configuration"""
32
32
  self.name = name
33
- self.target = glob_conf.config["DATA"]["target"]
34
33
  self.util = Util("dataset")
34
+ self.target = self.util.config_val("DATA", "target", "none")
35
35
  self.plot = Plots()
36
36
  self.limit = int(self.util.config_val_data(self.name, "limit", 0))
37
37
  self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
@@ -127,6 +127,9 @@ class Dataset:
127
127
  self.got_gender,
128
128
  self.got_age,
129
129
  ) = self._get_df_for_lists(self.db, df_files_tables)
130
+ if df.shape[0] > 0 and self.target == "none":
131
+ self.df = df
132
+ return
130
133
  if False in {
131
134
  self.is_labeled,
132
135
  self.got_speaker,
@@ -553,7 +556,10 @@ class Dataset:
553
556
  " samples in train/test"
554
557
  )
555
558
  # because this generates new train/test sample quantities, the feature extraction has to be done again
556
- glob_conf.config["FEATS"]["needs_feature_extraction"] = "True"
559
+ try:
560
+ glob_conf.config["FEATS"]["needs_feature_extraction"] = "True"
561
+ except KeyError:
562
+ pass
557
563
 
558
564
  def random_split(self):
559
565
  """One way to split train and eval sets: Specify percentage of random samples"""
nkululeko/experiment.py CHANGED
@@ -101,12 +101,15 @@ class Experiment:
101
101
  if data.got_speaker:
102
102
  self.got_speaker = True
103
103
  self.datasets.update({d: data})
104
- self.target = self.util.config_val("DATA", "target", "emotion")
104
+ self.target = self.util.config_val("DATA", "target", "none")
105
105
  glob_conf.set_target(self.target)
106
106
  # print target via debug
107
107
  self.util.debug(f"target: {self.target}")
108
108
  # print keys/column
109
109
  dbs = ",".join(list(self.datasets.keys()))
110
+ if self.target == "none":
111
+ self.util.debug(f"loaded databases {dbs}")
112
+ return
110
113
  labels = self.util.config_val("DATA", "labels", False)
111
114
  auto_labels = list(next(iter(self.datasets.values())).df[self.target].unique())
112
115
  if labels:
@@ -191,7 +194,8 @@ class Experiment:
191
194
  self.df_train, self.df_test = pd.DataFrame(), pd.DataFrame()
192
195
  for d in self.datasets.values():
193
196
  d.split()
194
- d.prepare_labels()
197
+ if self.target != "none":
198
+ d.prepare_labels()
195
199
  if d.df_train.shape[0] == 0:
196
200
  self.util.debug(f"warn: {d.name} train empty")
197
201
  self.df_train = pd.concat([self.df_train, d.df_train])
@@ -207,6 +211,8 @@ class Experiment:
207
211
  self.df_test.to_csv(storage_test)
208
212
  self.df_train.to_csv(storage_train)
209
213
 
214
+ if self.target == "none":
215
+ return
210
216
  self.util.copy_flags(self, self.df_test)
211
217
  self.util.copy_flags(self, self.df_train)
212
218
  # Try data checks
nkululeko/plots.py CHANGED
@@ -263,7 +263,7 @@ class Plots:
263
263
 
264
264
  def plotcatcont(self, df, cat_col, cont_col, xlab, ylab):
265
265
  """Plot relation of categorical distribution with continuous."""
266
- dist_type = self.util.config_val("EXPL", "dist_type", "hist")
266
+ dist_type = self.util.config_val("EXPL", "dist_type", "kde")
267
267
  cats, cat_str, es = su.get_effect_size(df, cat_col, cont_col)
268
268
  model_type = self.util.get_model_type()
269
269
  if dist_type == "hist" and model_type != "tree":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.89.2
3
+ Version: 0.90.0
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -356,6 +356,12 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
356
356
  Changelog
357
357
  =========
358
358
 
359
+ Version 0.90.0
360
+ --------------
361
+ * augmentation can now be done without target
362
+ * random splicing params configurable
363
+ * made kde the default for continuous/categorical plots
364
+
359
365
  Version 0.89.2
360
366
  --------------
361
367
  * fix shap value calculation
@@ -1,13 +1,13 @@
1
1
  nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
3
- nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
3
+ nkululeko/augment.py,sha256=xNeOR22sXHD5mxv0SEe6kvgEXX0RtiUL4BK-m-BDfcM,3133
4
4
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
5
- nkululeko/constants.py,sha256=WFGVylIst9Be_eHBZ9GiR43Qi4CdRySmNUzyNox6aMM,39
5
+ nkululeko/constants.py,sha256=t11gtE4sZM7oZrRSZhmVhIKwQAI83AN0cEZRPXkw5cs,39
6
6
  nkululeko/demo.py,sha256=bLuHkeEl5rOfm7ecGHCcWATiPK7-njNbtrGljxzNzFs,5088
7
7
  nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
8
8
  nkululeko/demo_predictor.py,sha256=zs1bjhpnKuNCPLJeiyDm19ME1NEDOQT3QNeyVKJq9Yc,4882
9
9
  nkululeko/ensemble.py,sha256=MayHpngGH_FTvSxUsH3NdxJd6WBAosGRFQeQ7cMjIco,12922
10
- nkululeko/experiment.py,sha256=L4PzoScPLG2xTyniVy9evcBy_8CIe3RTeTEUVTqiuvQ,31186
10
+ nkululeko/experiment.py,sha256=BXUmJrJn17W-umYh4O0Jt6ZZzr2u_VDL7Lq7fPEEVMQ,31390
11
11
  nkululeko/explore.py,sha256=AbTVDmuDIaLfALQGvDW1yndcw2ikaEVEZ_fJVuUS070,3940
12
12
  nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
13
13
  nkululeko/feature_extractor.py,sha256=UnspIWz3XrNhKnBBhWZkH2bHvD-sROtrQVqB1JvkUyw,4088
@@ -18,7 +18,7 @@ nkululeko/modelrunner.py,sha256=lJy-xM4QfDDWeL0dLTE_VIb4sYrnd_Z_yJRK3wwohQA,1119
18
18
  nkululeko/multidb.py,sha256=CCjmVsZyvydgOztFlaeBvOJH8nsvU-sPQdFAw8-q0U4,6752
19
19
  nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
20
20
  nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
21
- nkululeko/plots.py,sha256=gfNy9Eu2PhSaykMazBPThcYS5o5KwuQwY2jshAUK5Rk,22965
21
+ nkululeko/plots.py,sha256=p9YyN-xAtdGBKjcA305V0KOagAzG8VG6D_Ceoa9rae4,22964
22
22
  nkululeko/predict.py,sha256=sF091sSSLnEWcISx9ZcULLie3tY5XeFsQJd6b3vrxFg,2409
23
23
  nkululeko/resample.py,sha256=2d9eao_0sLrGZ_KSl8OVKsPor3BkFrlmMhrpB9WelIs,4267
24
24
  nkululeko/runmanager.py,sha256=xvxL5a9d3jtGFqx0Z3nyyxowA368uNyP0ZitO8kxIIE,7581
@@ -30,7 +30,7 @@ nkululeko/test_predictor.py,sha256=DEHE_D3A6m6KJTrpDKceA1n655t_UZV3WQd57K4a3Ho,2
30
30
  nkululeko/test_pretrain.py,sha256=ZWl-bR6nmeSmXkGAIE6zyfQEjN8Zg0rIxfaS-O6Zbas,8465
31
31
  nkululeko/augmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
32
  nkululeko/augmenting/augmenter.py,sha256=XAt0dpmlnKxqyysqCgV3rcz-pRIvOz7rU7dmGDCVAzs,2905
33
- nkululeko/augmenting/randomsplicer.py,sha256=Z5rxdKKUpuncLWuTS6xVfVKUeVbeiYU_dLRHQ5fcg4Y,2669
33
+ nkululeko/augmenting/randomsplicer.py,sha256=jmn4uZc2O_-A_O-GXz7lh0rHR6-2sD9eNG0vwgtRd2w,2861
34
34
  nkululeko/augmenting/randomsplicing.py,sha256=ldym9vZNsZIU5BAAaJVaOmAgmVHNs4a5i5K3bW-WAQU,1791
35
35
  nkululeko/augmenting/resampler.py,sha256=nOBsiQpX6p4jXsP7x6wak78F3B5YYYRmC_iHX8iuOXs,3542
36
36
  nkululeko/autopredict/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -46,7 +46,7 @@ nkululeko/autopredict/ap_stoi.py,sha256=It0Lk-ki-gohA2AzD8nkLAN2WahYvD9rPDGTQuvd
46
46
  nkululeko/autopredict/ap_valence.py,sha256=n-hctRKySzhmJtowuMOTUu0T_ld3uK5pnfOzWeWW4VM,1024
47
47
  nkululeko/autopredict/estimate_snr.py,sha256=S-bpS0xFkwWc4Ch75UrjbS8y538lQ0U3g_iLRFXureY,5048
48
48
  nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
- nkululeko/data/dataset.py,sha256=xaawk5QthuVStWjHWTFBtorcIe71lbPQgC6mHzSXGeI,29286
49
+ nkululeko/data/dataset.py,sha256=QqU1YoBQk41g3MV8bc0iW1YN_gMHDizuG-cjmSq0d_o,29455
50
50
  nkululeko/data/dataset_csv.py,sha256=UGEpi__eT2KFS6Fop6N4HkMrzO-u5VP71gt44kwZavo,4588
51
51
  nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
52
  nkululeko/feat_extract/feats_agender.py,sha256=tMK3_qs8adylNNSR0CS1RjU9RxmpumLqmuyzmc2ZYjA,3184
@@ -107,8 +107,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
107
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
108
108
  nkululeko/utils/stats.py,sha256=eC9dMO-by6CDnGLHDBQu-2B4-BudZNJ0nnWGhKYdUMA,2968
109
109
  nkululeko/utils/util.py,sha256=363Lgmcg6fPKCGbroX0DDyW_zcYNx-Ayqv67qdpfYcw,16710
110
- nkululeko-0.89.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
111
- nkululeko-0.89.2.dist-info/METADATA,sha256=00CLy_4Wm7IktJy7dAkKrXkCMi0f1HUXCoQYMNcp2kw,40729
112
- nkululeko-0.89.2.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
113
- nkululeko-0.89.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
114
- nkululeko-0.89.2.dist-info/RECORD,,
110
+ nkululeko-0.90.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
111
+ nkululeko-0.90.0.dist-info/METADATA,sha256=BiguFg1WzW9crNTqgr2qbtmaOL_PgeLP3M1Azn4Lehk,40900
112
+ nkululeko-0.90.0.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
113
+ nkululeko-0.90.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
114
+ nkululeko-0.90.0.dist-info/RECORD,,