nkululeko 0.81.0__py3-none-any.whl → 0.81.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.81.0"
1
+ VERSION="0.81.2"
2
2
  SAMPLING_RATE = 16000
nkululeko/data/dataset.py CHANGED
@@ -162,7 +162,10 @@ class Dataset:
162
162
  if column not in [self.target, "age", "speaker", "gender"]:
163
163
  df[column] = df_target[column]
164
164
  except audformat.core.errors.BadKeyError:
165
- pass
165
+ if not self.is_labeled:
166
+ self.util.error(
167
+ f"Giving up: no target ({self.target}) column found"
168
+ )
166
169
 
167
170
  if self.is_labeled:
168
171
  # remember the target in case they get labelencoded later
@@ -1,3 +1,4 @@
1
+ import os
1
2
  import pandas as pd
2
3
  import numpy as np
3
4
  import audiofile
@@ -52,13 +53,16 @@ class Demo_predictor:
52
53
  file_list.append(line)
53
54
  for file_name in file_list:
54
55
  test_folder = glob_conf.config["DATA"]["test_folder"]
55
- file_path = test_folder + file_name.strip()
56
+ file_path = os.path.join(test_folder, file_name.strip())
56
57
  sig, sr = audiofile.read(file_path)
57
58
  print(f"predicting file {file_path}")
58
59
  res_dict = self.predict_signal(sig, sr)
59
- df_tmp = pd.DataFrame(res_dict, index=[file_path])
60
+ df_tmp = pd.DataFrame(res_dict, index=[file_name.strip()])
60
61
  df_res = pd.concat([df_res, df_tmp], ignore_index=False)
61
62
  df_res = df_res.set_index(df_res.index.rename("file"))
63
+ # save only filename and prediction (df_tmp) by default
64
+ # drop other columns
65
+ # df_res = df_res[["predicted"]]
62
66
  if self.outfile is not None:
63
67
  df_res.to_csv(self.outfile)
64
68
  else:
@@ -94,7 +98,7 @@ class Demo_predictor:
94
98
  return result_dict
95
99
  else:
96
100
  # experiment is regression and returns one estimation
97
- dict_2["predicted"] = result_dict[0]
101
+ dict_2["predicted"] = result_dict
98
102
  print(dict_2)
99
103
  return dict_2
100
104
 
@@ -294,8 +294,8 @@ def compute_features(file_index):
294
294
  f4_median,
295
295
  ) = measureFormants(sound, 75, 300)
296
296
  # file_list.append(wave_file) # make an ID list
297
- except statistics.StatisticsError as se:
298
- print(f"error on file {wave_file}: {se}")
297
+ except (statistics.StatisticsError, parselmouth.PraatError) as errors:
298
+ print(f"error on file {wave_file}: {errors}")
299
299
 
300
300
  duration_list.append(duration) # make duration list
301
301
  mean_F0_list.append(meanF0) # make a mean F0 list
nkululeko/models/model.py CHANGED
@@ -269,19 +269,24 @@ class Model:
269
269
  )
270
270
  return report
271
271
 
272
+ def get_type(self):
273
+ return "generic"
274
+
272
275
  def predict_sample(self, features):
273
276
  """Predict one sample"""
274
277
  prediction = {}
275
278
  if self.util.exp_is_classification():
276
279
  # get the class probabilities
277
- predictions = self.clf.predict_proba([features])
280
+ if not self.get_type() == "xgb":
281
+ features = [features]
282
+ predictions = self.clf.predict_proba(features)
278
283
  # pred = self.clf.predict(features)
279
284
  for i in range(len(self.clf.classes_)):
280
285
  cat = self.clf.classes_[i]
281
286
  prediction[cat] = predictions[0][i]
282
287
  else:
283
288
  predictions = self.clf.predict(features)
284
- prediction["result"] = predictions[0]
289
+ prediction = predictions[0]
285
290
  return prediction
286
291
 
287
292
  def store(self):
@@ -247,4 +247,4 @@ class MLP_Reg_model(Model):
247
247
  features = np.reshape(features, (-1, 1)).T
248
248
  logits = self.model(features.to(self.device)).reshape(-1)
249
249
  a = logits.numpy()
250
- return a
250
+ return a[0]
@@ -12,8 +12,17 @@ class SVM_model(Model):
12
12
  def __init__(self, df_train, df_test, feats_train, feats_test):
13
13
  super().__init__(df_train, df_test, feats_train, feats_test)
14
14
  c = float(self.util.config_val("MODEL", "C_val", "0.001"))
15
+ if eval(self.util.config_val("MODEL", "class_weight", "False")):
16
+ class_weight = "balanced"
17
+ else:
18
+ class_weight = None
19
+ kernel = self.util.config_val("MODEL", "kernel", "rbf")
15
20
  self.clf = svm.SVC(
16
- kernel="linear", C=c, gamma="scale", probability=True
21
+ kernel=kernel,
22
+ C=c,
23
+ gamma="scale",
24
+ probability=True,
25
+ class_weight=class_weight,
17
26
  ) # set up the classifier
18
27
 
19
28
  def set_C(self, c):
@@ -12,7 +12,9 @@ class SVR_model(Model):
12
12
  def __init__(self, df_train, df_test, feats_train, feats_test):
13
13
  super().__init__(df_train, df_test, feats_train, feats_test)
14
14
  c = float(self.util.config_val("MODEL", "C_val", "0.001"))
15
- self.clf = svm.SVR(kernel="rbf", C=c, probability=True) # set up the classifier
15
+ # kernel: {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, default='rbf'
16
+ kernel = self.util.config_val("MODEL", "kernel", "rbf")
17
+ self.clf = svm.SVR(kernel=kernel, C=c) # set up the classifier
16
18
 
17
19
  def set_C(self, c):
18
20
  """Set the C parameter"""
@@ -10,3 +10,6 @@ class XGB_model(Model):
10
10
  is_classifier = True
11
11
 
12
12
  clf = XGBClassifier() # set up the classifier
13
+
14
+ def get_type(self):
15
+ return "xgb"
nkululeko/utils/util.py CHANGED
@@ -175,10 +175,6 @@ class Util:
175
175
  mt = f'{self.config["MODEL"]["type"]}'
176
176
  ft = "_".join(ast.literal_eval(self.config["FEATS"]["type"]))
177
177
  ft += "_"
178
- set = self.config_val("FEATS", "set", False)
179
- set_string = ""
180
- if set:
181
- set_string += set
182
178
  layer_string = ""
183
179
  layer_s = self.config_val("MODEL", "layers", False)
184
180
  if layer_s:
@@ -186,16 +182,19 @@ class Util:
186
182
  sorted_layers = sorted(layers.items(), key=lambda x: x[1])
187
183
  for l in sorted_layers:
188
184
  layer_string += f"{str(l[1])}-"
189
- return_string = f"{mt}_{ft}{set_string}{layer_string[:-1]}"
185
+ return_string = f"{mt}_{ft}{layer_string[:-1]}"
190
186
  options = [
191
187
  ["MODEL", "C_val"],
188
+ ["MODEL", "kernel"],
192
189
  ["MODEL", "drop"],
190
+ ["MODEL", "class_weight"],
193
191
  ["MODEL", "loss"],
194
192
  ["MODEL", "logo"],
195
193
  ["MODEL", "learning_rate"],
196
194
  ["MODEL", "k_fold_cross"],
197
195
  ["FEATS", "balancing"],
198
196
  ["FEATS", "scale"],
197
+ ["FEATS", "set"],
199
198
  ["FEATS", "wav2vec2.layer"],
200
199
  ]
201
200
  for option in options:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.81.0
3
+ Version: 0.81.2
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -321,6 +321,16 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
321
321
  Changelog
322
322
  =========
323
323
 
324
+ Version 0.81.2
325
+ --------------
326
+ * added handling of parselmouth.PraatError if pitch is out of range
327
+ * changed file path for demo_predictor
328
+
329
+ Version 0.81.1
330
+ --------------
331
+ * fixed bugs in demo module
332
+ * made kernel for SVM/SVR configurable
333
+
324
334
  Version 0.81.0
325
335
  --------------
326
336
  * added test selection to test module
@@ -2,10 +2,10 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
3
3
  nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
4
4
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
5
- nkululeko/constants.py,sha256=pA2Wijj2MuEHU3MRfBhne0rARFs9qnjCZzbVKVMQLTc,39
5
+ nkululeko/constants.py,sha256=zujT9J62h5BIBCxzigDt23S5plsfoyutXsGMdK_xkAM,39
6
6
  nkululeko/demo.py,sha256=me8EdjN-zrzClVy9FEmqbTQyDDON88W8vPpWEE8T0cI,2500
7
7
  nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
8
- nkululeko/demo_predictor.py,sha256=XlaMAuxrDuMIUZdKL2wlKRVqvKl5aUOR6BPmSdj-qcI,4547
8
+ nkululeko/demo_predictor.py,sha256=CQL6DO7QxwmwoB_6DlgDS-pdG1KuvemYJ1NEpMjmMk8,4733
9
9
  nkululeko/experiment.py,sha256=NVhtywaGT5vtreJNlrezp4sq-KIN_gxOjUChvBa7Z38,29575
10
10
  nkululeko/explore.py,sha256=5c89hGpjt5mRMN7w2Ajjnr2VjoFF0hOFs0O1BQruw80,2250
11
11
  nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
@@ -45,7 +45,7 @@ nkululeko/autopredict/ap_stoi.py,sha256=It0Lk-ki-gohA2AzD8nkLAN2WahYvD9rPDGTQuvd
45
45
  nkululeko/autopredict/ap_valence.py,sha256=n-hctRKySzhmJtowuMOTUu0T_ld3uK5pnfOzWeWW4VM,1024
46
46
  nkululeko/autopredict/estimate_snr.py,sha256=kJbvkt2alMN5ouS03USheU7hJ2l7U9JF0s9AtNT1Vx0,4818
47
47
  nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
- nkululeko/data/dataset.py,sha256=t0f_i1UhHE7U9LDzDmqzqKFZaVskwVCs-Bz_jxnyZno,27110
48
+ nkululeko/data/dataset.py,sha256=n6v_vVdA0EsZ-NaTgnYfPlCT4QCcD02mJJb-oD7SaSU,27265
49
49
  nkululeko/data/dataset_csv.py,sha256=v3lSjF23EVjoP460QOfhdcqbWAlBQWlBOuaYujZoS4s,3407
50
50
  nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  nkululeko/feat_extract/feats_agender.py,sha256=_lAL6IxJDJH2bhIvd7yarTqQryx7FjbQXAgY0mJP-KI,3192
@@ -69,12 +69,12 @@ nkululeko/feat_extract/feats_trill.py,sha256=PpygJK_W6QoBNeSah9npQPiQlJxLWFn6TSO
69
69
  nkululeko/feat_extract/feats_wav2vec2.py,sha256=sFf-WkLUgKUQsFxGO9m2hS3uYoGkv95mZavCEZyWFGA,5072
70
70
  nkululeko/feat_extract/feats_wavlm.py,sha256=RhI0oWIsknnxTVmdnNS_xJO1NnUUR0CUNDWH1yTpNLk,4683
71
71
  nkululeko/feat_extract/featureset.py,sha256=-ynkdor8iX7BFx10aIbB3LfwxrrzPoBGz9kXwyAJO9M,1375
72
- nkululeko/feat_extract/feinberg_praat.py,sha256=-pgY8Koq7dFaz-99cjkNqsQn1bsKgPInEuwrfmR0ebA,21253
72
+ nkululeko/feat_extract/feinberg_praat.py,sha256=7V1VhVMu4QrXkdcXpmqCbpStXfpmOHtfx5GzxXWukz8,21287
73
73
  nkululeko/losses/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
74
  nkululeko/losses/loss_ccc.py,sha256=NOK0y0fxKUnU161B5geap6Fmn8QzoPl2MqtPiV8IuJE,976
75
75
  nkululeko/losses/loss_softf1loss.py,sha256=5gW-PuiqeAZcRgfwjueIOQtMokOjZWgQnVIv59HKTCo,1309
76
76
  nkululeko/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
- nkululeko/models/model.py,sha256=Hq40hq5K8sTa1aIevx-o3ok32ngncMDQx_f-q0MmdXs,11517
77
+ nkululeko/models/model.py,sha256=SZ2HQ3KiF5fcmrTcvko1E95EQQeFIaPCG90DvZVHbBA,11638
78
78
  nkululeko/models/model_bayes.py,sha256=wI7-sCwibqXMCHviu349TYjgJXXNXym-Z6ZM83uxlFQ,378
79
79
  nkululeko/models/model_cnn.py,sha256=iyXeRsAMVeRST1j_D2AUngE02CtVkg6vWwQc1BOaBl0,9716
80
80
  nkululeko/models/model_gmm.py,sha256=onovzGBeguwZ-upXtuDLaBw9sd6fDDQslVBOrz1Z8TE,645
@@ -82,12 +82,12 @@ nkululeko/models/model_knn.py,sha256=5tGqiPo2JTw9VLmD-MXNZKFJ5RTLA6uv_blJDJ9lScA
82
82
  nkululeko/models/model_knn_reg.py,sha256=Fbuk6Ku6eyrbbMEk7rB5dwfhvQOMsdZk6HI_0T0gYPw,580
83
83
  nkululeko/models/model_lin_reg.py,sha256=NBTnY2ULuhUBt5ArYQwskZ2Vq4BBDGkqd9SYBFl7Ql4,392
84
84
  nkululeko/models/model_mlp.py,sha256=IjiiupLxm5ddb73-eU5Ad79Gb6enurR1fgGY-7NkbFc,9097
85
- nkululeko/models/model_mlp_regression.py,sha256=OFsGQXS4EhRMq3exZAMLF-vJARxIWH6TZjMKoueUkLs,10051
86
- nkululeko/models/model_svm.py,sha256=-5DHtdm4q6JqbBY60a38sRUg6wSoPtm_TGnekGQlBTM,572
87
- nkululeko/models/model_svr.py,sha256=qPG54wqccIM-Yse-95wKGTQdTZDa1bUHpZky110KlSY,532
85
+ nkululeko/models/model_mlp_regression.py,sha256=F0SaU1qAjnGmTTg-ti1s-XmFYVUYxSV0TJw0_jMxlKU,10054
86
+ nkululeko/models/model_svm.py,sha256=dqDQbfRCtlW3RNqpHDGVsj3ikc131gKURHj5VzAcCr0,867
87
+ nkululeko/models/model_svr.py,sha256=p-Mb4Bn54yOe1upuHQKNpfj4ttOmQnm9pCB7ECkJkJQ,699
88
88
  nkululeko/models/model_tree.py,sha256=soXjV523eRvRZ-jbX7X_3S73Wto1B9bm7ZzzDmgYzTc,390
89
89
  nkululeko/models/model_tree_reg.py,sha256=QxkQEz3LOuCLkXw5xH9IwFg4IcTL3Y5RK03qKe4TtGQ,397
90
- nkululeko/models/model_xgb.py,sha256=tzcksyGP9-XQGOBqt5gYSrQZsbtbcm5qwSkjnxsIX3I,221
90
+ nkululeko/models/model_xgb.py,sha256=yPJFD2jxOGcPDKuBeqJSmh83eKrfbnD_n722i6g39_g,267
91
91
  nkululeko/models/model_xgr.py,sha256=yY6wZV8jdiQCIYQCjYSb8gE0jjeiY44eh3rERe2HDvg,227
92
92
  nkululeko/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
93
93
  nkululeko/reporting/defines.py,sha256=IsY1YgKRMaABpylVKjBJgJ5bNCEbGCVA_E6pivraqSU,648
@@ -100,9 +100,9 @@ nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5Ev
100
100
  nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
101
101
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
102
102
  nkululeko/utils/stats.py,sha256=29otJpUp1VqbtDKmlLkPPzBmVfTFiHZ70rUdR4860rM,2788
103
- nkululeko/utils/util.py,sha256=Hn27x0f2rjSR-iae2h9_70J4SdXKJTduLFIH13w3db0,12363
104
- nkululeko-0.81.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
105
- nkululeko-0.81.0.dist-info/METADATA,sha256=yaassogO9-2QPBkYPPjT7AUtJZeoTT6L1BNJmZjr5VM,34298
106
- nkululeko-0.81.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
107
- nkululeko-0.81.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
108
- nkululeko-0.81.0.dist-info/RECORD,,
103
+ nkululeko/utils/util.py,sha256=_Z6OMJ3f-8TdETW9eqJYY5hwNRS5XCt9azzRnqoTTZE,12330
104
+ nkululeko-0.81.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
105
+ nkululeko-0.81.2.dist-info/METADATA,sha256=-Oo7DH0SM9gF8F0c65DLjGIt6rnUUPF_Ah_OgJrxDRA,34523
106
+ nkululeko-0.81.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
107
+ nkululeko-0.81.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
108
+ nkululeko-0.81.2.dist-info/RECORD,,