nkululeko 0.88.1__py3-none-any.whl → 0.88.3__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
- VERSION="0.88.1"
+ VERSION="0.88.3"
  SAMPLING_RATE = 16000
nkululeko/demo_predictor.py CHANGED
@@ -72,9 +72,11 @@ class Demo_predictor:
  else:
  self.util.debug(df_res)
  else:
- while True:
+ answer = input("want to record y/n?")
+ while answer == "y":
  signal = self.record_audio(3)
  self.predict_signal(signal, self.sr)
+ answer = input("want to record y/n?")

  # self.play_audio(signal)

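The change above replaces an unconditional `while True:` recording loop with one gated on user consent (the changelog's "changed demo live recording"). A minimal standalone sketch of the pattern, with `record_audio` and `predict_signal` reduced to hypothetical stubs:

    def record_audio(seconds):
        # stand-in for Demo_predictor.record_audio shown below
        return b""

    def predict_signal(signal, sr):
        # stand-in for Demo_predictor.predict_signal
        print("predicted")

    answer = input("want to record y/n?")
    while answer == "y":  # loop continues only while the user confirms
        signal = record_audio(3)
        predict_signal(signal, 16000)
        answer = input("want to record y/n?")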
@@ -109,7 +111,7 @@ class Demo_predictor:
  def record_audio(self, seconds):
  import sounddevice as sd

- print("recording ...")
+ print("recording ...", flush=True)
  y = sd.rec(int(seconds * self.sr), samplerate=self.sr, channels=1)
  sd.wait()
  y = y.T
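Adding `flush=True` pushes the "recording ..." message onto the console before the blocking `sd.rec`/`sd.wait` calls, so the user knows the microphone is already live. A sketch of the same idiom against the sounddevice API, with 16 kHz matching `SAMPLING_RATE` from constants.py:

    import sounddevice as sd

    sr, seconds = 16000, 3
    print("recording ...", flush=True)  # flush so the message appears before blocking
    y = sd.rec(int(seconds * sr), samplerate=sr, channels=1)
    sd.wait()  # block until the recording has finished
    signal = y.T  # transpose to (channels, samples), as in record_audio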
nkululeko/experiment.py CHANGED
@@ -107,8 +107,7 @@ class Experiment:
  # print keys/column
  dbs = ",".join(list(self.datasets.keys()))
  labels = self.util.config_val("DATA", "labels", False)
- auto_labels = list(
- next(iter(self.datasets.values())).df[self.target].unique())
+ auto_labels = list(next(iter(self.datasets.values())).df[self.target].unique())
  if labels:
  self.labels = ast.literal_eval(labels)
  self.util.debug(f"Using target labels (from config): {labels}")
@@ -158,8 +157,7 @@ class Experiment:
  data.split()
  data.prepare_labels()
  self.df_test = pd.concat(
- [self.df_test, self.util.make_segmented_index(
- data.df_test)]
+ [self.df_test, self.util.make_segmented_index(data.df_test)]
  )
  self.df_test.is_labeled = data.is_labeled
  self.df_test.got_gender = self.got_gender
@@ -260,8 +258,7 @@ class Experiment:
  test_cats = self.df_test[self.target].unique()
  else:
  # if there is no target, copy a dummy label
- self.df_test = self._add_random_target(
- self.df_test).astype("str")
+ self.df_test = self._add_random_target(self.df_test).astype("str")
  train_cats = self.df_train[self.target].unique()
  # print(f"df_train: {pd.DataFrame(self.df_train[self.target])}")
  # print(f"train_cats with target {self.target}: {train_cats}")
@@ -269,8 +266,7 @@ class Experiment:
  if type(test_cats) == np.ndarray:
  self.util.debug(f"Categories test (nd.array): {test_cats}")
  else:
- self.util.debug(
- f"Categories test (list): {list(test_cats)}")
+ self.util.debug(f"Categories test (list): {list(test_cats)}")
  if type(train_cats) == np.ndarray:
  self.util.debug(f"Categories train (nd.array): {train_cats}")
  else:
@@ -293,8 +289,7 @@ class Experiment:

  target_factor = self.util.config_val("DATA", "target_divide_by", False)
  if target_factor:
- self.df_test[self.target] = self.df_test[self.target] / \
- float(target_factor)
+ self.df_test[self.target] = self.df_test[self.target] / float(target_factor)
  self.df_train[self.target] = self.df_train[self.target] / float(
  target_factor
  )
@@ -317,16 +312,14 @@ class Experiment:
  def plot_distribution(self, df_labels):
  """Plot the distribution of samples and speaker per target class and biological sex"""
  plot = Plots()
- sample_selection = self.util.config_val(
- "EXPL", "sample_selection", "all")
+ sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
  plot.plot_distributions(df_labels)
  if self.got_speaker:
  plot.plot_distributions_speaker(df_labels)

  def extract_test_feats(self):
  self.feats_test = pd.DataFrame()
- feats_name = "_".join(ast.literal_eval(
- glob_conf.config["DATA"]["tests"]))
+ feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["tests"]))
  feats_types = self.util.config_val_list("FEATS", "type", ["os"])
  self.feature_extractor = FeatureExtractor(
  self.df_test, feats_types, feats_name, "test"
@@ -343,8 +336,7 @@ class Experiment:

  """
  df_train, df_test = self.df_train, self.df_test
- feats_name = "_".join(ast.literal_eval(
- glob_conf.config["DATA"]["databases"]))
+ feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
  self.feats_test, self.feats_train = pd.DataFrame(), pd.DataFrame()
  feats_types = self.util.config_val("FEATS", "type", "os")
  # Ensure feats_types is always a list of strings
@@ -385,8 +377,7 @@ class Experiment:
  f"test feats ({self.feats_test.shape[0]}) != test labels"
  f" ({self.df_test.shape[0]})"
  )
- self.df_test = self.df_test[self.df_test.index.isin(
- self.feats_test.index)]
+ self.df_test = self.df_test[self.df_test.index.isin(self.feats_test.index)]
  self.util.warn(f"new test labels shape: {self.df_test.shape[0]}")

  self._check_scale()
@@ -401,8 +392,7 @@ class Experiment:
  """Augment the selected samples."""
  from nkululeko.augmenting.augmenter import Augmenter

- sample_selection = self.util.config_val(
- "AUGMENT", "sample_selection", "all")
+ sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
  if sample_selection == "all":
  df = pd.concat([self.df_train, self.df_test])
  elif sample_selection == "train":
@@ -497,8 +487,7 @@ class Experiment:
  """
  from nkululeko.augmenting.randomsplicer import Randomsplicer

- sample_selection = self.util.config_val(
- "AUGMENT", "sample_selection", "all")
+ sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
  if sample_selection == "all":
  df = pd.concat([self.df_train, self.df_test])
  elif sample_selection == "train":
@@ -519,8 +508,7 @@ class Experiment:
  plot_feats = eval(
  self.util.config_val("EXPL", "feature_distributions", "False")
  )
- sample_selection = self.util.config_val(
- "EXPL", "sample_selection", "all")
+ sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
  # get the data labels
  if sample_selection == "all":
  df_labels = pd.concat([self.df_train, self.df_test])
@@ -583,8 +571,7 @@ class Experiment:
  for scat_target in scat_targets:
  if self.util.is_categorical(df_labels[scat_target]):
  for scatter in scatters:
- plots.scatter_plot(
- df_feats, df_labels, scat_target, scatter)
+ plots.scatter_plot(df_feats, df_labels, scat_target, scatter)
  else:
  self.util.debug(
  f"{self.name}: binning continuous variable to categories"
@@ -669,15 +656,15 @@ class Experiment:
  )
  return
  best = self.get_best_report(self.reports)
- # if not best.is_classification:
- # best.continuous_to_categorical()
- truths = best.truths
- preds = best.preds
+ if best.is_classification:
+ truths = best.truths
+ preds = best.preds
+ else:
+ truths = best.truths_cont
+ preds = best.preds_cont
  speakers = self.df_test.speaker.values
- print(f"{len(truths)} {len(preds)} {len(speakers) }")
- df = pd.DataFrame(
- data={"truth": truths, "pred": preds, "speaker": speakers})
- plot_name = "result_combined_per_speaker"
+ df = pd.DataFrame(data={"truths": truths, "preds": preds, "speakers": speakers})
+ plot_name = f"{self.util.get_exp_name()}_speakercombined_{function}"
  self.util.debug(
  f"plotting speaker combination ({function}) confusion matrix to"
  f" {plot_name}"
@@ -692,13 +679,13 @@ class Experiment:

  def demo(self, file, is_list, outfile):
  model = self.runmgr.get_best_model()
- labelEncoder = None
+ lab_enc = None
  try:
- labelEncoder = self.label_encoder
+ lab_enc = self.label_encoder
  except AttributeError:
  pass
  demo = Demo_predictor(
- model, file, is_list, self.feature_extractor, labelEncoder, outfile
+ model, file, is_list, self.feature_extractor, lab_enc, outfile
  )
  demo.run_demo()

nkululeko/reporting/reporter.py CHANGED
@@ -34,23 +34,24 @@ from nkululeko.utils.util import Util


  class Reporter:
- def __set_measure(self):
+ def _set_metric(self):
  if self.util.exp_is_classification():
- self.MEASURE = "UAR"
- self.result.measure = self.MEASURE
+ self.metric = "uar"
+ self.METRIC = "UAR"
+ self.result.metric = self.METRIC
  self.is_classification = True
  else:
  self.is_classification = False
- self.measure = self.util.config_val("MODEL", "measure", "mse")
- if self.measure == "mse":
- self.MEASURE = "MSE"
- self.result.measure = self.MEASURE
- elif self.measure == "mae":
- self.MEASURE = "MAE"
- self.result.measure = self.MEASURE
- elif self.measure == "ccc":
- self.MEASURE = "CCC"
- self.result.measure = self.MEASURE
+ self.metric = self.util.config_val("MODEL", "measure", "mse")
+ if self.metric == "mse":
+ self.METRIC = "MSE"
+ self.result.metric = self.METRIC
+ elif self.metric == "mae":
+ self.METRIC = "MAE"
+ self.result.metric = self.METRIC
+ elif self.metric == "ccc":
+ self.METRIC = "CCC"
+ self.result.metric = self.METRIC

  def __init__(self, truths, preds, run, epoch, probas=None):
  """Initialization with ground truth und predictions vector.
@@ -70,60 +71,70 @@ class Reporter:
  self.result = Result(0, 0, 0, 0, "unknown")
  self.run = run
  self.epoch = epoch
- self.__set_measure()
+ self._set_metric()
  self.filenameadd = ""
  self.cont_to_cat = False
  if len(self.truths) > 0 and len(self.preds) > 0:
  if self.util.exp_is_classification():
- uar, (upper, lower) = evaluate_with_conf_int(
- self.preds,
- unweighted_average_recall,
- self.truths,
- num_bootstraps=1000,
- alpha=5,
+ uar, upper, lower = self._get_test_result(
+ self.truths, self.preds, "uar"
  )
  self.result.test = uar
  self.result.set_upper_lower(upper, lower)
  self.result.loss = 1 - accuracy(self.truths, self.preds)
  else:
  # regression experiment
- if self.measure == "mse":
- test_result, (upper, lower) = evaluate_with_conf_int(
- self.preds,
- mean_squared_error,
- self.truths,
- num_bootstraps=1000,
- alpha=5,
- )
- elif self.measure == "mae":
- test_result, (upper, lower) = evaluate_with_conf_int(
- self.preds,
- mean_absolute_error,
- self.truths,
- num_bootstraps=1000,
- alpha=5,
- )
- elif self.measure == "ccc":
- test_result, (upper, lower) = evaluate_with_conf_int(
- self.preds,
- concordance_cc,
- self.truths,
- num_bootstraps=1000,
- alpha=5,
- )
-
- if math.isnan(self.result.test):
- self.util.debug(f"Truth: {self.truths}")
- self.util.debug(f"Predict.: {self.preds}")
- self.util.debug("Result is NAN: setting to -1")
- self.result.test = -1
- else:
- self.util.error(f"unknown measure: {self.measure}")
-
+ # keep the original values for further use, they will be binned later
+ self.truths_cont = self.truths
+ self.preds_cont = self.preds
+ test_result, upper, lower = self._get_test_result(
+ self.truths, self.preds, self.metric
+ )
  self.result.test = test_result
  self.result.set_upper_lower(upper, lower)
  # train and loss are being set by the model
- # print out the class probilities
+
+ def _get_test_result(self, truths, preds, metric):
+ if metric == "uar":
+ test_result, (upper, lower) = evaluate_with_conf_int(
+ preds,
+ unweighted_average_recall,
+ truths,
+ num_bootstraps=1000,
+ alpha=5,
+ )
+ elif metric == "mse":
+ test_result, (upper, lower) = evaluate_with_conf_int(
+ preds,
+ mean_squared_error,
+ truths,
+ num_bootstraps=1000,
+ alpha=5,
+ )
+ elif metric == "mae":
+ test_result, (upper, lower) = evaluate_with_conf_int(
+ preds,
+ mean_absolute_error,
+ truths,
+ num_bootstraps=1000,
+ alpha=5,
+ )
+ elif metric == "ccc":
+ test_result, (upper, lower) = evaluate_with_conf_int(
+ preds,
+ concordance_cc,
+ truths,
+ num_bootstraps=1000,
+ alpha=5,
+ )
+ if math.isnan(test_result):
+ self.util.debug(f"Truth: {self.truths}")
+ self.util.debug(f"Predict.: {self.preds}")
+ self.util.debug("Result is NAN: setting to -1")
+ test_result = -1
+ else:
+ self.util.error(f"unknown metric: {self.metric}")
+ return test_result, upper, lower

  def print_probabilities(self):
  """Print the probabilities per class to a file in the store."""
@@ -133,10 +144,10 @@ class Reporter:
  and "uncertainty" not in self.probas
  ):
  probas = self.probas
- probas["predicted"] = self.preds
- probas["truth"] = self.truths
  # softmax the probabilities or logits
  uncertainty = probas.apply(softmax, axis=1)
+ probas["predicted"] = self.preds
+ probas["truth"] = self.truths
  try:
  le = glob_conf.label_encoder
  mapping = dict(zip(le.classes_, range(len(le.classes_))))
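Moving the `predicted`/`truth` assignments below the softmax is the changelog's "fixed bug in false uncertainty estimation": previously both label columns were appended first and thus leaked into the row-wise softmax. A toy illustration (class names invented, softmax assumed to come from scipy as the surrounding code suggests):

    import pandas as pd
    from scipy.special import softmax

    probas = pd.DataFrame({"neutral": [2.0, 0.2], "angry": [0.5, 1.8]})  # toy logits
    # softmax must only see the per-class columns, so compute it first ...
    uncertainty = probas.apply(softmax, axis=1)
    # ... and append the label columns afterwards
    probas["predicted"] = ["neutral", "angry"]
    probas["truth"] = ["neutral", "neutral"]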
@@ -195,31 +206,49 @@ class Reporter:
  def plot_per_speaker(self, result_df, plot_name, function):
  """Plot a confusion matrix with the mode category per speakers.

+ If the function is mode and the values continuous, bin first
+
  Args:
  result_df: a pandas dataframe with columns: preds, truths and speaker.
  plot_name: name for the figure.
  function: either mode or mean.
  """
- speakers = result_df.speaker.unique()
- pred = np.zeros(0)
- truth = np.zeros(0)
+ if function == "mode" and not self.is_classification:
+ truths, preds = result_df["truths"].values, result_df["preds"].values
+ truths, preds = self.util._bin_distributions(truths, preds)
+ result_df["truths"], result_df["preds"] = truths, preds
+ speakers = result_df.speakers.unique()
+ preds_speakers = np.zeros(0)
+ truths_speakers = np.zeros(0)
  for s in speakers:
- s_df = result_df[result_df.speaker == s]
- mode = s_df.pred.mode().iloc[-1]
- mean = s_df.pred.mean()
+ s_df = result_df[result_df.speakers == s]
+ s_truth = s_df.truths.iloc[0]
+ s_pred = None
  if function == "mode":
- s_df.pred = mode
+ s_pred = s_df.preds.mode().iloc[-1]
  elif function == "mean":
- s_df.pred = mean
+ s_pred = s_df.preds.mean()
  else:
- self.util.error(f"unkown function {function}")
- pred = np.append(pred, s_df.pred.values)
- truth = np.append(truth, s_df["truth"].values)
- if not (self.is_classification or self.cont_to_cat):
- bins = ast.literal_eval(glob_conf.config["DATA"]["bins"])
- truth = np.digitize(truth, bins) - 1
- pred = np.digitize(pred, bins) - 1
- self._plot_confmat(truth, pred.astype("int"), plot_name, 0)
+ self.util.error(f"unknown function {function}")
+ preds_speakers = np.append(preds_speakers, s_pred)
+ truths_speakers = np.append(truths_speakers, s_truth)
+ test_result, upper, lower = self._get_test_result(
+ result_df.truths.values, result_df.preds.values, self.metric
+ )
+ test_result = Result(test_result, None, None, None, self.METRIC)
+ test_result.set_upper_lower(upper, lower)
+ result_msg = f"Speaker combination result: {test_result.test_result_str()}"
+ self.util.debug(result_msg)
+ if function == "mean":
+ truths_speakers, preds_speakers = self.util._bin_distributions(
+ truths_speakers, preds_speakers
+ )
+ self._plot_confmat(
+ truths_speakers,
+ preds_speakers.astype("int"),
+ plot_name,
+ test_result=test_result,
+ )

  def _plot_scatter(self, truths, preds, plot_name, epoch=None):
  # print(truths)
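The rewritten `plot_per_speaker` now collects exactly one truth/prediction pair per speaker instead of repeating the combined value across all of a speaker's samples (changelog: "changed combine speaker results to show speakers not samples"). A toy run of the core aggregation outside the class (speaker IDs and values invented):

    import numpy as np
    import pandas as pd

    result_df = pd.DataFrame({
        "speakers": ["s1", "s1", "s1", "s2", "s2"],
        "truths": [1, 1, 1, 0, 0],
        "preds": [1, 0, 1, 0, 1],
    })
    truths_speakers, preds_speakers = np.zeros(0), np.zeros(0)
    for s in result_df.speakers.unique():
        s_df = result_df[result_df.speakers == s]
        truths_speakers = np.append(truths_speakers, s_df.truths.iloc[0])
        # majority vote per speaker (the "mode" branch above)
        preds_speakers = np.append(preds_speakers, s_df.preds.mode().iloc[-1])
    # one entry per speaker: truths [1., 0.], preds [1., 1.]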
@@ -227,13 +256,10 @@ class Reporter:
  if epoch is None:
  epoch = self.epoch
  fig_dir = self.util.get_path("fig_dir")
- fig = plt.figure() # figsize=[5, 5]
-
  pcc = pearsonr(self.truths, self.preds)[0]
-
- reg_res = f"{self.result.test:.3f} {self.MEASURE}"
-
- plt.scatter(truths, preds, cmap="Blues")
+ reg_res = self.result.test_result_str()
+ fig = plt.figure()
+ plt.scatter(truths, preds)
  plt.xlabel("truth")
  plt.ylabel("prediction")

@@ -258,11 +284,11 @@ class Reporter:
  )
  )

- def _plot_confmat(self, truths, preds, plot_name, epoch=None):
- # print(truths)
- # print(preds)
+ def _plot_confmat(self, truths, preds, plot_name, epoch=None, test_result=None):
  if epoch is None:
  epoch = self.epoch
+ if test_result is None:
+ test_result = self.result
  fig_dir = self.util.get_path("fig_dir")
  labels = glob_conf.labels
  fig = plt.figure() # figsize=[5, 5]
@@ -295,12 +321,15 @@ class Reporter:

  reg_res = ""
  if not self.is_classification:
- reg_res = f"{self.result.test:.3f} {self.MEASURE}"
+ reg_res = f"{test_result.test_result_str()}"
+ self.util.debug(
+ f"Best result at epoch {epoch}: {test_result.test_result_str()}"
+ )

- uar_str = str(int(uar * 1000) / 1000.0)[1:]
- acc_str = str(int(acc * 1000) / 1000.0)[1:]
- up_str = str(int(upper * 1000) / 1000.0)[1:]
- low_str = str(int(lower * 1000) / 1000.0)[1:]
+ uar_str = self.util.to_3_digits_str(uar)
+ acc_str = self.util.to_3_digits_str(acc)
+ up_str = self.util.to_3_digits_str(upper)
+ low_str = self.util.to_3_digits_str(lower)

  if epoch != 0:
  plt.title(
@@ -427,7 +456,7 @@ class Reporter:
  ax = df.plot()
  fig = ax.figure
  plt.xlabel("epochs")
- plt.ylabel(f"{self.MEASURE}")
+ plt.ylabel(f"{self.METRIC}")
  plot_path = f"{fig_dir}{plot_name}.{self.format}"
  plt.savefig(plot_path)
  self.util.debug(f"plotted epoch progression to {plot_path}")
@@ -464,7 +493,7 @@ class Reporter:
  plt.plot(losses, "black", label="losses")
  plt.plot(losses_eval, "grey", label="losses_eval")
  plt.xlabel("epochs")
- plt.ylabel(f"{self.MEASURE}")
+ plt.ylabel(f"{self.METRIC}")
  plt.legend()
  plt.savefig(f"{fig_dir}{out_name}.{self.format}")
  plt.close()
nkululeko/reporting/result.py CHANGED
@@ -1,13 +1,15 @@
  # result.py
+ from nkululeko.utils.util import Util


  class Result:
- def __init__(self, test, train, loss, loss_eval, measure):
+ def __init__(self, test, train, loss, loss_eval, metric):
  self.test = test
  self.train = train
  self.loss = loss
  self.loss_eval = loss_eval
- self.measure = measure
+ self.metric = metric
+ self.util = Util("Result")

  def get_result(self):
  return self.test
@@ -18,10 +20,16 @@ class Result:
  self.lower = lower

  def get_test_result(self):
- return f"test: {self.test:.3f} {self.measure}"
+ return f"test: {self.test:.3f} {self.metric}"

  def to_string(self):
  return (
- f"test: {self.test} {self.measure}, train:"
- f" {self.train} {self.measure}, loss: {self.loss}, eval-loss: {self.loss_eval}"
+ f"test: {self.test} {self.metric}, train:"
+ f" {self.train} {self.metric}, loss: {self.loss}, eval-loss: {self.loss_eval}"
  )
+
+ def test_result_str(self):
+ result_s = self.util.to_3_digits_str(self.test)
+ up_str = self.util.to_3_digits_str(self.upper)
+ low_str = self.util.to_3_digits_str(self.lower)
+ return f"{self.metric}: {result_s} ({up_str}/{low_str})"
nkululeko/utils/util.py CHANGED
@@ -50,9 +50,7 @@ class Util:
  self.got_data_roots = False

  def get_path(self, entry):
- """
- This method allows the user to get the directory path for the given argument.
- """
+ """This method allows the user to get the directory path for the given argument."""
  if self.config is None:
  # If no configuration file is provided, use default paths
  if entry == "fig_dir":
@@ -139,15 +137,11 @@ class Util:
  )

  def get_name(self):
- """
- Get the name of the experiment
- """
+ """Get the name of the experiment."""
  return self.config["EXP"]["name"]

  def get_exp_dir(self):
- """
- Get the experiment directory
- """
+ """Get the experiment directory."""
  root = os.path.join(self.config["EXP"]["root"], "")
  name = self.config["EXP"]["name"]
  dir_name = f"{root}{name}"
@@ -176,15 +170,11 @@ class Util:
  return ""

  def get_data_name(self):
- """
- Get a string as name from all databases that are useed
- """
+ """Get a string as name from all databases that are useed."""
  return "_".join(ast.literal_eval(self.config["DATA"]["databases"]))

  def get_feattype_name(self):
- """
- Get a string as name from all feature sets that are used
- """
+ """Get a string as name from all feature sets that are used."""
  return "_".join(ast.literal_eval(self.config["FEATS"]["type"]))

  def get_exp_name(self, only_train=False, only_data=False):
@@ -303,9 +293,9 @@ class Util:
  return ast.literal_eval(self.config["DATA"]["labels"])

  def continuous_to_categorical(self, series):
- """
- discretize a categorical variable.
- uses the labels and bins from the ini if present
+ """Discretize a categorical variable.
+
+ Uses the labels and bins from the ini if present

  :param series: a pandas series
  :return a pandas series with discretized values as categories
@@ -321,11 +311,23 @@ class Util:
  labels = ["0_low", "1_middle", "2_high"]
  result = np.digitize(series, bins) - 1
  result = pd.Series(result)
- for i, l in enumerate(labels):
- result = result.replace(i, str(l))
+ for i, lab in enumerate(labels):
+ result = result.replace(i, str(lab))
  result = result.astype("category")
  return result

+ def _bin_distributions(self, truths, preds):
+ try:
+ bins = ast.literal_eval(self.config["DATA"]["bins"])
+ except KeyError:
+ # if no binning is given, simply take three bins, based on truth
+ b1 = np.quantile(truths, 0.33)
+ b2 = np.quantile(truths, 0.66)
+ bins = [-1000000, b1, b2, 1000000]
+ truths = np.digitize(truths, bins) - 1
+ preds = np.digitize(preds, bins) - 1
+ return truths, preds
+
  def print_best_results(self, best_reports):
  res_dir = self.get_res_dir()
  # go one level up above the "run" level
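The new `_bin_distributions` helper falls back to terciles of the truth values when no `[DATA] bins` entry exists in the ini file. A toy run of that fallback, outside the class (values invented):

    import numpy as np

    truths = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
    preds = np.array([0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95])
    # tercile boundaries derived from the truth values
    b1, b2 = np.quantile(truths, 0.33), np.quantile(truths, 0.66)
    bins = [-1000000, b1, b2, 1000000]
    # np.digitize maps each value to its bin; -1 shifts to 0-based classes
    truths_binned = np.digitize(truths, bins) - 1
    preds_binned = np.digitize(preds, bins) - 1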
@@ -416,5 +418,10 @@ class Util:
  self.error(f"unknown measure: {measure}")

  def to_3_digits(self, x):
+ """Given a float, return this to 3 digits."""
  x = float(x)
  return (int(x * 1000)) / 1000.0
+
+ def to_3_digits_str(self, x):
+ """Given a float, return this to 3 digits as string without integer number."""
+ return str(self.to_3_digits(x))[1:]
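`to_3_digits_str` truncates (rather than rounds) and drops the leading integer digit, which is why plot titles show values like `.753`. A quick illustration as standalone functions mirroring the two methods above:

    def to_3_digits(x):
        return int(float(x) * 1000) / 1000.0

    def to_3_digits_str(x):
        return str(to_3_digits(x))[1:]

    print(to_3_digits_str(0.7539))  # ".753" - truncated, not rounded
    print(to_3_digits_str(0.5))     # ".5"   - trailing zeros are not padded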
nkululeko-0.88.1.dist-info/METADATA → nkululeko-0.88.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: nkululeko
- Version: 0.88.1
+ Version: 0.88.3
  Summary: Machine learning audio prediction experiments based on templates
  Home-page: https://github.com/felixbur/nkululeko
  Author: Felix Burkhardt
@@ -356,6 +356,15 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
  Changelog
  =========

+ Version 0.88.3
+ --------------
+ * fixed bug in false uncertainty estimation
+ * changed demo live recording
+
+ Version 0.88.2
+ --------------
+ * changed combine speaker results to show speakers not samples
+
  Version 0.88.1
  --------------
  * added obligatory scatter plot for regression
nkululeko-0.88.1.dist-info/RECORD → nkululeko-0.88.3.dist-info/RECORD CHANGED
@@ -2,12 +2,12 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
  nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
  nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
- nkululeko/constants.py,sha256=wDgoTGTbu7Xgf_ms4HNIIYRiH8Z5f0DMdvLeI-SupRo,39
+ nkululeko/constants.py,sha256=nYBLnpl1NpTeGp7KjRHJkxz_Vju3pYEkcrj3CwBX0zI,39
  nkululeko/demo.py,sha256=bLuHkeEl5rOfm7ecGHCcWATiPK7-njNbtrGljxzNzFs,5088
  nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
- nkululeko/demo_predictor.py,sha256=es56xbT8ifkS_vnrlb5NTZT54gNmeUtNlA4zVA_gnN8,4757
+ nkululeko/demo_predictor.py,sha256=zs1bjhpnKuNCPLJeiyDm19ME1NEDOQT3QNeyVKJq9Yc,4882
  nkululeko/ensemble.py,sha256=huRbXUuabm6QYxGBHjkwEU95e-0qxtO0Z6UdXFgtaMY,4947
- nkululeko/experiment.py,sha256=s9PIjm45dR9yzmHu_69JpBjX9qMVzi5wIgPfMR3F44A,31530
+ nkululeko/experiment.py,sha256=wXZnb_cfOqF8b0Zqzu2bbrEgCCpG_zPkDbD-Usw5sRs,31283
  nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
  nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
  nkululeko/feature_extractor.py,sha256=UnspIWz3XrNhKnBBhWZkH2bHvD-sROtrQVqB1JvkUyw,4088
@@ -98,17 +98,17 @@ nkululeko/reporting/defines.py,sha256=IsY1YgKRMaABpylVKjBJgJ5bNCEbGCVA_E6pivraqS
  nkululeko/reporting/latex_writer.py,sha256=qiCRSmB4KOD_za4oHu5x-PhwjZohzfo8wecMOwlXZwc,1886
  nkululeko/reporting/report.py,sha256=W0rcigDdjBvxZQ3pZja_gvToILYvaZ1BFtnN2qFRfYI,1060
  nkululeko/reporting/report_item.py,sha256=siWeGNgo4bAE46YBMNcsdf3jTMTy76BO9Fi6DTvDig4,533
- nkululeko/reporting/reporter.py,sha256=2SX8qPaWpqslzfcXDQFINxTkVDWbrySiQZR4iZ77xG0,18096
- nkululeko/reporting/result.py,sha256=nSN5or-Py2GPRWHkWpGRh7UCi1W0er7WLEHz8fYLk-A,742
+ nkululeko/reporting/reporter.py,sha256=WPevPmtpnzSwiF6lxtczmxpFjmzhNABJspAsNNJgmq4,19237
+ nkululeko/reporting/result.py,sha256=G63a2tHCwHhM6NBJgYzsWKWJm4Yu3r4hsCHA2Km7eHU,1073
  nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=pmLHuXsaqvcdYxB4PSW9l1mbQWZZBJFhi_CGabqydas,1947
  nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5EvWlcWQ,3301
  nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
  nkululeko/utils/stats.py,sha256=eC9dMO-by6CDnGLHDBQu-2B4-BudZNJ0nnWGhKYdUMA,2968
- nkululeko/utils/util.py,sha256=eQkfd_3MO2JYis5QbROnCmhglQGkl4-F9TLCT1uiQH0,14514
- nkululeko-0.88.1.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
- nkululeko-0.88.1.dist-info/METADATA,sha256=S_oewgAcmyRs8p-61M0PcVYfvtRl4hRwwlFDWVH9yDY,39025
- nkululeko-0.88.1.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
- nkululeko-0.88.1.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
- nkululeko-0.88.1.dist-info/RECORD,,
+ nkululeko/utils/util.py,sha256=BNd9JpoVakPbyysKBsJSCnqlbPlUKHUrcWYcwEnOdVA,15128
+ nkululeko-0.88.3.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+ nkululeko-0.88.3.dist-info/METADATA,sha256=8sPk6npYa7my3UA88uk2373yzdfiRFfb70Zix097KR4,39224
+ nkululeko-0.88.3.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
+ nkululeko-0.88.3.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+ nkululeko-0.88.3.dist-info/RECORD,,