nkululeko 0.88.2__py3-none-any.whl → 0.88.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.88.2"
1
+ VERSION="0.88.4"
2
2
  SAMPLING_RATE = 16000
@@ -72,9 +72,11 @@ class Demo_predictor:
72
72
  else:
73
73
  self.util.debug(df_res)
74
74
  else:
75
- while True:
75
+ answer = input("want to record y/n?")
76
+ while answer == "y":
76
77
  signal = self.record_audio(3)
77
78
  self.predict_signal(signal, self.sr)
79
+ answer = input("want to record y/n?")
78
80
 
79
81
  # self.play_audio(signal)
80
82
 
@@ -109,7 +111,7 @@ class Demo_predictor:
109
111
  def record_audio(self, seconds):
110
112
  import sounddevice as sd
111
113
 
112
- print("recording ...")
114
+ print("recording ...", flush=True)
113
115
  y = sd.rec(int(seconds * self.sr), samplerate=self.sr, channels=1)
114
116
  sd.wait()
115
117
  y = y.T
nkululeko/ensemble.py CHANGED
@@ -1,20 +1,181 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
-
4
+ from typing import List
5
5
  import configparser
6
6
  import time
7
7
  from argparse import ArgumentParser
8
8
  from pathlib import Path
9
9
 
10
+ import numpy as np
10
11
  import pandas as pd
12
+ from sklearn.metrics import balanced_accuracy_score
11
13
 
12
14
  from nkululeko.constants import VERSION
13
15
  from nkululeko.experiment import Experiment
14
16
  from nkululeko.utils.util import Util
15
17
 
18
+ # import torch
19
+
20
+ # Constants
21
+ DEFAULT_METHOD = "mean"
22
+ DEFAULT_OUTFILE = "ensemble_result.csv"
23
+
24
+
25
def majority_voting(ensemble_preds_ls):
    """Return the most frequent ``predicted`` value per row across all models.

    Args:
        ensemble_preds_ls: list of per-model DataFrames, each holding a
            ``predicted`` column.

    Returns:
        A Series with one winning prediction per row.
    """
    votes = [model_df["predicted"] for model_df in ensemble_preds_ls]
    vote_table = pd.concat(votes, axis=1)
    # mode(axis=1) may return several columns when there is a tie;
    # only the first one is kept.
    row_modes = vote_table.mode(axis=1)
    return row_modes.iloc[:, 0]
28
+
29
+
30
def mean_ensemble(ensemble_preds, labels):
    """Average every class probability over the models and pick the best class.

    ``ensemble_preds[label]`` is reduced with ``mean(axis=1)``, so each label
    must select several columns (the caller passes the column-wise
    concatenation of the per-model frames).

    Side effect: the label columns of ``ensemble_preds`` are overwritten in
    place with the row-wise mean.

    Returns:
        A Series holding, per row, the label with the highest mean value.
    """
    for class_name in labels:
        per_model = ensemble_preds[class_name]
        ensemble_preds[class_name] = per_model.mean(axis=1)
    averaged = ensemble_preds[labels]
    return averaged.idxmax(axis=1)
34
+
35
+
36
def max_ensemble(ensemble_preds, labels):
    """Take, per class, the highest probability any model gave; pick the best class.

    ``ensemble_preds[label]`` is reduced with ``max(axis=1)``, so each label
    must select several columns (the caller passes the column-wise
    concatenation of the per-model frames).

    Side effect: the label columns of ``ensemble_preds`` are overwritten in
    place with the row-wise maximum.

    Returns:
        A Series holding, per row, the label with the highest maximum value.
    """
    for class_name in labels:
        per_model = ensemble_preds[class_name]
        ensemble_preds[class_name] = per_model.max(axis=1)
    reduced = ensemble_preds[labels]
    return reduced.idxmax(axis=1)
40
+
41
+
42
def sum_ensemble(ensemble_preds, labels):
    """Sum every class probability over the models and pick the best class.

    ``ensemble_preds[label]`` is reduced with ``sum(axis=1)``, so each label
    must select several columns (the caller passes the column-wise
    concatenation of the per-model frames).

    Side effect: the label columns of ``ensemble_preds`` are overwritten in
    place with the row-wise sum.

    Returns:
        A Series holding, per row, the label with the highest summed value.
    """
    for class_name in labels:
        per_model = ensemble_preds[class_name]
        ensemble_preds[class_name] = per_model.sum(axis=1)
    totals = ensemble_preds[labels]
    return totals.idxmax(axis=1)
46
+
47
+
48
def uncertainty_ensemble(ensemble_preds):
    """Pick, for every sample, the prediction of the least uncertain model.

    No threshold is applied: the model with the lowest ``uncertainty`` always
    wins (ties go to the first such model). This matches what
    ``uncertainty_threshold_ensemble`` does whenever the lowest uncertainty
    does not exceed its threshold.

    Args:
        ensemble_preds: column-wise concatenation of the per-model frames, so
            ``predicted`` and ``uncertainty`` appear once per model. A frame
            holding a single model is accepted as well.

    Returns:
        Tuple ``(final_predictions, best_uncertainty)`` of two lists with one
        entry per row.
    """
    final_predictions = []
    best_uncertainty = []
    for _, row in ensemble_preds.iterrows():
        # Selecting with a list always yields a 1-D array, even when only one
        # column carries the name (single model).
        uncertainties = row[["uncertainty"]].to_numpy(dtype=float)
        predictions = row[["predicted"]].to_numpy()
        best_model = int(np.argmin(uncertainties))
        final_predictions.append(predictions[best_model])
        best_uncertainty.append(uncertainties[best_model])

    return final_predictions, best_uncertainty
59
+
60
+
61
def max_class_ensemble(ensemble_preds_ls, labels):
    """Return, per sample, the class with the single highest probability.

    Unlike ``max_ensemble`` (which compares the same class across models),
    this compares all class probabilities of all models at once and keeps the
    overall maximum together with its class.

    Args:
        ensemble_preds_ls: list of per-model DataFrames, each containing one
            probability column per entry of ``labels``. Rows are matched by
            position, so all frames must have the same number of rows.
        labels: class labels (column names of the probability columns).

    Returns:
        Tuple ``(predictions, probabilities)`` of two Series with a default
        integer index: the winning class and its probability for each row.
    """
    labels = list(labels)
    n_models = len(ensemble_preds_ls)

    # Shape (n_samples, n_labels, n_models): one probability per cell.
    stacked = np.stack(
        [model_df[labels].to_numpy(dtype=float) for model_df in ensemble_preds_ls],
        axis=-1,
    )
    n_samples = stacked.shape[0]

    # Flatten label-major / model-minor so that ties resolve to the earliest
    # label, as when scanning the concatenated row label by label.
    flat = stacked.reshape(n_samples, -1)
    best = flat.argmax(axis=1)

    final_probs = flat[np.arange(n_samples), best]
    final_preds = [labels[position // n_models] for position in best]

    return pd.Series(final_preds), pd.Series(final_probs)
84
+
85
+
86
def uncertainty_threshold_ensemble(ensemble_preds_ls, labels, threshold):
    """Trust the most certain model, or fall back to the mean of all models.

    For every row of the first frame's index:

    * if the lowest ``uncertainty`` among the models is ``<= threshold``, the
      ``predicted`` value of that model is used;
    * otherwise the class probabilities are averaged over all models and the
      class with the highest mean is used.

    Args:
        ensemble_preds_ls: list of per-model DataFrames sharing the same index
            and holding ``predicted``, ``uncertainty`` and one column per label.
        labels: class labels (names of the probability columns).
        threshold: highest uncertainty at which a single model is trusted.

    Returns:
        List with one predicted label per row.
    """
    final_predictions = []

    for idx in ensemble_preds_ls[0].index:
        uncertainties = [
            float(model_df.loc[idx, "uncertainty"]) for model_df in ensemble_preds_ls
        ]
        best_model = int(np.argmin(uncertainties))

        if uncertainties[best_model] <= threshold:
            # A sufficiently certain model exists: use its prediction as is.
            final_predictions.append(
                ensemble_preds_ls[best_model].loc[idx, "predicted"]
            )
        else:
            # Nobody is certain enough: average each class over all models.
            mean_probs = np.mean(
                [
                    model_df.loc[idx, labels].to_numpy(dtype=float)
                    for model_df in ensemble_preds_ls
                ],
                axis=0,
            )
            final_predictions.append(labels[int(np.argmax(mean_probs))])

    return final_predictions
110
+
16
111
 
17
- def ensemble_predictions(config_files, method, no_labels):
112
def uncertainty_weighted_ensemble(ensemble_preds_ls, labels):
    """Combine class probabilities, weighting each model by 1 / uncertainty.

    For every row of the first frame's index, each model receives the weight
    ``1 / uncertainty`` (``1e10`` when the uncertainty is exactly 0). The
    weights are normalised to sum to 1 for that row, and the class with the
    highest weighted probability is chosen (ties go to the earliest label).

    Args:
        ensemble_preds_ls: list of per-model DataFrames sharing the same index
            and holding ``uncertainty`` and one column per label.
        labels: class labels (names of the probability columns).

    Returns:
        Tuple ``(final_predictions, final_uncertainties)`` of two lists with
        one entry per row. The reported uncertainty is the highest uncertainty
        among the models, i.e. that of the least confident model.
    """
    final_predictions = []
    final_uncertainties = []

    for idx in ensemble_preds_ls[0].index:
        uncertainties = [model_df.loc[idx, "uncertainty"] for model_df in ensemble_preds_ls]

        # Inverse-uncertainty weights; a huge finite weight stands in for 1/0.
        raw_weights = [1 / u if u != 0 else 1e10 for u in uncertainties]
        total_weight = sum(raw_weights)
        weights = [w / total_weight for w in raw_weights]

        weighted_probs = {
            label: sum(
                model_df.loc[idx, label] * weight
                for model_df, weight in zip(ensemble_preds_ls, weights)
            )
            for label in labels
        }

        final_predictions.append(max(weighted_probs, key=weighted_probs.get))
        # Same quantity as 1 - min(1 - u), without the floating-point round trip.
        final_uncertainties.append(max(uncertainties))

    return final_predictions, final_uncertainties
146
+
147
+
148
def confidence_weighted_ensemble(ensemble_preds_ls, labels):
    """Combine class probabilities, weighting each model by its confidence.

    For every row of the first frame's index, each model's class
    probabilities are weighted by ``1 - uncertainty`` and the weighted sums
    are divided by the total confidence of that row. If the total confidence
    is 0 (every model reports uncertainty 1.0) the plain mean of the
    probabilities is used instead, so the division is always defined.

    Args:
        ensemble_preds_ls: list of per-model DataFrames sharing the same index
            and holding ``uncertainty`` and one column per label.
        labels: class labels (names of the probability columns).

    Returns:
        Tuple ``(final_predictions, final_confidences)`` of two lists with one
        entry per row: the winning class (ties go to the earliest label) and
        its combined probability.
    """
    final_predictions = []
    final_confidences = []

    for idx in ensemble_preds_ls[0].index:
        rows = [model_df.loc[idx] for model_df in ensemble_preds_ls]
        confidences = [1 - row["uncertainty"] for row in rows]
        total_confidence = sum(confidences)

        if total_confidence == 0:
            # No model is confident at all: weight them equally rather than
            # dividing by zero.
            confidences = [1.0] * len(rows)
            total_confidence = float(len(rows))

        class_probabilities = {
            label: sum(row[label] * conf for row, conf in zip(rows, confidences))
            / total_confidence
            for label in labels
        }

        predicted_class = max(class_probabilities, key=class_probabilities.get)
        final_predictions.append(predicted_class)
        final_confidences.append(class_probabilities[predicted_class])

    return final_predictions, final_confidences
174
+
175
+
176
+ def ensemble_predictions(
177
+ config_files: List[str], method: str, threshold: float, no_labels: bool
178
+ ) -> pd.DataFrame:
18
179
  """
19
180
  Ensemble predictions from multiple experiments.
20
181
 
@@ -31,13 +192,12 @@ def ensemble_predictions(config_files, method, no_labels):
31
192
  AssertionError: If the number of config files is less than 2 for majority voting.
32
193
 
33
194
  """
34
- ensemble_preds = []
35
- # labels = []
195
+ ensemble_preds_ls = []
36
196
  for config_file in config_files:
37
197
  if no_labels:
38
198
  # for ensembling results from Nkululeko.demo
39
- pred = pd.read_csv(config_file)
40
- labels = pred.columns[1:-2]
199
+ preds = pd.read_csv(config_file)
200
+ labels = preds.columns[1:-2]
41
201
  else:
42
202
  # for ensembling results from Nkululeko.nkululeko
43
203
  config = configparser.ConfigParser()
@@ -55,42 +215,49 @@ def ensemble_predictions(config_files, method, no_labels):
55
215
  labels = expr.util.get_labels()
56
216
  # load the experiment
57
217
  # get CSV files of predictions
58
- pred = expr.util.get_pred_name()
59
- print(f"Loading predictions from {pred}")
60
- preds = pd.read_csv(pred)
218
+ pred_name = expr.util.get_pred_name()
219
+ util.debug(f"Loading predictions from {pred_name}")
220
+ preds = pd.read_csv(pred_name)
61
221
 
62
- ensemble_preds.append(preds)
222
+ ensemble_preds_ls.append(preds)
63
223
 
64
224
  # pd concate
65
- ensemble_preds = pd.concat(ensemble_preds, axis=1)
225
+ ensemble_preds = pd.concat(ensemble_preds_ls, axis=1)
66
226
 
67
227
  if method == "majority_voting":
68
- # majority voting, get mode, works for odd number of models
69
- # raise error when number of configs only two:
70
228
  assert (
71
- len(config_files) > 2
229
+ len(ensemble_preds_ls) > 2
72
230
  ), "Majority voting only works for more than two models"
73
- ensemble_preds["predicted"] = ensemble_preds.mode(axis=1)[0]
74
-
231
+ ensemble_preds["predicted"] = majority_voting(ensemble_preds_ls)
75
232
  elif method == "mean":
76
- for label in labels:
77
- ensemble_preds[label] = ensemble_preds[label].mean(axis=1)
78
-
233
+ ensemble_preds["predicted"] = mean_ensemble(ensemble_preds, labels)
79
234
  elif method == "max":
80
- for label in labels:
81
- ensemble_preds[label] = ensemble_preds[label].max(axis=1)
82
- # get max value from all labels to inver that labels
83
-
235
+ ensemble_preds["predicted"] = max_ensemble(ensemble_preds, labels)
84
236
  elif method == "sum":
85
- for label in labels:
86
- ensemble_preds[label] = ensemble_preds[label].sum(axis=1)
87
-
237
+ ensemble_preds["predicted"] = sum_ensemble(ensemble_preds, labels)
238
+ elif method == "max_class":
239
+ ensemble_preds["predicted"], ensemble_preds["max_probability"] = (
240
+ max_class_ensemble(ensemble_preds_ls, labels)
241
+ )
242
+ elif method == "uncertainty_threshold":
243
+ ensemble_preds["predicted"] = uncertainty_threshold_ensemble(
244
+ ensemble_preds_ls, labels, threshold
245
+ )
246
+ elif method == "uncertainty_weighted":
247
+ ensemble_preds["predicted"], ensemble_preds["uncertainty"] = (
248
+ uncertainty_weighted_ensemble(ensemble_preds_ls, labels)
249
+ )
250
+ elif method == "confidence_weighted":
251
+ ensemble_preds["predicted"], ensemble_preds["confidence"] = (
252
+ confidence_weighted_ensemble(ensemble_preds_ls, labels)
253
+ )
88
254
  else:
89
255
  raise ValueError(f"Unknown ensemble method: {method}")
90
256
 
91
- # get the highest value from all labels to inver that labels
257
+ # get the highest value from all labels to infer the label
92
258
  # replace the old first predicted column
93
- ensemble_preds["predicted"] = ensemble_preds[labels].idxmax(axis=1)
259
+ if method in ["mean", "max", "sum"]:
260
+ ensemble_preds["predicted"] = ensemble_preds[labels].idxmax(axis=1)
94
261
 
95
262
  if no_labels:
96
263
  return ensemble_preds
@@ -105,14 +272,14 @@ def ensemble_predictions(config_files, method, no_labels):
105
272
 
106
273
  truth = ensemble_preds["truth"]
107
274
  predicted = ensemble_preds["predicted"]
108
- uar = (truth == predicted).mean()
109
- Util("ensemble").debug(f"UAR: {uar:.3f}")
275
+ uar = balanced_accuracy_score(truth, predicted)
276
+ acc = (truth == predicted).mean()
277
+ Util("ensemble").debug(f"{method}: UAR: {uar:.3f}, ACC: {acc:.3f}")
110
278
 
111
- # only return until 'predicted' column
112
279
  return ensemble_preds
113
280
 
114
281
 
115
- def main(src_dir):
282
+ def main(src_dir: Path) -> None:
116
283
  parser = ArgumentParser()
117
284
  parser.add_argument(
118
285
  "configs",
@@ -122,17 +289,33 @@ def main(src_dir):
122
289
  )
123
290
  parser.add_argument(
124
291
  "--method",
125
- default="majority_voting",
126
- choices=["majority_voting", "mean", "max", "sum"],
127
- help="Ensemble method to use (default: majority_voting)",
292
+ default=DEFAULT_METHOD,
293
+ choices=[
294
+ "majority_voting",
295
+ "mean",
296
+ "max",
297
+ "sum",
298
+ "max_class",
299
+ # "uncertainty_lowest",
300
+ # "entropy",
301
+ "uncertainty_threshold",
302
+ "uncertainty_weighted",
303
+ "confidence_weighted",
304
+ ],
305
+ help=f"Ensemble method to use (default: {DEFAULT_METHOD})",
306
+ )
307
+ # add threshold if method is uncertainty_threshold
308
+ parser.add_argument(
309
+ "--threshold",
310
+ default=1.0,
311
+ type=float,
312
+ help="Threshold for uncertainty_threshold method (default: 1.0, i.e. no threshold)",
128
313
  )
129
314
  parser.add_argument(
130
315
  "--outfile",
131
- default="ensemble_result.csv",
132
- help="Output file path for the ensemble predictions (default: ensemble_predictions.csv)",
316
+ default=DEFAULT_OUTFILE,
317
+ help=f"Output file path for the ensemble predictions (default: {DEFAULT_OUTFILE})",
133
318
  )
134
-
135
- # add argument if true label is not available
136
319
  parser.add_argument(
137
320
  "--no_labels",
138
321
  action="store_true",
@@ -143,14 +326,16 @@ def main(src_dir):
143
326
 
144
327
  start = time.time()
145
328
 
146
- ensemble_preds = ensemble_predictions(args.configs, args.method, args.no_labels)
329
+ ensemble_preds = ensemble_predictions(
330
+ args.configs, args.method, args.threshold, args.no_labels
331
+ )
147
332
 
148
333
  # save to csv
149
334
  ensemble_preds.to_csv(args.outfile, index=False)
150
- print(f"Ensemble predictions saved to: {args.outfile}")
151
- print(f"Ensemble done, used {time.time()-start:.2f} seconds")
335
+ Util("ensemble").debug(f"Ensemble predictions saved to: {args.outfile}")
336
+ Util("ensemble").debug(f"Ensemble done, used {time.time()-start:.2f} seconds")
152
337
 
153
- print("DONE")
338
+ Util("ensemble").debug("DONE")
154
339
 
155
340
 
156
341
  if __name__ == "__main__":
@@ -100,10 +100,8 @@ class Ast(Featureset):
100
100
  embeddings = torch.mean(last_hidden_state, dim=1)
101
101
  embeddings = embeddings.cpu().numpy()
102
102
 
103
- # convert the same from (768,) to (1, 768)
104
- # embeddings = embeddings.reshape(1, -1)
105
- print(f"hs shape: {embeddings.shape}")
106
-
103
+ # print(f"hs shape: {embeddings.shape}")
104
+ # hs shape: (1, 768)
107
105
 
108
106
  except Exception as e:
109
107
  self.util.error(f"Error extracting embeddings for file {file}: {str(e)}, fill with")
nkululeko/modelrunner.py CHANGED
@@ -238,21 +238,21 @@ class Modelrunner:
238
238
  if balancing == "ros":
239
239
  from imblearn.over_sampling import RandomOverSampler
240
240
 
241
- sampler = RandomOverSampler()
241
+ sampler = RandomOverSampler(random_state=42)
242
242
  X_res, y_res = sampler.fit_resample(
243
243
  self.feats_train, self.df_train[self.target]
244
244
  )
245
245
  elif balancing == "smote":
246
246
  from imblearn.over_sampling import SMOTE
247
247
 
248
- sampler = SMOTE()
248
+ sampler = SMOTE(random_state=42)
249
249
  X_res, y_res = sampler.fit_resample(
250
250
  self.feats_train, self.df_train[self.target]
251
251
  )
252
252
  elif balancing == "adasyn":
253
253
  from imblearn.over_sampling import ADASYN
254
254
 
255
- sampler = ADASYN()
255
+ sampler = ADASYN(random_state=42)
256
256
  X_res, y_res = sampler.fit_resample(
257
257
  self.feats_train, self.df_train[self.target]
258
258
  )
@@ -1,5 +1,6 @@
1
1
  # model_svm.py
2
2
 
3
+ import random
3
4
  from sklearn import svm
4
5
  from nkululeko.models.model import Model
5
6
 
@@ -24,6 +25,7 @@ class SVM_model(Model):
24
25
  gamma="scale",
25
26
  probability=True,
26
27
  class_weight=class_weight,
28
+ random_state=42, # for consistent result
27
29
  ) # set up the classifier
28
30
 
29
31
  def set_c(self, c):
@@ -144,10 +144,10 @@ class Reporter:
144
144
  and "uncertainty" not in self.probas
145
145
  ):
146
146
  probas = self.probas
147
- probas["predicted"] = self.preds
148
- probas["truth"] = self.truths
149
147
  # softmax the probabilities or logits
150
148
  uncertainty = probas.apply(softmax, axis=1)
149
+ probas["predicted"] = self.preds
150
+ probas["truth"] = self.truths
151
151
  try:
152
152
  le = glob_conf.label_encoder
153
153
  mapping = dict(zip(le.classes_, range(len(le.classes_))))
@@ -166,7 +166,8 @@ class Reporter:
166
166
  )
167
167
  probas["uncertainty"] = uncertainty
168
168
  probas["correct"] = probas.predicted == probas.truth
169
- sp = os.path.join(self.util.get_path("store"), "pred_df.csv")
169
+ sp = self.util.get_pred_name()
170
+
170
171
  self.probas = probas
171
172
  probas.to_csv(sp)
172
173
  self.util.debug(f"Saved probabilities to {sp}")
nkululeko/runmanager.py CHANGED
@@ -50,7 +50,7 @@ class Runmanager:
50
50
  self.last_epochs = [] # keep the epoch of best result per run
51
51
  # for all runs
52
52
  for run in range(int(self.util.config_val("EXP", "runs", 1))):
53
- self.util.debug(f"run {run}")
53
+ self.util.debug(f"run {run} using model {glob_conf.config['MODEL']['type']}")
54
54
  # set the run index as global variable for reporting
55
55
  self.util.set_config_val("EXP", "run", run)
56
56
  self.modelrunner = Modelrunner(
nkululeko/utils/util.py CHANGED
@@ -1,10 +1,13 @@
1
1
  # util.py
2
2
  import ast
3
3
  import configparser
4
+ import logging
4
5
  import os.path
5
6
  import pickle
6
7
  import sys
7
8
 
9
+ # from sysconfig import get_config_h_filename
10
+ # from turtle import setup
8
11
  import audeer
9
12
  import audformat
10
13
  import numpy as np
@@ -32,6 +35,7 @@ class Util:
32
35
  self.caller = caller
33
36
  else:
34
37
  self.caller = ""
38
+ self.config = None
35
39
  if has_config:
36
40
  try:
37
41
  import nkululeko.glob_conf as glob_conf
@@ -49,6 +53,30 @@ class Util:
49
53
  self.config = None
50
54
  self.got_data_roots = False
51
55
 
56
+ self.setup_logging()
57
+ # self.logged_configs = set()
58
+
59
def setup_logging(self):
    """Attach a message-only console handler to this module's logger.

    The logger is looked up by ``__name__``, so it is shared by every
    instance; the handler is installed only once. The configured logger is
    stored in ``self.logger``.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # emit everything from DEBUG upwards

    # Look at this logger's OWN handlers. hasHandlers() also reports handlers
    # of ancestors, so an already configured root logger would otherwise leave
    # this logger without a handler and without the DEBUG level.
    if not logger.handlers:

        class SimpleFormatter(logging.Formatter):
            """Formatter that outputs only the log message itself."""

            def format(self, record):
                return record.getMessage()

        console_handler = logging.StreamHandler()
        console_handler.setFormatter(SimpleFormatter())
        logger.addHandler(console_handler)
        # The console handler already prints every message; do not hand the
        # records to ancestor handlers too, which would print them twice.
        logger.propagate = False

    self.logger = logger
79
+
52
80
  def get_path(self, entry):
53
81
  """This method allows the user to get the directory path for the given argument."""
54
82
  if self.config is None:
@@ -120,6 +148,7 @@ class Util:
120
148
 
121
149
  def set_config(self, config):
122
150
  self.config = config
151
+ # self.logged_configs.clear()
123
152
 
124
153
  def get_save_name(self):
125
154
  """Return a relative path to a name to save the experiment"""
@@ -128,7 +157,8 @@ class Util:
128
157
 
129
158
  def get_pred_name(self):
130
159
  store = self.get_path("store")
131
- return f"{store}/pred_df.csv"
160
+ pred_name = self.get_model_description()
161
+ return f"{store}/pred_{pred_name}.csv"
132
162
 
133
163
  def is_categorical(self, pd_series):
134
164
  """Check if a dataframe column is categorical"""
@@ -233,6 +263,11 @@ class Util:
233
263
  return_string += self._get_value_descript(option[0], option[1]).replace(
234
264
  ".", "-"
235
265
  )
266
+ # prevent double underscores
267
+ return_string = return_string.replace("__", "_")
268
+ # remove trailing underscores in the end
269
+ return_string = return_string.strip("_")
270
+
236
271
  return return_string
237
272
 
238
273
  def get_plot_name(self):
@@ -249,14 +284,14 @@ class Util:
249
284
  return False
250
285
 
251
286
  def error(self, message):
252
- print(f"ERROR {self.caller}: {message}")
287
+ self.logger.error(f"ERROR: {self.caller}: {message}")
253
288
  sys.exit()
254
289
 
255
290
  def warn(self, message):
256
- print(f"WARNING {self.caller}: {message}")
291
+ self.logger.warning(f"WARNING: {self.caller}: {message}")
257
292
 
258
293
  def debug(self, message):
259
- print(f"DEBUG {self.caller}: {message}")
294
+ self.logger.debug(f"DEBUG: {self.caller}: {message}")
260
295
 
261
296
  def set_config_val(self, section, key, value):
262
297
  try:
@@ -278,9 +313,13 @@ class Util:
278
313
  return self.config[section][key]
279
314
  except KeyError:
280
315
  if default not in self.stopvals:
281
- self.debug(f"value for {key} not found, using default: {default}")
316
+ self.debug(f"value for {key} is not found, using default: {default}")
282
317
  return default
283
318
 
319
@classmethod
def reset_logged_configs(cls):
    """Empty the class-level ``logged_configs`` collection, if one exists.

    NOTE(review): the only visible initialisations of ``logged_configs`` are
    commented out, so the attribute may not exist; in that case this is a
    no-op instead of raising AttributeError.
    """
    logged_configs = getattr(cls, "logged_configs", None)
    if logged_configs is not None:
        logged_configs.clear()
322
+
284
323
  def config_val_list(self, section, key, default):
285
324
  try:
286
325
  return ast.literal_eval(self.config[section][key])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.88.2
3
+ Version: 0.88.4
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -203,7 +203,8 @@ All of them take *--config <my_config.ini>* as an argument.
203
203
  * **nkululeko.nkululeko**: do machine learning experiments combining features and learners
204
204
  * **nkululeko.ensemble**: [combine several nkululeko experiments](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/) and report on late fusion results
205
205
  * *configurations*: which experiments to combine
206
- * *--method* (optional): majority_voting, mean, max, sum
206
+ * *--method* (optional): majority_voting, mean (default), max, sum, max_class, uncertainty_threshold, uncertainty_weighted, confidence_weighted
207
+ * *--threshold* (optional): uncertainty threshold used by the uncertainty_threshold method (default: 1.0, i.e. no threshold)
207
208
  * *--outfile* (optional): name of CSV file for output
208
209
  * *--no_labels* (optional): indicate that no ground truth is given
209
210
  * **nkululeko.multidb**: do [multiple experiments](http://blog.syntheticspeech.de/2024/01/02/nkululeko-compare-several-databases/), comparing several databases cross and in itself
@@ -273,6 +274,8 @@ There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
273
274
  * [Tweak the target variable for database comparison](http://blog.syntheticspeech.de/2024/03/13/nkululeko-how-to-tweak-the-target-variable-for-database-comparison/)
274
275
  * [How to run multiple experiments in one go](http://blog.syntheticspeech.de/2022/03/28/how-to-run-multiple-experiments-in-one-go-with-nkululeko/)
275
276
  * [How to finetune a transformer-model](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
277
+ * [Ensemble (combine) classifiers with late-fusion](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/)
278
+
276
279
 
277
280
  ### <a name="helloworld">Hello World example</a>
278
281
  * NEW: [Here's a Google colab that runs this example out-of-the-box](https://colab.research.google.com/drive/1GYNBd5cdZQ1QC3Jm58qoeMaJg3UuPhjw?usp=sharing#scrollTo=4G_SjuF9xeQf), and here is the same [with Kaggle](https://www.kaggle.com/felixburk/nkululeko-hello-world-example)
@@ -356,6 +359,15 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
356
359
  Changelog
357
360
  =========
358
361
 
362
+ Version 0.88.4
363
+ --------------
364
+ * added more ensemble methods, e.g. based on uncertainty
365
+
366
+ Version 0.88.3
367
+ --------------
368
+ * fixed a bug that caused wrong uncertainty estimation
369
+ * changed demo live recording
370
+
359
371
  Version 0.88.2
360
372
  --------------
361
373
  * changed combine speaker results to show speakers not samples
@@ -2,11 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
3
3
  nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
4
4
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
5
- nkululeko/constants.py,sha256=92td3PSYccIF_YkZhW6EMRo70neUjL_2Wj7JXyHzoq4,39
5
+ nkululeko/constants.py,sha256=iiVolfJ9RJn2fD9QaaoFnxuLzxJos6Q4H3tzHQGLfp4,39
6
6
  nkululeko/demo.py,sha256=bLuHkeEl5rOfm7ecGHCcWATiPK7-njNbtrGljxzNzFs,5088
7
7
  nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
8
- nkululeko/demo_predictor.py,sha256=es56xbT8ifkS_vnrlb5NTZT54gNmeUtNlA4zVA_gnN8,4757
9
- nkululeko/ensemble.py,sha256=huRbXUuabm6QYxGBHjkwEU95e-0qxtO0Z6UdXFgtaMY,4947
8
+ nkululeko/demo_predictor.py,sha256=zs1bjhpnKuNCPLJeiyDm19ME1NEDOQT3QNeyVKJq9Yc,4882
9
+ nkululeko/ensemble.py,sha256=rUHg8YmD6L8Ktt2T5M6iwsWVWbpCnfiynhHdN22bLRQ,11873
10
10
  nkululeko/experiment.py,sha256=wXZnb_cfOqF8b0Zqzu2bbrEgCCpG_zPkDbD-Usw5sRs,31283
11
11
  nkululeko/explore.py,sha256=lDzRoW_Taa5u4BBABZLD89BcQWnYlrftJR4jgt1yyj0,2609
12
12
  nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
@@ -14,14 +14,14 @@ nkululeko/feature_extractor.py,sha256=UnspIWz3XrNhKnBBhWZkH2bHvD-sROtrQVqB1JvkUy
14
14
  nkululeko/file_checker.py,sha256=LoLnL8aHpW-axMQ46qbqrManTs5otG9ShpEZuz9iRSk,3474
15
15
  nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
16
16
  nkululeko/glob_conf.py,sha256=KL9YJQTHvTztxo1vr25qRRgaPnx4NTg0XrdbovKGMmw,525
17
- nkululeko/modelrunner.py,sha256=rpWQRXERiDZ-i_7CwsqynI87vawtsaPihsonDMPe9PU,11151
17
+ nkululeko/modelrunner.py,sha256=cKYD9a7MRoBxfqUy3X8kf6rGTYho-33In8I9YkzMOo8,11196
18
18
  nkululeko/multidb.py,sha256=fG3VukEWP1vreVN4gB1IRXxwwg4jLftsSEYtu0o1f78,5634
19
19
  nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
20
20
  nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
21
21
  nkululeko/plots.py,sha256=WsI_dtPKfrYPsKymHRmIhqj33aZzTcE8fF_EwLkm_5A,22899
22
22
  nkululeko/predict.py,sha256=sF091sSSLnEWcISx9ZcULLie3tY5XeFsQJd6b3vrxFg,2409
23
23
  nkululeko/resample.py,sha256=2d9eao_0sLrGZ_KSl8OVKsPor3BkFrlmMhrpB9WelIs,4267
24
- nkululeko/runmanager.py,sha256=eRMJidkoJhkU5NdIKoozv3vovU-8tqfn-7zqr2JZcnE,7533
24
+ nkululeko/runmanager.py,sha256=xvxL5a9d3jtGFqx0Z3nyyxowA368uNyP0ZitO8kxIIE,7581
25
25
  nkululeko/scaler.py,sha256=4nkIqoajkIkuTPK0Z02ifMN_awl6fP_i-GBYdoGYgGM,4101
26
26
  nkululeko/segment.py,sha256=YLKckX44tbvTb3LrdgYw9X4guzuF27sutl92z9DkpZU,4835
27
27
  nkululeko/syllable_nuclei.py,sha256=Sky-C__MeUDaxqHnDl2TGLLYOYvsahD35TUjWGeG31k,10047
@@ -52,7 +52,7 @@ nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
52
52
  nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
53
53
  nkululeko/feat_extract/feats_agender_agender.py,sha256=tgH2BnwcxpvuLmOkrMbVdBSX0Onfz2MG12FsddalRKI,3424
54
54
  nkululeko/feat_extract/feats_analyser.py,sha256=Y9hMpZ9WsQOrxTP3B1diHnzMeOgwbVpVFWVlIyhHMJs,12722
55
- nkululeko/feat_extract/feats_ast.py,sha256=LolDaRTfNB9L8-CUqz9tOfkXntL8c9GJs4kqMmg5BSo,4724
55
+ nkululeko/feat_extract/feats_ast.py,sha256=ycJn5eSVOxcEpmeHVk0FPB8q5XiTC8VSKz61L9n0Wa4,4638
56
56
  nkululeko/feat_extract/feats_auddim.py,sha256=VlzKKXTXa5kjLgQBWyEFy-daIyU1SkOwCCOIhKsWCvE,3162
57
57
  nkululeko/feat_extract/feats_audmodel.py,sha256=VjBNgAoxsHJhwr6Kwt9CxX6SaCM4RK_OV-GU2W5-bhU,3187
58
58
  nkululeko/feat_extract/feats_clap.py,sha256=nR6eEIRdsMHcfmD1bNtt5WfDvkxKjvEbukSSrXHm-HU,3489
@@ -86,7 +86,7 @@ nkululeko/models/model_knn_reg.py,sha256=j7YFfVm6xOR2d9yBYdQiwwqYfqkX0JynX_qLCvk
86
86
  nkululeko/models/model_lin_reg.py,sha256=0D7mSnSwK82lNWDMwHYRyq3FmGa6y-DHDGg4qUe85q4,422
87
87
  nkululeko/models/model_mlp.py,sha256=VE0CI19qMyRbI-THDkMeJ7JbWf4z7CmZ4MMs1FIQgtM,10557
88
88
  nkululeko/models/model_mlp_regression.py,sha256=7oK2zQhhCegSqiBUe6eU7Av8MJ_DPLA9skixJcHaVfg,10232
89
- nkululeko/models/model_svm.py,sha256=rsME3KvKvNG7bdE5lbvYUu85WZhaASZxxmdNDIVJRZ4,940
89
+ nkululeko/models/model_svm.py,sha256=AzWksBRbIdpUuMbDnAh_YAXebewR5POj9AkB9VC40pI,1010
90
90
  nkululeko/models/model_svr.py,sha256=_YZeksqB3eBENGlg3g9RwYFlk9rQQ-XCeNBKLlGGVoE,725
91
91
  nkululeko/models/model_tree.py,sha256=rf16faUm4o2LJgkoYpeY998b8DQIvXZ73_m1IS3TnnE,417
92
92
  nkululeko/models/model_tree_reg.py,sha256=IgQcPTE-304HQLYSKPF8Z4ot_Ur9dH01fZjS0nXke_M,428
@@ -98,7 +98,7 @@ nkululeko/reporting/defines.py,sha256=IsY1YgKRMaABpylVKjBJgJ5bNCEbGCVA_E6pivraqS
98
98
  nkululeko/reporting/latex_writer.py,sha256=qiCRSmB4KOD_za4oHu5x-PhwjZohzfo8wecMOwlXZwc,1886
99
99
  nkululeko/reporting/report.py,sha256=W0rcigDdjBvxZQ3pZja_gvToILYvaZ1BFtnN2qFRfYI,1060
100
100
  nkululeko/reporting/report_item.py,sha256=siWeGNgo4bAE46YBMNcsdf3jTMTy76BO9Fi6DTvDig4,533
101
- nkululeko/reporting/reporter.py,sha256=77u9t3v_ilqOEToISPPcRffCQuawhgGO3xKnVFGs_pg,19237
101
+ nkululeko/reporting/reporter.py,sha256=vV6SAHWSIvybFvXBGapHjPmWWhKxIsIWuVO-uY9RHzQ,19219
102
102
  nkululeko/reporting/result.py,sha256=G63a2tHCwHhM6NBJgYzsWKWJm4Yu3r4hsCHA2Km7eHU,1073
103
103
  nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
104
104
  nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=pmLHuXsaqvcdYxB4PSW9l1mbQWZZBJFhi_CGabqydas,1947
@@ -106,9 +106,9 @@ nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5Ev
106
106
  nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
107
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
108
108
  nkululeko/utils/stats.py,sha256=eC9dMO-by6CDnGLHDBQu-2B4-BudZNJ0nnWGhKYdUMA,2968
109
- nkululeko/utils/util.py,sha256=BNd9JpoVakPbyysKBsJSCnqlbPlUKHUrcWYcwEnOdVA,15128
110
- nkululeko-0.88.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
111
- nkululeko-0.88.2.dist-info/METADATA,sha256=VL3DswyjLpnRvaQkV8jDGw7OszOv-pfQC_i9j57lyLs,39119
112
- nkululeko-0.88.2.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
113
- nkululeko-0.88.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
114
- nkululeko-0.88.2.dist-info/RECORD,,
109
+ nkululeko/utils/util.py,sha256=nK108-v6UubFj2kjJo38flS2yTTeUZyu3gNBGyhaR1c,16512
110
+ nkululeko-0.88.4.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
111
+ nkululeko-0.88.4.dist-info/METADATA,sha256=WHQrQU39sA1MuTnFTF6Fs47wWfVAtcQTQ4Tga_i5gB0,39583
112
+ nkululeko-0.88.4.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
113
+ nkululeko-0.88.4.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
114
+ nkululeko-0.88.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (70.1.1)
2
+ Generator: setuptools (70.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5