nkululeko 0.86.8__py3-none-any.whl → 0.87.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.86.8"
1
+ VERSION="0.87.0"
2
2
  SAMPLING_RATE = 16000
@@ -23,6 +23,9 @@ class Dataset_CSV(Dataset):
23
23
  root = os.path.dirname(data_file)
24
24
  audio_path = self.util.config_val_data(self.name, "audio_path", "./")
25
25
  df = pd.read_csv(data_file)
26
+ # trim all string values
27
+ df_obj = df.select_dtypes("object")
28
+ df[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())
26
29
  # special treatment for segmented dataframes with only one column:
27
30
  if "start" in df.columns and len(df.columns) == 4:
28
31
  index = audformat.segmented_index(
@@ -49,8 +52,7 @@ class Dataset_CSV(Dataset):
49
52
  .map(lambda x: root + "/" + audio_path + "/" + x)
50
53
  .values
51
54
  )
52
- df = df.set_index(df.index.set_levels(
53
- file_index, level="file"))
55
+ df = df.set_index(df.index.set_levels(file_index, level="file"))
54
56
  else:
55
57
  if not isinstance(df, pd.DataFrame):
56
58
  df = pd.DataFrame(df)
@@ -59,27 +61,24 @@ class Dataset_CSV(Dataset):
59
61
  lambda x: root + "/" + audio_path + "/" + x
60
62
  )
61
63
  )
62
- else: # absolute path is True
64
+ else: # absolute path is True
63
65
  if audformat.index_type(df.index) == "segmented":
64
66
  file_index = (
65
- df.index.levels[0]
66
- .map(lambda x: audio_path + "/" + x)
67
- .values
67
+ df.index.levels[0].map(lambda x: audio_path + "/" + x).values
68
68
  )
69
- df = df.set_index(df.index.set_levels(
70
- file_index, level="file"))
69
+ df = df.set_index(df.index.set_levels(file_index, level="file"))
71
70
  else:
72
71
  if not isinstance(df, pd.DataFrame):
73
72
  df = pd.DataFrame(df)
74
- df = df.set_index(df.index.to_series().apply(
75
- lambda x: audio_path + "/" + x ))
73
+ df = df.set_index(
74
+ df.index.to_series().apply(lambda x: audio_path + "/" + x)
75
+ )
76
76
 
77
77
  self.df = df
78
78
  self.db = None
79
79
  self.got_target = True
80
80
  self.is_labeled = self.got_target
81
- self.start_fresh = eval(
82
- self.util.config_val("DATA", "no_reuse", "False"))
81
+ self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
83
82
  is_index = False
84
83
  try:
85
84
  if self.is_labeled and not "class_label" in self.df.columns:
@@ -106,8 +105,7 @@ class Dataset_CSV(Dataset):
106
105
  f" {self.got_gender}, got age: {self.got_age}"
107
106
  )
108
107
  self.util.debug(r_string)
109
- glob_conf.report.add_item(ReportItem(
110
- "Data", "Loaded report", r_string))
108
+ glob_conf.report.add_item(ReportItem("Data", "Loaded report", r_string))
111
109
 
112
110
  def prepare(self):
113
111
  super().prepare()
nkululeko/demo.py CHANGED
@@ -30,10 +30,8 @@ from transformers import pipeline
30
30
 
31
31
 
32
32
  def main(src_dir):
33
- parser = argparse.ArgumentParser(
34
- description="Call the nkululeko DEMO framework.")
35
- parser.add_argument("--config", default="exp.ini",
36
- help="The base configuration")
33
+ parser = argparse.ArgumentParser(description="Call the nkululeko DEMO framework.")
34
+ parser.add_argument("--config", default="exp.ini", help="The base configuration")
37
35
  parser.add_argument(
38
36
  "--file", help="A file that should be processed (16kHz mono wav)"
39
37
  )
@@ -84,8 +82,7 @@ def main(src_dir):
84
82
  )
85
83
 
86
84
  def print_pipe(files, outfile):
87
- """
88
- Prints the pipeline output for a list of files, and optionally writes the results to an output file.
85
+ """Prints the pipeline output for a list of files, and optionally writes the results to an output file.
89
86
 
90
87
  Args:
91
88
  files (list): A list of file paths to process through the pipeline.
@@ -108,8 +105,7 @@ def main(src_dir):
108
105
  f.write("\n".join(results))
109
106
 
110
107
  if util.get_model_type() == "finetune":
111
- model_path = os.path.join(
112
- util.get_exp_dir(), "models", "run_0", "torch")
108
+ model_path = os.path.join(util.get_exp_dir(), "models", "run_0", "torch")
113
109
  pipe = pipeline("audio-classification", model=model_path)
114
110
  if args.file is not None:
115
111
  print_pipe([args.file], args.outfile)
nkululeko/modelrunner.py CHANGED
@@ -85,7 +85,7 @@ class Modelrunner:
85
85
  f"run: {self.run} epoch: {epoch}: result: {test_score_metric}"
86
86
  )
87
87
  # print(f"performance: {performance.split(' ')[1]}")
88
- performance = float(test_score_metric.split(' ')[1])
88
+ performance = float(test_score_metric.split(" ")[1])
89
89
  if performance > self.best_performance:
90
90
  self.best_performance = performance
91
91
  self.best_epoch = epoch
@@ -204,15 +204,15 @@ class Modelrunner:
204
204
  self.df_train, self.df_test, self.feats_train, self.feats_test
205
205
  )
206
206
  elif model_type == "cnn":
207
- from nkululeko.models.model_cnn import CNN_model
207
+ from nkululeko.models.model_cnn import CNNModel
208
208
 
209
- self.model = CNN_model(
209
+ self.model = CNNModel(
210
210
  self.df_train, self.df_test, self.feats_train, self.feats_test
211
211
  )
212
212
  elif model_type == "mlp":
213
- from nkululeko.models.model_mlp import MLP_model
213
+ from nkululeko.models.model_mlp import MLPModel
214
214
 
215
- self.model = MLP_model(
215
+ self.model = MLPModel(
216
216
  self.df_train, self.df_test, self.feats_train, self.feats_test
217
217
  )
218
218
  elif model_type == "mlp_reg":
nkululeko/models/model.py CHANGED
@@ -247,8 +247,25 @@ class Model:
247
247
  self.clf.fit(feats, labels)
248
248
 
249
249
  def get_predictions(self):
250
- predictions = self.clf.predict(self.feats_test.to_numpy())
251
- return predictions
250
+ # predictions = self.clf.predict(self.feats_test.to_numpy())
251
+ if self.util.exp_is_classification():
252
+ # make a dataframe for the class probabilities
253
+ proba_d = {}
254
+ for c in self.clf.classes_:
255
+ proba_d[c] = []
256
+ # get the class probabilities
257
+ predictions = self.clf.predict_proba(self.feats_test.to_numpy())
258
+ # pred = self.clf.predict(features)
259
+ for i, c in enumerate(self.clf.classes_):
260
+ proba_d[c] = list(predictions.T[i])
261
+ probas = pd.DataFrame(proba_d)
262
+ probas = probas.set_index(self.feats_test.index)
263
+ predictions = probas.idxmax(axis=1).values
264
+ else:
265
+ predictions = self.clf.predict(self.feats_test.to_numpy())
266
+ probas = None
267
+
268
+ return predictions, probas
252
269
 
253
270
  def predict(self):
254
271
  if self.feats_test.isna().to_numpy().any():
@@ -263,13 +280,16 @@ class Model:
263
280
  )
264
281
  return report
265
282
  """Predict the whole eval feature set"""
266
- predictions = self.get_predictions()
283
+ predictions, probas = self.get_predictions()
284
+
267
285
  report = Reporter(
268
286
  self.df_test[self.target].to_numpy().astype(float),
269
287
  predictions,
270
288
  self.run,
271
289
  self.epoch,
290
+ probas=probas,
272
291
  )
292
+ report.print_probabilities()
273
293
  return report
274
294
 
275
295
  def get_type(self):
@@ -5,33 +5,40 @@ Inspired by code from Su Lei
5
5
 
6
6
  """
7
7
 
8
+ import ast
9
+ from collections import OrderedDict
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+ from PIL import Image
14
+ from sklearn.metrics import recall_score
8
15
  import torch
9
16
  import torch.nn as nn
10
17
  import torch.nn.functional as F
11
- import torchvision
12
- import torchvision.transforms as transforms
13
18
  from torch.utils.data import Dataset
14
- import ast
15
- import numpy as np
16
- from sklearn.metrics import recall_score
17
- from collections import OrderedDict
18
- from PIL import Image
19
- from traitlets import default
19
+ import torchvision.transforms as transforms
20
20
 
21
- from nkululeko.utils.util import Util
22
21
  import nkululeko.glob_conf as glob_conf
22
+ from nkululeko.losses.loss_softf1loss import SoftF1Loss
23
23
  from nkululeko.models.model import Model
24
24
  from nkululeko.reporting.reporter import Reporter
25
- from nkululeko.losses.loss_softf1loss import SoftF1Loss
25
+ from nkululeko.utils.util import Util
26
26
 
27
27
 
28
- class CNN_model(Model):
29
- """CNN = convolutional neural net"""
28
+ class CNNModel(Model):
29
+ """CNN = convolutional neural net."""
30
30
 
31
31
  is_classifier = True
32
32
 
33
33
  def __init__(self, df_train, df_test, feats_train, feats_test):
34
- """Constructor taking the configuration and all dataframes"""
34
+ """Constructor, taking all dataframes.
35
+
36
+ Args:
37
+ df_train (pd.DataFrame): The train labels.
38
+ df_test (pd.DataFrame): The test labels.
39
+ feats_train (pd.DataFrame): The train features.
40
+ feats_test (pd.DataFrame): The test features.
41
+ """
35
42
  super().__init__(df_train, df_test, feats_train, feats_test)
36
43
  super().set_model_type("ann")
37
44
  self.name = "cnn"
@@ -147,7 +154,20 @@ class CNN_model(Model):
147
154
  self.optimizer.step()
148
155
  self.loss = (np.asarray(losses)).mean()
149
156
 
150
- def evaluate_model(self, model, loader, device):
157
+ def get_probas(self, logits):
158
+ # make a dataframe for probabilites (logits)
159
+ proba_d = {}
160
+ classes = self.df_test[self.target].unique()
161
+ classes.sort()
162
+ for c in classes:
163
+ proba_d[c] = []
164
+ for i, c in enumerate(classes):
165
+ proba_d[c] = list(logits.numpy().T[i])
166
+ probas = pd.DataFrame(proba_d)
167
+ probas = probas.set_index(self.df_test.index)
168
+ return probas
169
+
170
+ def evaluate(self, model, loader, device):
151
171
  logits = torch.zeros(len(loader.dataset), self.class_num)
152
172
  targets = torch.zeros(len(loader.dataset))
153
173
  model.eval()
@@ -169,14 +189,15 @@ class CNN_model(Model):
169
189
  self.loss_eval = (np.asarray(losses)).mean()
170
190
  predictions = logits.argmax(dim=1)
171
191
  uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
172
- return uar, targets, predictions
192
+ return uar, targets, predictions, logits
173
193
 
174
194
  def predict(self):
175
- _, truths, predictions = self.evaluate_model(
195
+ _, truths, predictions, logits = self.evaluate(
176
196
  self.model, self.testloader, self.device
177
197
  )
178
- uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
179
- report = Reporter(truths, predictions, self.run, self.epoch)
198
+ uar, _, _, _ = self.evaluate(self.model, self.trainloader, self.device)
199
+ probas = self.get_probas(logits)
200
+ report = Reporter(truths, predictions, self.run, self.epoch, probas=probas)
180
201
  try:
181
202
  report.result.loss = self.loss
182
203
  except AttributeError: # if the model was loaded from disk the loss is unknown
@@ -189,13 +210,11 @@ class CNN_model(Model):
189
210
  return report
190
211
 
191
212
  def get_predictions(self):
192
- _, truths, predictions = self.evaluate_model(
193
- self.model, self.testloader, self.device
194
- )
213
+ _, _, predictions, _ = self.evaluate(self.model, self.testloader, self.device)
195
214
  return predictions.numpy()
196
215
 
197
216
  def predict_sample(self, features):
198
- """Predict one sample"""
217
+ """Predict one sample."""
199
218
  with torch.no_grad():
200
219
  logits = self.model(torch.from_numpy(features).to(self.device))
201
220
  a = logits.numpy()
@@ -1,25 +1,33 @@
1
1
  # model_mlp.py
2
+ import ast
3
+ from collections import OrderedDict
4
+
5
+ import numpy as np
2
6
  import pandas as pd
7
+ from sklearn.metrics import recall_score
8
+ import torch
3
9
 
4
- from nkululeko.utils.util import Util
5
10
  import nkululeko.glob_conf as glob_conf
11
+ from nkululeko.losses.loss_softf1loss import SoftF1Loss
6
12
  from nkululeko.models.model import Model
7
13
  from nkululeko.reporting.reporter import Reporter
8
- import torch
9
- import ast
10
- import numpy as np
11
- from sklearn.metrics import recall_score
12
- from collections import OrderedDict
13
- from nkululeko.losses.loss_softf1loss import SoftF1Loss
14
+ from nkululeko.utils.util import Util
14
15
 
15
16
 
16
- class MLP_model(Model):
17
+ class MLPModel(Model):
17
18
  """MLP = multi layer perceptron."""
18
19
 
19
20
  is_classifier = True
20
21
 
21
22
  def __init__(self, df_train, df_test, feats_train, feats_test):
22
- """Constructor taking the configuration and all dataframes."""
23
+ """Constructor, taking all dataframes.
24
+
25
+ Args:
26
+ df_train (pd.DataFrame): The train labels.
27
+ df_test (pd.DataFrame): The test labels.
28
+ feats_train (pd.DataFrame): The train features.
29
+ feats_test (pd.DataFrame): The test features.
30
+ """
23
31
  super().__init__(df_train, df_test, feats_train, feats_test)
24
32
  super().set_model_type("ann")
25
33
  self.name = "mlp"
@@ -97,7 +105,7 @@ class MLP_model(Model):
97
105
  self.optimizer.step()
98
106
  self.loss = (np.asarray(losses)).mean()
99
107
 
100
- def evaluate_model(self, model, loader, device):
108
+ def evaluate(self, model, loader, device):
101
109
  logits = torch.zeros(len(loader.dataset), self.class_num)
102
110
  targets = torch.zeros(len(loader.dataset))
103
111
  model.eval()
@@ -119,14 +127,28 @@ class MLP_model(Model):
119
127
  self.loss_eval = (np.asarray(losses)).mean()
120
128
  predictions = logits.argmax(dim=1)
121
129
  uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
122
- return uar, targets, predictions
130
+ return uar, targets, predictions, logits
131
+
132
+ def get_probas(self, logits):
133
+ # make a dataframe for probabilites (logits)
134
+ proba_d = {}
135
+ classes = self.df_test[self.target].unique()
136
+ classes.sort()
137
+ for c in classes:
138
+ proba_d[c] = []
139
+ for i, c in enumerate(classes):
140
+ proba_d[c] = list(logits.numpy().T[i])
141
+ probas = pd.DataFrame(proba_d)
142
+ probas = probas.set_index(self.df_test.index)
143
+ return probas
123
144
 
124
145
  def predict(self):
125
- _, truths, predictions = self.evaluate_model(
146
+ _, truths, predictions, logits = self.evaluate(
126
147
  self.model, self.testloader, self.device
127
148
  )
128
- uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
129
- report = Reporter(truths, predictions, self.run, self.epoch)
149
+ uar, _, _, _ = self.evaluate(self.model, self.trainloader, self.device)
150
+ probas = self.get_probas(logits)
151
+ report = Reporter(truths, predictions, self.run, self.epoch, probas=probas)
130
152
  try:
131
153
  report.result.loss = self.loss
132
154
  except AttributeError: # if the model was loaded from disk the loss is unknown
@@ -139,9 +161,7 @@ class MLP_model(Model):
139
161
  return report
140
162
 
141
163
  def get_predictions(self):
142
- _, truths, predictions = self.evaluate_model(
143
- self.model, self.testloader, self.device
144
- )
164
+ _, _, predictions, _ = self.evaluate(self.model, self.testloader, self.device)
145
165
  return predictions.numpy()
146
166
 
147
167
  def get_loader(self, df_x, df_y, shuffle):
@@ -97,7 +97,9 @@ class MLP_Reg_model(Model):
97
97
  self.model, self.testloader, self.device
98
98
  )
99
99
  result, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
100
- report = Reporter(truths.numpy(), predictions.numpy(), self.run, self.epoch)
100
+ report = Reporter(
101
+ truths.numpy(), predictions.numpy(), None, self.run, self.epoch
102
+ )
101
103
  try:
102
104
  report.result.loss = self.loss
103
105
  except AttributeError: # if the model was loaded from disk the loss is unknown
nkululeko/plots.py CHANGED
@@ -48,7 +48,7 @@ class Plots:
48
48
  )
49
49
  ax.set_ylabel(f"number of speakers")
50
50
  ax.set_xlabel("number of samples")
51
- self._save_plot(
51
+ self.save_plot(
52
52
  ax,
53
53
  "Samples per speaker",
54
54
  f"Samples per speaker ({df_speakers.shape[0]})",
@@ -70,9 +70,9 @@ class Plots:
70
70
  rot=0,
71
71
  )
72
72
  )
73
- ax.set_ylabel(f"number of speakers")
73
+ ax.set_ylabel("number of speakers")
74
74
  ax.set_xlabel("number of samples")
75
- self._save_plot(
75
+ self.save_plot(
76
76
  ax,
77
77
  "Sample value counts",
78
78
  f"Samples per speaker ({df_speakers.shape[0]})",
@@ -96,7 +96,7 @@ class Plots:
96
96
  binned_data = self.util.continuous_to_categorical(df[class_label])
97
97
  ax = binned_data.value_counts().plot(kind="bar")
98
98
  filename_binned = f"{class_label}_discreet"
99
- self._save_plot(
99
+ self.save_plot(
100
100
  ax,
101
101
  "Sample value counts",
102
102
  filename_binned,
@@ -106,7 +106,7 @@ class Plots:
106
106
  dist_type = self.util.config_val("EXPL", "dist_type", "hist")
107
107
  ax = df[class_label].plot(kind=dist_type)
108
108
 
109
- self._save_plot(
109
+ self.save_plot(
110
110
  ax,
111
111
  "Sample value counts",
112
112
  filename,
@@ -131,17 +131,17 @@ class Plots:
131
131
  df, class_label, att1, self.target, type_s
132
132
  )
133
133
  else:
134
- ax, caption = self._plotcatcont(
134
+ ax, caption = self.plotcatcont(
135
135
  df, class_label, att1, att1, type_s
136
136
  )
137
137
  else:
138
138
  if self.util.is_categorical(df[att1]):
139
- ax, caption = self._plotcatcont(
139
+ ax, caption = self.plotcatcont(
140
140
  df, att1, class_label, att1, type_s
141
141
  )
142
142
  else:
143
143
  ax, caption = self._plot2cont(df, class_label, att1, type_s)
144
- self._save_plot(
144
+ self.save_plot(
145
145
  ax,
146
146
  caption,
147
147
  f"Correlation of {self.target} and {att[0]}",
@@ -171,15 +171,11 @@ class Plots:
171
171
  ax, caption = self._plot2cat(df, att1, att2, att1, type_s)
172
172
  else:
173
173
  # class_label = cat, att1 = cat, att2 = cont
174
- ax, caption = self._plotcatcont(
175
- df, att1, att2, att1, type_s
176
- )
174
+ ax, caption = self.plotcatcont(df, att1, att2, att1, type_s)
177
175
  else:
178
176
  if self.util.is_categorical(df[att2]):
179
177
  # class_label = cat, att1 = cont, att2 = cat
180
- ax, caption = self._plotcatcont(
181
- df, att2, att1, att2, type_s
182
- )
178
+ ax, caption = self.plotcatcont(df, att2, att1, att2, type_s)
183
179
  else:
184
180
  # class_label = cat, att1 = cont, att2 = cont
185
181
  ax, caption = self._plot2cont_cat(
@@ -205,7 +201,7 @@ class Plots:
205
201
  # class_label = cont, att1 = cont, att2 = cont
206
202
  ax, caption = self._plot2cont(df, att1, att2, type_s)
207
203
 
208
- self._save_plot(
204
+ self.save_plot(
209
205
  ax, caption, f"Correlation of {att1} and {att2}", filename, type_s
210
206
  )
211
207
 
@@ -215,16 +211,16 @@ class Plots:
215
211
  f" {att} has more than 2 values. Perhaps you forgot to state a list of lists?"
216
212
  )
217
213
 
218
- def _save_plot(self, ax, caption, header, filename, type_s):
214
+ def save_plot(self, ax, caption, header, filename, type_s):
219
215
  # one up because of the runs
220
216
  fig_dir = self.util.get_path("fig_dir") + "../"
221
- fig = ax.figure
217
+ fig_plots = ax.figure
222
218
  # avoid warning
223
219
  # plt.tight_layout()
224
220
  img_path = f"{fig_dir}{filename}_{type_s}.{self.format}"
225
221
  plt.savefig(img_path)
226
- plt.close(fig)
227
- # fig.clear() # avoid error
222
+ plt.close(fig_plots)
223
+ self.util.debug(f"Saved plot to {img_path}")
228
224
  glob_conf.report.add_item(
229
225
  ReportItem(
230
226
  Header.HEADER_EXPLORE,
@@ -244,35 +240,29 @@ class Plots:
244
240
  return att, df
245
241
 
246
242
  def _plot2cont_cat(self, df, cont1, cont2, cat, ylab):
247
- """
248
- plot relation of two continuous distributions with one categorical
249
- """
243
+ """Plot relation of two continuous distributions with one categorical."""
250
244
  pearson = stats.pearsonr(df[cont1], df[cont2])
251
245
  # trunc to three digits
252
246
  pearson = int(pearson[0] * 1000) / 1000
253
247
  pearson_string = f"PCC: {pearson}"
254
248
  ax = sns.lmplot(data=df, x=cont1, y=cont2, hue=cat)
255
249
  caption = f"{ylab} {df.shape[0]}. {pearson_string}"
256
- ax.fig.suptitle(caption)
250
+ ax.figure.suptitle(caption)
257
251
  return ax, caption
258
252
 
259
253
  def _plot2cont(self, df, col1, col2, ylab):
260
- """
261
- plot relation of two continuous distributions
262
- """
254
+ """Plot relation of two continuous distributions."""
263
255
  pearson = stats.pearsonr(df[col1], df[col2])
264
256
  # trunc to three digits
265
257
  pearson = int(pearson[0] * 1000) / 1000
266
258
  pearson_string = f"PCC: {pearson}"
267
259
  ax = sns.lmplot(data=df, x=col1, y=col2)
268
260
  caption = f"{ylab} {df.shape[0]}. {pearson_string}"
269
- ax.fig.suptitle(caption)
261
+ ax.figure.suptitle(caption)
270
262
  return ax, caption
271
263
 
272
- def _plotcatcont(self, df, cat_col, cont_col, xlab, ylab):
273
- """
274
- plot relation of categorical distribution with continuous
275
- """
264
+ def plotcatcont(self, df, cat_col, cont_col, xlab, ylab):
265
+ """Plot relation of categorical distribution with continuous."""
276
266
  dist_type = self.util.config_val("EXPL", "dist_type", "hist")
277
267
  cats, cat_str, es = su.get_effect_size(df, cat_col, cont_col)
278
268
  if dist_type == "hist":
@@ -287,13 +277,11 @@ class Plots:
287
277
  )
288
278
  ax.set(xlabel=f"{cont_col}")
289
279
  caption = f"{ylab} {df.shape[0]}. {cat_str} ({cats}):" f" {es}"
290
- ax.fig.suptitle(caption)
280
+ ax.figure.suptitle(caption)
291
281
  return ax, caption
292
282
 
293
283
  def _plot2cat(self, df, col1, col2, xlab, ylab):
294
- """
295
- plot relation of 2 categorical distributions
296
- """
284
+ """Plot relation of 2 categorical distributions."""
297
285
  crosstab = pd.crosstab(index=df[col1], columns=df[col2])
298
286
  res_pval = stats.chi2_contingency(crosstab)
299
287
  res_pval = int(res_pval[1] * 1000) / 1000
@@ -320,8 +308,8 @@ class Plots:
320
308
  max = self.util.to_3_digits(df.duration.max())
321
309
  title = f"Duration distr. for {sample_selection} {df.shape[0]}. min={min}, max={max}"
322
310
  ax.set_title(title)
323
- ax.set_xlabel(f"duration")
324
- ax.set_ylabel(f"number of samples")
311
+ ax.set_xlabel("duration")
312
+ ax.set_ylabel("number of samples")
325
313
  fig = ax.figure
326
314
  # plt.tight_layout()
327
315
  img_path = f"{fig_dir}{filename}_{sample_selection}.{self.format}"
@@ -2,16 +2,21 @@ import ast
2
2
  import glob
3
3
  import json
4
4
  import math
5
+ import os
5
6
 
6
7
  from confidence_intervals import evaluate_with_conf_int
7
8
  import matplotlib.pyplot as plt
8
9
  import numpy as np
10
+ from scipy.special import softmax
11
+ from scipy.stats import entropy
9
12
  from scipy.stats import pearsonr
10
- from sklearn.metrics import ConfusionMatrixDisplay, roc_curve
13
+ from sklearn.metrics import ConfusionMatrixDisplay
14
+ from sklearn.metrics import auc
11
15
  from sklearn.metrics import classification_report
12
16
  from sklearn.metrics import confusion_matrix
13
17
  from sklearn.metrics import r2_score
14
- from sklearn.metrics import roc_curve, auc, roc_auc_score
18
+ from sklearn.metrics import roc_auc_score
19
+ from sklearn.metrics import roc_curve
15
20
  from torch import is_tensor
16
21
 
17
22
  from audmetric import accuracy
@@ -21,6 +26,7 @@ from audmetric import mean_squared_error
21
26
  from audmetric import unweighted_average_recall
22
27
 
23
28
  import nkululeko.glob_conf as glob_conf
29
+ from nkululeko.plots import Plots
24
30
  from nkululeko.reporting.defines import Header
25
31
  from nkululeko.reporting.report_item import ReportItem
26
32
  from nkululeko.reporting.result import Result
@@ -46,9 +52,18 @@ class Reporter:
46
52
  self.MEASURE = "CCC"
47
53
  self.result.measure = self.MEASURE
48
54
 
49
- def __init__(self, truths, preds, run, epoch):
50
- """Initialization with ground truth und predictions vector."""
55
+ def __init__(self, truths, preds, run, epoch, probas=None):
56
+ """Initialization with ground truth und predictions vector.
57
+
58
+ Args:
59
+ truths (list): the ground truth
60
+ preds (list): the predictions
61
+ run (int): number of run
62
+ epoch (int): number of epoch
63
+ probas (pd.Dataframe, optional): probabilities per class. Defaults to None.
64
+ """
51
65
  self.util = Util("reporter")
66
+ self.probas = probas
52
67
  self.format = self.util.config_val("PLOT", "format", "png")
53
68
  self.truths = np.asarray(truths)
54
69
  self.preds = np.asarray(preds)
@@ -108,6 +123,47 @@ class Reporter:
108
123
  self.result.test = test_result
109
124
  self.result.set_upper_lower(upper, lower)
110
125
  # train and loss are being set by the model
126
+ # print out the class probilities
127
+
128
+ def print_probabilities(self):
129
+ """Print the probabilities per class to a file in the store."""
130
+ if (
131
+ self.util.exp_is_classification()
132
+ and self.probas is not None
133
+ and "uncertainty" not in self.probas
134
+ ):
135
+ probas = self.probas
136
+ probas["predicted"] = self.preds
137
+ probas["truth"] = self.truths
138
+ # softmax the probabilities or logits
139
+ uncertainty = probas.apply(softmax, axis=1)
140
+ try:
141
+ le = glob_conf.label_encoder
142
+ mapping = dict(zip(le.classes_, range(len(le.classes_))))
143
+ mapping_reverse = {value: key for key, value in mapping.items()}
144
+ probas = probas.rename(columns=mapping_reverse)
145
+ probas["predicted"] = probas["predicted"].map(mapping_reverse)
146
+ probas["truth"] = probas["truth"].map(mapping_reverse)
147
+ except AttributeError as ae:
148
+ self.util.debug(f"Can't label categories: {ae}")
149
+ # compute entropy per sample
150
+ uncertainty = uncertainty.apply(entropy)
151
+ # scale it to 0-1
152
+ max_ent = math.log(len(glob_conf.labels))
153
+ uncertainty = (uncertainty - uncertainty.min()) / (
154
+ max_ent - uncertainty.min()
155
+ )
156
+ probas["uncertainty"] = uncertainty
157
+ probas["correct"] = probas.predicted == probas.truth
158
+ sp = os.path.join(self.util.get_path("store"), "pred_df.csv")
159
+ self.probas = probas
160
+ probas.to_csv(sp)
161
+ self.util.debug(f"Saved probabilities to {sp}")
162
+ plots = Plots()
163
+ ax, caption = plots.plotcatcont(
164
+ probas, "correct", "uncertainty", "uncertainty", "correct"
165
+ )
166
+ plots.save_plot(ax, caption, "Uncertainty", "uncertainty", "samples")
111
167
 
112
168
  def set_id(self, run, epoch):
113
169
  """Make the report identifiable with run and epoch index."""
@@ -123,6 +179,12 @@ class Reporter:
123
179
  self.preds = np.digitize(self.preds, bins) - 1
124
180
 
125
181
  def plot_confmatrix(self, plot_name, epoch=None):
182
+ """Plot a confusionmatrix to the store.
183
+
184
+ Args:
185
+ plot_name (str): name for the image file.
186
+ epoch (int, optional): Number of epoch. Defaults to None.
187
+ """
126
188
  if not self.util.exp_is_classification():
127
189
  self.continuous_to_categorical()
128
190
  self._plot_confmat(self.truths, self.preds, plot_name, epoch)
@@ -212,10 +274,11 @@ class Reporter:
212
274
  )
213
275
  img_path = f"{fig_dir}{plot_name}{self.filenameadd}.{self.format}"
214
276
  plt.savefig(img_path)
277
+ self.util.debug(f"Saved confusion plot to {img_path}")
215
278
  fig.clear()
216
279
  plt.close(fig)
217
- plt.savefig(img_path)
218
- plt.close(fig)
280
+ plt.close()
281
+ plt.clf()
219
282
  glob_conf.report.add_item(
220
283
  ReportItem(
221
284
  Header.HEADER_RESULTS,
nkululeko/runmanager.py CHANGED
@@ -11,7 +11,7 @@ from nkululeko.utils.util import Util
11
11
 
12
12
 
13
13
  class Runmanager:
14
- """Class to manage the runs of the experiment (e.g. when results differ caused by random initialization)"""
14
+ """Class to manage the runs of the experiment (e.g. when results differ caused by random initialization)."""
15
15
 
16
16
  model = None # The underlying model
17
17
  df_train, df_test, feats_train, feats_test = (
@@ -23,15 +23,14 @@ class Runmanager:
23
23
  reports = []
24
24
 
25
25
  def __init__(self, df_train, df_test, feats_train, feats_test):
26
- """Constructor setting up the dataframes
26
+ """Constructor setting up the dataframes.
27
+
27
28
  Args:
28
29
  df_train: train dataframe
29
30
  df_test: test dataframe
30
31
  feats_train: train features
31
32
  feats_train: test features
32
33
 
33
- Returns:
34
-
35
34
  """
36
35
  self.df_train, self.df_test, self.feats_train, self.feats_test = (
37
36
  df_train,
@@ -46,7 +45,7 @@ class Runmanager:
46
45
  # self._select_model(model_type)
47
46
 
48
47
  def do_runs(self):
49
- """Start the runs"""
48
+ """Start the runs."""
50
49
  self.best_results = [] # keep the best result per run
51
50
  self.last_epochs = [] # keep the epoch of best result per run
52
51
  # for all runs
@@ -105,15 +104,13 @@ class Runmanager:
105
104
  )
106
105
  self.print_model(best_report, plot_name)
107
106
  # finally, print out the numbers for this run
108
- # self.reports[-1].print_results(
109
- # int(self.util.config_val("EXP", "epochs", 1))
110
- # )
111
107
  best_report.print_results(best_report.epoch)
108
+ best_report.print_probabilities()
112
109
  self.best_results.append(best_report)
113
110
  self.last_epochs.append(last_epoch)
114
111
 
115
112
  def print_best_result_runs(self):
116
- """Print the best result for all runs"""
113
+ """Print the best result for all runs."""
117
114
  best_report = self.get_best_result(self.best_results)
118
115
  self.util.debug(
119
116
  f"best result all runs with run {best_report.run} and"
@@ -177,7 +174,7 @@ class Runmanager:
177
174
  return self.load_model(best_report)
178
175
 
179
176
  def get_best_result(self, reports):
180
- best_r = Reporter([], [], 0, 0)
177
+ best_r = Reporter([], [], None, 0, 0)
181
178
  if self.util.high_is_good():
182
179
  best_r = self.search_best_result(reports, "ascending")
183
180
  else:
@@ -185,7 +182,7 @@ class Runmanager:
185
182
  return best_r
186
183
 
187
184
  def search_best_result(self, reports, order):
188
- best_r = Reporter([], [], 0, 0)
185
+ best_r = Reporter([], [], None, 0, 0)
189
186
  if order == "ascending":
190
187
  best_result = 0
191
188
  for r in reports:
@@ -56,18 +56,13 @@ class TestPredictor:
56
56
  else:
57
57
  test_dbs = ast.literal_eval(glob_conf.config["DATA"]["tests"])
58
58
  test_dbs_string = "_".join(test_dbs)
59
- predictions = self.model.get_predictions()
59
+ predictions, _ = self.model.get_predictions()
60
60
  report = self.model.predict()
61
61
  result = report.result.get_result()
62
62
  report.set_filename_add(f"test-{test_dbs_string}")
63
63
  self.util.print_best_results([report])
64
64
  report.plot_confmatrix(self.util.get_plot_name(), 0)
65
65
  report.print_results(0)
66
- # print(predictions)
67
- # df = pd.DataFrame(index=self.orig_df.index)
68
- # df["speaker"] = self.orig_df["speaker"]
69
- # df["gender"] = self.orig_df["gender"]
70
- # df[self.target] = self.orig_df[self.target]
71
66
  df = self.orig_df.copy()
72
67
  df["predictions"] = self.label_encoder.inverse_transform(predictions)
73
68
  target = self.util.config_val("DATA", "target", "emotion")
nkululeko/utils/stats.py CHANGED
@@ -70,12 +70,16 @@ def get_effect_size(df, target, variable):
70
70
  cats[c] = df[df[target] == c][variable].values
71
71
  combos = all_combinations(categories)
72
72
  results = {}
73
- for combo in combos:
74
- one = combo[0]
75
- other = combo[1]
76
- results[f"{one}-{other}"] = cohen_d(cats[one], cats[other])
77
- max_cat = max(results, key=results.get)
78
- cat_s = cohens_D_to_string(float(results[max_cat]))
73
+ if len(categories) == 1:
74
+ cat_s = cohens_D_to_string(0)
75
+ return categories[0], cat_s, 0
76
+ else:
77
+ for combo in combos:
78
+ one = combo[0]
79
+ other = combo[1]
80
+ results[f"{one}-{other}"] = cohen_d(cats[one], cats[other])
81
+ max_cat = max(results, key=results.get)
82
+ cat_s = cohens_D_to_string(float(results[max_cat]))
79
83
  return max_cat, cat_s, results[max_cat]
80
84
 
81
85
 
@@ -92,7 +96,7 @@ def cohens_D_to_string(val):
92
96
 
93
97
 
94
98
  def normalize(values):
95
- """Do a z-transformation of a distribution.
99
+ """Do a z-transformation of a distribution.
96
100
 
97
101
  So that mean = 0 and variance = 1
98
102
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.86.8
3
+ Version: 0.87.0
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -51,6 +51,7 @@ Requires-Dist: pylatex
51
51
  - [t-SNE plots](#t-sne-plots)
52
52
  - [Data distribution](#data-distribution)
53
53
  - [Bias checking](#bias-checking)
54
+ - [Uncertainty](#uncertainty)
54
55
  - [Documentation](#documentation)
55
56
  - [Installation](#installation)
56
57
  - [Usage](#usage)
@@ -113,6 +114,13 @@ In cases you might wonder if there's bias in your data. You can try to detect th
113
114
 
114
115
  <img src="meta/images/emotion-pesq.png" width="500px"/>
115
116
 
117
+ ### Uncertainty
118
+ Nkululeko estimates the uncertainty of model decisions (classifiers only) as the entropy over the class probabilities or logits per sample.
119
+
120
+ <img src="meta/images/uncertainty.png" width="500px"/>
121
+
122
+
123
+
116
124
  ## Documentation
117
125
  The documentation, along with further details on installation, usage, the INI file format, and examples, can be found at [nkululeko.readthedocs.io](https://nkululeko.readthedocs.io).
118
126
 
@@ -343,6 +351,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
343
351
  Changelog
344
352
  =========
345
353
 
354
+ Version 0.87.0
355
+ --------------
356
+ * added class probability output and uncertainty analysis
357
+
346
358
  Version 0.86.8
347
359
  --------------
348
360
  * handle single feature sets as strings in the config
@@ -2,8 +2,8 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
3
3
  nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
4
4
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
5
- nkululeko/constants.py,sha256=FOK-XF_DHGNFHsO_OMLof3jwgrn2buWnPVfrHy5QBm8,39
6
- nkululeko/demo.py,sha256=WSKr-W5uJ9DQfemK923g7Hd5V3kgAn03Er0JX1Pa45I,5142
5
+ nkululeko/constants.py,sha256=qVowcvAZL-g-Bsp_4yBCOQDkCoW-S-1wrRG5XgnjnX0,39
6
+ nkululeko/demo.py,sha256=Sqbu3o6Pzdr_UlYxWM8Mn3l5uCXsw429yJbtkVDUYHU,5087
7
7
  nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
8
8
  nkululeko/demo_predictor.py,sha256=es56xbT8ifkS_vnrlb5NTZT54gNmeUtNlA4zVA_gnN8,4757
9
9
  nkululeko/experiment.py,sha256=s9PIjm45dR9yzmHu_69JpBjX9qMVzi5wIgPfMR3F44A,31530
@@ -13,19 +13,19 @@ nkululeko/feature_extractor.py,sha256=rL-TybLmjZz5uxT9LNTORaDat9FKp_1qloxbyMriny
13
13
  nkululeko/file_checker.py,sha256=LoLnL8aHpW-axMQ46qbqrManTs5otG9ShpEZuz9iRSk,3474
14
14
  nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
15
15
  nkululeko/glob_conf.py,sha256=KL9YJQTHvTztxo1vr25qRRgaPnx4NTg0XrdbovKGMmw,525
16
- nkululeko/modelrunner.py,sha256=OU35qwP94GxW_EtL4I2-RhqB-wxbjNvp8CIHNbtnt7Q,11155
16
+ nkululeko/modelrunner.py,sha256=rpWQRXERiDZ-i_7CwsqynI87vawtsaPihsonDMPe9PU,11151
17
17
  nkululeko/multidb.py,sha256=fG3VukEWP1vreVN4gB1IRXxwwg4jLftsSEYtu0o1f78,5634
18
18
  nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
19
19
  nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
20
- nkululeko/plots.py,sha256=C2mwQFK0Vxfl5ZM7CO87tULDoEf7G16ek0nU77bhOc4,23070
20
+ nkululeko/plots.py,sha256=WsI_dtPKfrYPsKymHRmIhqj33aZzTcE8fF_EwLkm_5A,22899
21
21
  nkululeko/predict.py,sha256=sF091sSSLnEWcISx9ZcULLie3tY5XeFsQJd6b3vrxFg,2409
22
22
  nkululeko/resample.py,sha256=2d9eao_0sLrGZ_KSl8OVKsPor3BkFrlmMhrpB9WelIs,4267
23
- nkululeko/runmanager.py,sha256=Na8oPn59lRFiNMsYChRHBRgw40mBcw0Rwl2Kz1RUsA0,7614
23
+ nkululeko/runmanager.py,sha256=eRMJidkoJhkU5NdIKoozv3vovU-8tqfn-7zqr2JZcnE,7533
24
24
  nkululeko/scaler.py,sha256=4nkIqoajkIkuTPK0Z02ifMN_awl6fP_i-GBYdoGYgGM,4101
25
25
  nkululeko/segment.py,sha256=YLKckX44tbvTb3LrdgYw9X4guzuF27sutl92z9DkpZU,4835
26
26
  nkululeko/syllable_nuclei.py,sha256=Sky-C__MeUDaxqHnDl2TGLLYOYvsahD35TUjWGeG31k,10047
27
27
  nkululeko/test.py,sha256=1w624vo5KTzmFC8BUStGlLDmIEAFuJUz7J0W-gp7AxI,1677
28
- nkululeko/test_predictor.py,sha256=_w5J8CxH6hmW3mLTKbdfmywl5QpdNAnW1Y8TE5GtlfE,3237
28
+ nkululeko/test_predictor.py,sha256=KaGef_r4mXW89f0aUiYDw8IiBe2ciGt14HNkR-S14lU,2985
29
29
  nkululeko/test_pretrain.py,sha256=ZWl-bR6nmeSmXkGAIE6zyfQEjN8Zg0rIxfaS-O6Zbas,8465
30
30
  nkululeko/augmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
31
  nkululeko/augmenting/augmenter.py,sha256=XAt0dpmlnKxqyysqCgV3rcz-pRIvOz7rU7dmGDCVAzs,2905
@@ -46,7 +46,7 @@ nkululeko/autopredict/ap_valence.py,sha256=n-hctRKySzhmJtowuMOTUu0T_ld3uK5pnfOzW
46
46
  nkululeko/autopredict/estimate_snr.py,sha256=S-bpS0xFkwWc4Ch75UrjbS8y538lQ0U3g_iLRFXureY,5048
47
47
  nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
48
  nkululeko/data/dataset.py,sha256=hUD0NqWCfRaSHG8JNs1MsPb0zjUZAf8FJkg_c0ebq0Q,28046
49
- nkululeko/data/dataset_csv.py,sha256=dzOrbKB8t0UATAIYaKAOqHTogmYPBqskt6Hak7VjbSM,4537
49
+ nkululeko/data/dataset_csv.py,sha256=UGEpi__eT2KFS6Fop6N4HkMrzO-u5VP71gt44kwZavo,4588
50
50
  nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
52
52
  nkululeko/feat_extract/feats_agender_agender.py,sha256=tgH2BnwcxpvuLmOkrMbVdBSX0Onfz2MG12FsddalRKI,3424
@@ -75,15 +75,15 @@ nkululeko/losses/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
75
75
  nkululeko/losses/loss_ccc.py,sha256=NOK0y0fxKUnU161B5geap6Fmn8QzoPl2MqtPiV8IuJE,976
76
76
  nkululeko/losses/loss_softf1loss.py,sha256=5gW-PuiqeAZcRgfwjueIOQtMokOjZWgQnVIv59HKTCo,1309
77
77
  nkululeko/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
- nkululeko/models/model.py,sha256=PUCqF2r_dEfmFsZn6Cgr1UIzYvxziLH6nSqZ5-vuN1o,11639
78
+ nkululeko/models/model.py,sha256=JXrd0fbU0JhTxUDrs0kOEHF9rtPJBxBeO6zcrHAzk8k,12475
79
79
  nkululeko/models/model_bayes.py,sha256=WJFZ8wFKwWATz6MhmjeZIi1Pal1viU549WL_PjXDSy8,406
80
- nkululeko/models/model_cnn.py,sha256=bJxqwe6FnVR2hFeqN6EXexYGgvKYFED1VOhBXVlLWaE,9954
80
+ nkululeko/models/model_cnn.py,sha256=NreR2LrKMyBYHyIJEL6wm3UQ4mA5HleZfpUyA5wNYpA,10629
81
81
  nkululeko/models/model_gmm.py,sha256=hZ9UO36KNf48qa3J-xkWIicIj9-TApmt21zNES2vEOs,649
82
82
  nkululeko/models/model_knn.py,sha256=KlnrJfwiVnmXZrAaYGFrKA2f5sznvTzSJQ8-5etOP0k,599
83
83
  nkululeko/models/model_knn_reg.py,sha256=j7YFfVm6xOR2d9yBYdQiwwqYfqkX0JynX_qLCvkr1fk,610
84
84
  nkululeko/models/model_lin_reg.py,sha256=0D7mSnSwK82lNWDMwHYRyq3FmGa6y-DHDGg4qUe85q4,422
85
- nkululeko/models/model_mlp.py,sha256=xMirtYax3bLBz_0kkC0M4Rc6-KQY05NNKHQGw7rbum8,9856
86
- nkululeko/models/model_mlp_regression.py,sha256=PO5qyfjgAJH8hawhmeXDaUThyXDYdM642dQHkO0NY7c,10204
85
+ nkululeko/models/model_mlp.py,sha256=VE0CI19qMyRbI-THDkMeJ7JbWf4z7CmZ4MMs1FIQgtM,10557
86
+ nkululeko/models/model_mlp_regression.py,sha256=7oK2zQhhCegSqiBUe6eU7Av8MJ_DPLA9skixJcHaVfg,10232
87
87
  nkululeko/models/model_svm.py,sha256=rsME3KvKvNG7bdE5lbvYUu85WZhaASZxxmdNDIVJRZ4,940
88
88
  nkululeko/models/model_svr.py,sha256=_YZeksqB3eBENGlg3g9RwYFlk9rQQ-XCeNBKLlGGVoE,725
89
89
  nkululeko/models/model_tree.py,sha256=rf16faUm4o2LJgkoYpeY998b8DQIvXZ73_m1IS3TnnE,417
@@ -96,17 +96,17 @@ nkululeko/reporting/defines.py,sha256=IsY1YgKRMaABpylVKjBJgJ5bNCEbGCVA_E6pivraqS
96
96
  nkululeko/reporting/latex_writer.py,sha256=qiCRSmB4KOD_za4oHu5x-PhwjZohzfo8wecMOwlXZwc,1886
97
97
  nkululeko/reporting/report.py,sha256=W0rcigDdjBvxZQ3pZja_gvToILYvaZ1BFtnN2qFRfYI,1060
98
98
  nkululeko/reporting/report_item.py,sha256=siWeGNgo4bAE46YBMNcsdf3jTMTy76BO9Fi6DTvDig4,533
99
- nkululeko/reporting/reporter.py,sha256=S9A62AxdMTEV-9XDUQNxdoevGLXBP52WiDmZ694QMV4,14161
99
+ nkululeko/reporting/reporter.py,sha256=6zW3PmQrwVJO5orBVA-fiaIhnzGrFymC861DSd8nSjc,16806
100
100
  nkululeko/reporting/result.py,sha256=nSN5or-Py2GPRWHkWpGRh7UCi1W0er7WLEHz8fYLk-A,742
101
101
  nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
102
102
  nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=pmLHuXsaqvcdYxB4PSW9l1mbQWZZBJFhi_CGabqydas,1947
103
103
  nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5EvWlcWQ,3301
104
104
  nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
105
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
106
- nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
106
+ nkululeko/utils/stats.py,sha256=eC9dMO-by6CDnGLHDBQu-2B4-BudZNJ0nnWGhKYdUMA,2968
107
107
  nkululeko/utils/util.py,sha256=ZCS02mE2c3_h9_q4hpsSm4XAooCranqRF_5pY-6055E,14432
108
- nkululeko-0.86.8.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
109
- nkululeko-0.86.8.dist-info/METADATA,sha256=5TQSWqzrN9E7XJGcVn5oPKGl6qy-RliYGEG2Ycl46qk,38109
110
- nkululeko-0.86.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
111
- nkululeko-0.86.8.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
112
- nkululeko-0.86.8.dist-info/RECORD,,
108
+ nkululeko-0.87.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
109
+ nkululeko-0.87.0.dist-info/METADATA,sha256=DPO61pORcuEhRsDwB5S5VJ8CK_piJeh-I5kKJc8eNJE,38442
110
+ nkululeko-0.87.0.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
111
+ nkululeko-0.87.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
112
+ nkululeko-0.87.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.43.0)
2
+ Generator: setuptools (70.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5