nkululeko 0.81.4__py3-none-any.whl → 0.81.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/multidb.py CHANGED
@@ -3,23 +3,27 @@
 
 import argparse
 import ast
-import seaborn as sn
-import pandas as pd
-import matplotlib.pyplot as plt
+import configparser
+import os
+
 import matplotlib.cm as cm
+import matplotlib.pyplot as plt
 import numpy as np
-import os
+import pandas as pd
+import seaborn as sn
+
+import nkululeko.glob_conf as glob_conf
+from nkululeko.aug_train import doit as aug_train
 from nkululeko.experiment import Experiment
-import configparser
-from nkululeko.utils.util import Util
 from nkululeko.nkululeko import doit as nkulu
-from nkululeko.aug_train import doit as aug_train
-import nkululeko.glob_conf as glob_conf
+from nkululeko.utils.util import Util
 
 
 def main(src_dir):
-    parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
-    parser.add_argument("--config", default="exp.ini", help="The base configuration")
+    parser = argparse.ArgumentParser(
+        description="Call the nkululeko MULTIDB framework.")
+    parser.add_argument("--config", default="exp.ini",
+                        help="The base configuration")
     args = parser.parse_args()
     if args.config is not None:
         config_file = args.config
@@ -54,7 +58,8 @@ def main(src_dir):
         dataset = datasets[i]
         print(f"running {dataset}")
         if extra_trains:
-            extra_trains_1 = extra_trains.removeprefix("[").removesuffix("]")
+            extra_trains_1 = extra_trains.removeprefix(
+                "[").removesuffix("]")
             config["DATA"]["databases"] = f"['{dataset}', {extra_trains_1}]"
             extra_trains_2 = ast.literal_eval(extra_trains)
             for extra_train in extra_trains_2:
@@ -67,7 +72,8 @@ def main(src_dir):
             test = datasets[j]
             print(f"running train: {train}, test: {test}")
             if extra_trains:
-                extra_trains_1 = extra_trains.removeprefix("[").removesuffix("]")
+                extra_trains_1 = extra_trains.removeprefix(
+                    "[").removesuffix("]")
                 config["DATA"][
                     "databases"
                 ] = f"['{train}', '{test}', {extra_trains_1}]"
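A note on the reformatted removeprefix/removesuffix calls above: str.removeprefix and str.removesuffix exist only since Python 3.9, so this module requires that version or later. A minimal sketch of the string round trip the snippet relies on (the database names here are illustrative, not taken from the package):

    import ast

    extra_trains = "['extra_db1', 'extra_db2']"  # list literal as read from the ini file
    dataset = "main_db"                          # hypothetical current dataset
    # strip the brackets so the extra names can be spliced into a new list literal
    extra_trains_1 = extra_trains.removeprefix("[").removesuffix("]")
    databases = f"['{dataset}', {extra_trains_1}]"
    assert ast.literal_eval(databases) == ["main_db", "extra_db1", "extra_db2"]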
nkululeko/predict.py CHANGED
@@ -1,17 +1,34 @@
 # predict.py
-# use some model and add automatically predicted labels to train and test splits, than save as a new dataset
+# use some model and add automatically predicted labels to train and test splits
+# then save as a new dataset
+
+"""This script is used to call the nkululeko PREDICT framework.
+
+It loads a configuration file, creates a new experiment,
+and performs automatic prediction on the train and test datasets. The predicted
+labels are added to the datasets and saved as a new dataset.
+
+Usage: \n
+python3 -m nkululeko.predict [--config CONFIG_FILE] \n
+
+Arguments: \n
+--config (str): The path to the base configuration file (default: exp.ini)
+"""
 
-from nkululeko.experiment import Experiment
-import configparser
-from nkululeko.utils.util import Util
-from nkululeko.constants import VERSION
 import argparse
+import configparser
 import os
 
+from nkululeko.constants import VERSION
+from nkululeko.experiment import Experiment
+from nkululeko.utils.util import Util
+
 
 def main(src_dir):
-    parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
-    parser.add_argument("--config", default="exp.ini", help="The base configuration")
+    parser = argparse.ArgumentParser(
+        description="Call the nkululeko PREDICT framework.")
+    parser.add_argument("--config", default="exp.ini",
+                        help="The base configuration")
     args = parser.parse_args()
     if args.config is not None:
         config_file = args.config
@@ -41,7 +58,8 @@ def main(src_dir):
 
     # split into train and test
     expr.fill_train_and_tests()
-    util.debug(f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
+    util.debug(
+        f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
 
     # process the data
    df = expr.autopredict()
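Per the new module docstring, predict is run as a module; for example, assuming an exp.ini in the working directory:

    python3 -m nkululeko.predict --config exp.ini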
nkululeko/reporter.py ADDED
@@ -0,0 +1,330 @@
+"""Reporter module.
+
+This module contains the Reporter class which is responsible for generating reports.
+"""
+
+
+import ast
+import glob
+import json
+import math
+
+import matplotlib.pyplot as plt
+import numpy as np
+from scipy.stats import pearsonr
+from sklearn.metrics import (
+    ConfusionMatrixDisplay,
+    accuracy_score,
+    classification_report,
+    confusion_matrix,
+    mean_absolute_error,
+    mean_squared_error,
+    r2_score,
+    recall_score,
+)
+from sklearn.utils import resample
+
+import nkululeko.glob_conf as glob_conf
+from nkululeko.reporting.defines import Header
+from nkululeko.reporting.report_item import ReportItem
+from nkululeko.result import Result
+from nkululeko.utils.util import Util
+
+
+class Reporter:
+    def __set_measure(self):
+        if self.util.exp_is_classification():
+            self.MEASURE = "UAR"
+            self.result.measure = self.MEASURE
+            self.is_classification = True
+        else:
+            self.is_classification = False
+            self.measure = self.util.config_val("MODEL", "measure", "mse")
+            if self.measure == "mse":
+                self.MEASURE = "MSE"
+                self.result.measure = self.MEASURE
+            elif self.measure == "mae":
+                self.MEASURE = "MAE"
+                self.result.measure = self.MEASURE
+            elif self.measure == "ccc":
+                self.MEASURE = "CCC"
+                self.result.measure = self.MEASURE
+
+    def __init__(self, truths, preds, run, epoch):
+        """Initialization with ground truth and prediction vectors."""
+        self.util = Util("reporter")
+        self.format = self.util.config_val("PLOT", "format", "png")
+        self.truths = truths
+        self.preds = preds
+        self.result = Result(0, 0, 0, 0, "unknown")
+        self.run = run
+        self.epoch = epoch
+        self.__set_measure()
+        self.cont_to_cat = False
+        if len(self.truths) > 0 and len(self.preds) > 0:
+            if self.util.exp_is_classification():
+                self.result.test = recall_score(
+                    self.truths, self.preds, average="macro"
+                )
+                self.result.loss = 1 - accuracy_score(self.truths, self.preds)
+            else:
+                # regression experiment
+                if self.measure == "mse":
+                    self.result.test = mean_squared_error(
+                        self.truths, self.preds)
+                elif self.measure == "mae":
+                    self.result.test = mean_absolute_error(
+                        self.truths, self.preds)
+                elif self.measure == "ccc":
+                    self.result.test = self.ccc(self.truths, self.preds)
+                    if math.isnan(self.result.test):
+                        self.util.debug(f"Truth: {self.truths}")
+                        self.util.debug(f"Predict.: {self.preds}")
+                        self.util.debug("Result is NaN: setting to -1")
+                        self.result.test = -1
+                else:
+                    self.util.error(f"unknown measure: {self.measure}")
+
+        # train and loss are being set by the model
+
+    def set_id(self, run, epoch):
+        """Make the report identifiable with run and epoch index."""
+        self.run = run
+        self.epoch = epoch
+
+    def continuous_to_categorical(self):
+        if self.cont_to_cat:
+            return
+        self.cont_to_cat = True
+        bins = ast.literal_eval(glob_conf.config["DATA"]["bins"])
+        self.truths = np.digitize(self.truths, bins) - 1
+        self.preds = np.digitize(self.preds, bins) - 1
+
+    def plot_confmatrix(self, plot_name, epoch):
+        if not self.util.exp_is_classification():
+            self.continuous_to_categorical()
+        self._plot_confmat(self.truths, self.preds, plot_name, epoch)
+
+
+    def plot_per_speaker(self, result_df, plot_name, function):
+        """Plot a confusion matrix with the mode category per speaker.
+
+        This function creates a confusion matrix for each speaker in the result_df.
+        The result_df should contain the columns: pred, truth and speaker.
+
+        Args:
+            * result_df: a pandas dataframe with columns: pred, truth and speaker
+            * plot_name: a string with the name of the plot
+            * function: a string with the function to use for each speaker,
+              can be 'mode' or 'mean'
+
+        Returns:
+            * None
+        """
+        # Initialize empty arrays for predictions and truths
+        pred = np.zeros(0)
+        truth = np.zeros(0)
+
+        # Iterate over each speaker
+        for s in result_df.speaker.unique():
+            # Filter the dataframe for the current speaker
+            s_df = result_df[result_df.speaker == s]
+
+            # Get the mode or mean prediction for the current speaker
+            mode = s_df.pred.mode().iloc[-1]
+            mean = s_df.pred.mean()
+            if function == "mode":
+                s_df.pred = mode
+            elif function == "mean":
+                s_df.pred = mean
+            else:
+                self.util.error(f"unknown function {function}")
+
+            # Append the current speaker's predictions and truths to the arrays
+            pred = np.append(pred, s_df.pred.values)
+            truth = np.append(truth, s_df["truth"].values)
+
+        # If the experiment is not a classification and no continuous-to-categorical
+        # conversion was performed, convert the truths and predictions to categorical
+        if not (self.is_classification or self.cont_to_cat):
+            bins = ast.literal_eval(glob_conf.config["DATA"]["bins"])
+            truth = np.digitize(truth, bins) - 1
+            pred = np.digitize(pred, bins) - 1
+
+        # Plot the confusion matrix for the speakers
+        self._plot_confmat(truth, pred.astype("int"), plot_name, 0)
+
+    def _plot_confmat(self, truths, preds, plot_name, epoch):
+        # print(truths)
+        # print(preds)
+        fig_dir = self.util.get_path("fig_dir")
+        labels = glob_conf.labels
+        fig = plt.figure()  # figsize=[5, 5]
+        uar = recall_score(truths, preds, average="macro")
+        acc = accuracy_score(truths, preds)
+        cm = confusion_matrix(
+            truths, preds, normalize=None
+        )  # normalize must be one of {'true', 'pred', 'all', None}
+        if cm.shape[0] != len(labels):
+            self.util.error(
+                f"mismatch between confmatrix dim ({cm.shape[0]}) and labels"
+                f" length ({len(labels)}: {labels})"
+            )
+        try:
+            disp = ConfusionMatrixDisplay(
+                confusion_matrix=cm, display_labels=labels
+            ).plot(cmap="Blues")
+        except ValueError:
+            disp = ConfusionMatrixDisplay(
+                confusion_matrix=cm,
+                display_labels=[x for x in labels if x != "neutral"],
+            ).plot(cmap="Blues")
+
+        reg_res = ""
+        if not self.is_classification:
+            reg_res = f", {self.MEASURE}: {self.result.test:.3f}"
+
+        if epoch != 0:
+            plt.title(
+                f"Confusion Matrix, UAR: {uar:.3f}{reg_res}, Epoch: {epoch}")
+        else:
+            plt.title(f"Confusion Matrix, UAR: {uar:.3f}{reg_res}")
+        img_path = f"{fig_dir}{plot_name}.{self.format}"
+        plt.savefig(img_path)
+        fig.clear()
+        plt.close(fig)
+        glob_conf.report.add_item(
+            ReportItem(
+                Header.HEADER_RESULTS,
+                self.util.get_model_description(),
+                "Confusion matrix",
+                img_path,
+            )
+        )
+
+        res_dir = self.util.get_path("res_dir")
+        uar = int(uar * 1000) / 1000.0
+        acc = int(acc * 1000) / 1000.0
+        rpt = f"epoch: {epoch}, UAR: {uar}, ACC: {acc}"
+        # print(rpt)
+        self.util.debug(rpt)
+        file_name = f"{res_dir}{self.util.get_exp_name()}_conf.txt"
+        with open(file_name, "w") as text_file:
+            text_file.write(rpt)
+
+    def print_results(self, epoch):
+        """Print all evaluation values to a text file."""
+        res_dir = self.util.get_path("res_dir")
+        file_name = f"{res_dir}{self.util.get_exp_name()}_{epoch}.txt"
+        if self.util.exp_is_classification():
+            labels = glob_conf.labels
+            try:
+                rpt = classification_report(
+                    self.truths,
+                    self.preds,
+                    target_names=labels,
+                    output_dict=True,
+                )
+            except ValueError as e:
+                self.util.debug(
+                    "Reporter: caught a ValueError when trying to get"
+                    " classification_report: " + str(e)
+                )
+                rpt = self.result.to_string()
+            with open(file_name, "w") as text_file:
+                c_ress = list(range(len(labels)))
+                for i, l in enumerate(labels):
+                    c_res = rpt[l]["f1-score"]
+                    c_ress[i] = float(f"{c_res:.3f}")
+                self.util.debug(f"labels: {labels}")
+                f1_per_class = f"result per class (F1 score): {c_ress}"
+                self.util.debug(f1_per_class)
+                rpt_str = f"{json.dumps(rpt)}\n{f1_per_class}"
+                text_file.write(rpt_str)
+                glob_conf.report.add_item(
+                    ReportItem(
+                        Header.HEADER_RESULTS,
+                        f"Classification result {self.util.get_model_description()}",
+                        rpt_str,
+                    )
+                )
+
+        else:  # regression
+            result = self.result.test
+            r2 = r2_score(self.truths, self.preds)
+            pcc = pearsonr(self.truths, self.preds)[0]
+            measure = self.util.config_val("MODEL", "measure", "mse")
+            with open(file_name, "w") as text_file:
+                text_file.write(
+                    f"{measure}: {result:.3f}, r_2: {r2:.3f}, pcc {pcc:.3f}"
+                )
+
+    def make_conf_animation(self, out_name):
+        import imageio
+
+        fig_dir = self.util.get_path("fig_dir")
+        filenames = glob.glob(
+            fig_dir + f"{self.util.get_plot_name()}*_?_???_cnf.png")
+        images = []
+        for filename in filenames:
+            images.append(imageio.imread(filename))
+        fps = self.util.config_val("PLOT", "fps", "1")
+        try:
+            imageio.mimsave(fig_dir + out_name, images, fps=int(fps))
+        except RuntimeError as e:
+            self.util.error("error writing anim gif: " + str(e))
+
+    def get_result(self):
+        return self.result
+
+    def plot_epoch_progression(self, reports, out_name):
+        fig_dir = self.util.get_path("fig_dir")
+        results, losses, train_results, losses_eval = [], [], [], []
+        for r in reports:
+            results.append(r.get_result().test)
+            losses.append(r.get_result().loss)
+            train_results.append(r.get_result().train)
+            losses_eval.append(r.get_result().loss_eval)
+
+        # do a plot per run
+        # scale the losses so they fit on the picture
+        losses, results, train_results, losses_eval = (
+            np.asarray(losses),
+            np.asarray(results),
+            np.asarray(train_results),
+            np.asarray(losses_eval),
+        )
+
+        if np.all((results > 1)):
+            # scale down values
+            results = results / 100.0
+            train_results = train_results / 100.0
+        # if np.all((losses < 1)):
+        # scale up values
+        plt.figure(dpi=200)
+        plt.plot(train_results, "green", label="train set")
+        plt.plot(results, "red", label="dev set")
+        plt.plot(losses, "black", label="losses")
+        plt.plot(losses_eval, "grey", label="losses_eval")
+        plt.xlabel("epochs")
+        plt.ylabel(f"{self.MEASURE}")
+        plt.legend()
+        plt.savefig(f"{fig_dir}{out_name}.{self.format}")
+        plt.close()
+
+    @staticmethod
+    def ccc(ground_truth, prediction):
+        mean_gt = np.mean(ground_truth, 0)
+        mean_pred = np.mean(prediction, 0)
+        var_gt = np.var(ground_truth, 0)
+        var_pred = np.var(prediction, 0)
+        v_pred = prediction - mean_pred
+        v_gt = ground_truth - mean_gt
+        cor = sum(v_pred * v_gt) / \
+            (np.sqrt(sum(v_pred**2)) * np.sqrt(sum(v_gt**2)))
+        sd_gt = np.std(ground_truth)
+        sd_pred = np.std(prediction)
+        numerator = 2 * cor * sd_gt * sd_pred
+        denominator = var_gt + var_pred + (mean_gt - mean_pred) ** 2
+        ccc = numerator / denominator
+        return ccc
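For reference, the ccc() staticmethod at the end of the new module computes the concordance correlation coefficient; with cor the Pearson correlation, sd_gt/sd_pred the standard deviations, var_gt/var_pred the variances and mean_gt/mean_pred the means of ground truth and prediction, the code evaluates

    $$\mathrm{CCC} = \frac{2\,\rho\,\sigma_{gt}\,\sigma_{pred}}{\sigma_{gt}^2 + \sigma_{pred}^2 + (\mu_{gt} - \mu_{pred})^2}$$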
nkululeko/resample.py CHANGED
@@ -1,19 +1,23 @@
 # resample.py
 # change the sampling rate for train and test splits
 
-from nkululeko.experiment import Experiment
-import configparser
-from nkululeko.utils.util import Util
-from nkululeko.constants import VERSION
 import argparse
+import configparser
 import os
+
 import pandas as pd
+
 from nkululeko.augmenting.resampler import Resampler
+from nkululeko.constants import VERSION
+from nkululeko.experiment import Experiment
+from nkululeko.utils.util import Util
 
 
 def main(src_dir):
-    parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
-    parser.add_argument("--config", default="exp.ini", help="The base configuration")
+    parser = argparse.ArgumentParser(
+        description="Call the nkululeko RESAMPLE framework.")
+    parser.add_argument("--config", default="exp.ini",
+                        help="The base configuration")
     args = parser.parse_args()
     if args.config is not None:
         config_file = args.config
@@ -48,7 +52,8 @@ def main(src_dir):
 
     # split into train and test
     expr.fill_train_and_tests()
-    util.debug(f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
+    util.debug(
+        f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}")
 
     sample_selection = util.config_val("RESAMPLE", "sample_selection", "all")
     if sample_selection == "all":
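The sample_selection value above is read through util.config_val with a default of "all"; config_val's implementation is not part of this diff, but a plain-configparser sketch of an equivalent lookup would be:

    import configparser

    config = configparser.ConfigParser()
    config.read("exp.ini")
    # fall back to "all" when the section or key is absent, mirroring the default
    sample_selection = config.get("RESAMPLE", "sample_selection", fallback="all")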
nkululeko/runmanager.py CHANGED
@@ -1,9 +1,13 @@
-# runmanager.py
+"""Runmanager module.
+
+This module contains the Runmanager class which is responsible for managing the
+runs of the experiment.
+"""
 
-from nkululeko.reporting.reporter import Reporter
-from nkululeko.utils.util import Util
 import nkululeko.glob_conf as glob_conf
 from nkululeko.modelrunner import Modelrunner
+from nkululeko.reporting.reporter import Reporter
+from nkululeko.utils.util import Util
 
 
 class Runmanager:
@@ -59,7 +63,8 @@ class Runmanager:
         )
         self.reports, last_epoch = self.modelrunner.do_epochs()
         # wrap up the run
-        plot_anim_progression = self.util.config_val("PLOT", "anim_progression", 0)
+        plot_anim_progression = self.util.config_val(
+            "PLOT", "anim_progression", 0)
         if plot_anim_progression:
             plot_name_suggest = self.util.get_exp_name()
             plot_name = (
@@ -83,7 +88,8 @@
                 + "_epoch_progression"
             )
             self.util.debug(f"plotting progression to {plot_name}")
-            self.reports[-1].plot_epoch_progression(self.reports, plot_name)
+            self.reports[-1].plot_epoch_progression(
+                self.reports, plot_name)
         # remember the best run
         best_report = self.get_best_result(self.reports)
         plot_best_model = self.util.config_val("PLOT", "best_model", False)
@@ -122,7 +128,8 @@
         self.print_model(best_report, plot_name)
 
     def print_given_result(self, run, epoch):
-        """Print a result (confusion matrix) for a given epoch and run
+        """Print a result (confusion matrix) for a given epoch and run.
+
         Args:
             run: for which run
             epoch: for which epoch
@@ -139,7 +146,8 @@
         self.print_model(report, plot_name)
 
     def print_model(self, report, plot_name):
-        """Print a confusion matrix for a special report
+        """Print a confusion matrix for a specific report.
+
         Args:
             report: for which report (will be computed newly from model)
             plot_name: name of plot file
@@ -152,7 +160,8 @@
         report.print_results(epoch)
 
     def load_model(self, report):
-        """Load a model from disk for a specific run and epoch and evaluate
+        """Load a model from disk for a specific run and epoch and evaluate it.
+
         Args:
             report: for which report (will be re-evaluated)
 
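Both optional plots handled above are gated on [PLOT] keys read via config_val (anim_progression and best_model are the key names visible in this diff); a hypothetical exp.ini fragment enabling them might read:

    [PLOT]
    anim_progression = 1
    best_model = 1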
nkululeko/test.py CHANGED
@@ -1,17 +1,20 @@
 # test.py
 # Just use a database as test
 
-from nkululeko.experiment import Experiment
-import configparser
-from nkululeko.utils.util import Util
-from nkululeko.constants import VERSION
 import argparse
+import configparser
 import os
 
+from nkululeko.constants import VERSION
+from nkululeko.experiment import Experiment
+from nkululeko.utils.util import Util
+
 
 def main(src_dir):
-    parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
-    parser.add_argument("--config", default="exp.ini", help="The base configuration")
+    parser = argparse.ArgumentParser(
+        description="Call the nkululeko TEST framework.")
+    parser.add_argument("--config", default="exp.ini",
+                        help="The base configuration")
     parser.add_argument(
         "--outfile",
         default="my_results.csv",
@@ -62,3 +62,4 @@ class Test_predictor:
         df = df.drop(columns=[target])
         df = df.rename(columns={"class_label": target})
         df.to_csv(self.name)
+        self.util.debug(f"results stored in {self.name}")
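Given the argparse setup above, the test module is presumably invoked the same way as the other entry points, e.g.:

    python3 -m nkululeko.test --config exp.ini --outfile my_results.csv

(both file names are the argparse defaults shown above).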
nkululeko/utils/stats.py CHANGED
@@ -1,5 +1,6 @@
-from itertools import combinations
 import math
+from itertools import combinations
+
 import numpy as np
 import pandas as pd
 
@@ -16,7 +17,8 @@ def check_na(a):
 
 def cohen_d(d1, d2):
     """
-    Compute Cohen's d from two distributions of real valued arrays
+    Compute Cohen's d from two distributions of real valued arrays.
+
     Args:
         d1: one array
         d2: the other array
@@ -50,8 +52,10 @@ def all_combinations(items_list):
 
 
 def get_effect_size(df, target, variable):
-    """
-    Get the effect size as Cohen's D from a real numbered variable on a categorical target.
+    """Get the effect size as Cohen's D.
+
+    Effect size is computed from a real numbered variable on a categorical target.
+
     Args:
         df: a pd.Dataframe with at least target and variable as columns
         target: the categorical target, e.g. emotion
@@ -88,7 +92,10 @@ def cohens_D_to_string(val):
 
 
 def normalize(values):
-    """Do a z-transformation of a distribution, so that mean = 0 and variance = 1"""
+    """Do a z-transformation of a distribution.
+
+    So that mean = 0 and variance = 1
+    """
     from sklearn.preprocessing import StandardScaler
 
     scaler = StandardScaler()
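For reference, the Cohen's d named in the cohen_d docstring above is conventionally defined as the mean difference scaled by a pooled standard deviation (the function body with the exact pooling is not part of this diff):

    $$d = \frac{\bar{x}_1 - \bar{x}_2}{s}, \qquad s = \sqrt{\frac{(n_1 - 1)\,s_1^2 + (n_2 - 1)\,s_2^2}{n_1 + n_2 - 2}}$$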
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.81.4
+Version: 0.81.6
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -323,6 +323,16 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
 
+Version 0.81.6
+--------------
+* updated documentation
+* updated crema-d
+* updated tests
+
+Version 0.81.5
+--------------
+* added sex=gender for speaker mappings
+
 Version 0.81.4
 --------------
 * fixed bug in demo module