nkululeko 0.89.2__py3-none-any.whl → 0.90.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. nkululeko/aug_train.py +6 -4
  2. nkululeko/augment.py +8 -6
  3. nkululeko/augmenting/augmenter.py +4 -4
  4. nkululeko/augmenting/randomsplicer.py +12 -9
  5. nkululeko/augmenting/randomsplicing.py +2 -3
  6. nkululeko/augmenting/resampler.py +9 -6
  7. nkululeko/autopredict/ap_age.py +4 -2
  8. nkululeko/autopredict/ap_arousal.py +4 -2
  9. nkululeko/autopredict/ap_dominance.py +3 -2
  10. nkululeko/autopredict/ap_gender.py +4 -2
  11. nkululeko/autopredict/ap_mos.py +5 -2
  12. nkululeko/autopredict/ap_pesq.py +5 -2
  13. nkululeko/autopredict/ap_sdr.py +5 -2
  14. nkululeko/autopredict/ap_snr.py +5 -2
  15. nkululeko/autopredict/ap_stoi.py +5 -2
  16. nkululeko/autopredict/ap_valence.py +4 -2
  17. nkululeko/autopredict/estimate_snr.py +10 -14
  18. nkululeko/cacheddataset.py +1 -1
  19. nkululeko/constants.py +1 -1
  20. nkululeko/data/dataset.py +19 -16
  21. nkululeko/data/dataset_csv.py +5 -3
  22. nkululeko/demo-ft.py +29 -0
  23. nkululeko/demo_feats.py +5 -4
  24. nkululeko/demo_predictor.py +3 -4
  25. nkululeko/ensemble.py +27 -28
  26. nkululeko/experiment.py +11 -7
  27. nkululeko/experiment_felix.py +728 -0
  28. nkululeko/explore.py +1 -0
  29. nkululeko/export.py +7 -5
  30. nkululeko/feat_extract/feats_agender.py +5 -4
  31. nkululeko/feat_extract/feats_agender_agender.py +7 -6
  32. nkululeko/feat_extract/feats_analyser.py +18 -16
  33. nkululeko/feat_extract/feats_ast.py +9 -8
  34. nkululeko/feat_extract/feats_auddim.py +3 -5
  35. nkululeko/feat_extract/feats_audmodel.py +2 -2
  36. nkululeko/feat_extract/feats_clap.py +9 -12
  37. nkululeko/feat_extract/feats_hubert.py +2 -3
  38. nkululeko/feat_extract/feats_import.py +5 -4
  39. nkululeko/feat_extract/feats_mld.py +3 -5
  40. nkululeko/feat_extract/feats_mos.py +4 -3
  41. nkululeko/feat_extract/feats_opensmile.py +4 -3
  42. nkululeko/feat_extract/feats_oxbow.py +5 -4
  43. nkululeko/feat_extract/feats_praat.py +4 -7
  44. nkululeko/feat_extract/feats_snr.py +3 -5
  45. nkululeko/feat_extract/feats_spectra.py +8 -9
  46. nkululeko/feat_extract/feats_spkrec.py +6 -11
  47. nkululeko/feat_extract/feats_squim.py +2 -4
  48. nkululeko/feat_extract/feats_trill.py +2 -5
  49. nkululeko/feat_extract/feats_wav2vec2.py +8 -4
  50. nkululeko/feat_extract/feats_wavlm.py +2 -3
  51. nkululeko/feat_extract/feats_whisper.py +4 -6
  52. nkululeko/feat_extract/featureset.py +4 -2
  53. nkululeko/feat_extract/feinberg_praat.py +1 -3
  54. nkululeko/feat_extract/transformer_feature_extractor.py +147 -0
  55. nkululeko/file_checker.py +3 -3
  56. nkululeko/filter_data.py +3 -1
  57. nkululeko/fixedsegment.py +83 -0
  58. nkululeko/models/model.py +3 -5
  59. nkululeko/models/model_bayes.py +1 -0
  60. nkululeko/models/model_cnn.py +4 -6
  61. nkululeko/models/model_gmm.py +13 -9
  62. nkululeko/models/model_knn.py +1 -0
  63. nkululeko/models/model_knn_reg.py +1 -0
  64. nkululeko/models/model_lin_reg.py +1 -0
  65. nkululeko/models/model_mlp.py +2 -3
  66. nkululeko/models/model_mlp_regression.py +1 -6
  67. nkululeko/models/model_svm.py +2 -2
  68. nkululeko/models/model_svr.py +1 -0
  69. nkululeko/models/model_tree.py +2 -3
  70. nkululeko/models/model_tree_reg.py +1 -0
  71. nkululeko/models/model_tuned.py +54 -33
  72. nkululeko/models/model_xgb.py +1 -0
  73. nkululeko/models/model_xgr.py +1 -0
  74. nkululeko/multidb.py +1 -0
  75. nkululeko/nkululeko.py +1 -1
  76. nkululeko/plots.py +1 -1
  77. nkululeko/predict.py +4 -5
  78. nkululeko/reporting/defines.py +6 -8
  79. nkululeko/reporting/latex_writer.py +3 -3
  80. nkululeko/reporting/report.py +2 -2
  81. nkululeko/reporting/report_item.py +1 -0
  82. nkululeko/reporting/reporter.py +20 -19
  83. nkululeko/resample.py +8 -12
  84. nkululeko/resample_cli.py +99 -0
  85. nkululeko/runmanager.py +3 -1
  86. nkululeko/scaler.py +1 -1
  87. nkululeko/segment.py +6 -5
  88. nkululeko/segmenting/seg_inaspeechsegmenter.py +3 -3
  89. nkululeko/segmenting/seg_silero.py +4 -4
  90. nkululeko/syllable_nuclei.py +9 -22
  91. nkululeko/test_pretrain.py +6 -7
  92. nkululeko/utils/stats.py +0 -1
  93. nkululeko/utils/util.py +2 -3
  94. {nkululeko-0.89.2.dist-info → nkululeko-0.90.1.dist-info}/METADATA +12 -2
  95. nkululeko-0.90.1.dist-info/RECORD +119 -0
  96. {nkululeko-0.89.2.dist-info → nkululeko-0.90.1.dist-info}/WHEEL +1 -1
  97. nkululeko-0.89.2.dist-info/RECORD +0 -114
  98. {nkululeko-0.89.2.dist-info → nkululeko-0.90.1.dist-info}/LICENSE +0 -0
  99. {nkululeko-0.89.2.dist-info → nkululeko-0.90.1.dist-info}/top_level.txt +0 -0
nkululeko/ensemble.py CHANGED
@@ -15,28 +15,20 @@ Raises:
15
15
  ValueError: If an unknown ensemble method is provided.
16
16
  AssertionError: If the number of config files is less than 2 for majority voting.
17
17
  """
18
+
18
19
  #!/usr/bin/env python
19
20
  # -*- coding: utf-8 -*-
20
21
 
21
22
 
22
- from typing import List
23
23
  import configparser
24
24
  import time
25
25
  from argparse import ArgumentParser
26
26
  from pathlib import Path
27
+ from typing import List
27
28
 
28
29
  import numpy as np
29
30
  import pandas as pd
30
- import matplotlib.pyplot as plt
31
-
32
- from sklearn.metrics import(
33
- RocCurveDisplay,
34
- balanced_accuracy_score,
35
- classification_report,
36
- auc,
37
- roc_auc_score,
38
- roc_curve
39
- )
31
+ from sklearn.metrics import balanced_accuracy_score, classification_report
40
32
 
41
33
  from nkululeko.constants import VERSION
42
34
  from nkululeko.experiment import Experiment
@@ -169,17 +161,19 @@ def performance_weighted_ensemble(ensemble_preds_ls, labels, weights):
169
161
 
170
162
  # asserts weiths in decimal 0-1
171
163
  assert all(0 <= w <= 1 for w in weights), "Weights must be between 0 and 1"
172
-
164
+
173
165
  # assert lenght of weights matches number of models
174
- assert len(weights) == len(ensemble_preds_ls), "Number of weights must match number of models"
175
-
166
+ assert len(weights) == len(
167
+ ensemble_preds_ls
168
+ ), "Number of weights must match number of models"
169
+
176
170
  # Normalize weights
177
171
  total_weight = sum(weights)
178
172
  weights = [weight / total_weight for weight in weights]
179
-
173
+
180
174
  for idx in ensemble_preds_ls[0].index:
181
175
  class_probabilities = {label: 0 for label in labels}
182
-
176
+
183
177
  for df, weight in zip(ensemble_preds_ls, weights):
184
178
  row = df.loc[idx]
185
179
  for label in labels:
@@ -192,10 +186,12 @@ def performance_weighted_ensemble(ensemble_preds_ls, labels, weights):
192
186
  return final_predictions, final_confidences
193
187
 
194
188
 
195
-
196
-
197
189
  def ensemble_predictions(
198
- config_files: List[str], method: str, threshold: float, weights: List[float], no_labels: bool
190
+ config_files: List[str],
191
+ method: str,
192
+ threshold: float,
193
+ weights: List[float],
194
+ no_labels: bool,
199
195
  ) -> pd.DataFrame:
200
196
  """
201
197
  Ensemble predictions from multiple experiments.
@@ -261,17 +257,20 @@ def ensemble_predictions(
261
257
  ensemble_preds_ls, labels, threshold
262
258
  )
263
259
  elif method == "uncertainty_weighted":
264
- ensemble_preds["predicted"], ensemble_preds["uncertainty"] = (
265
- uncertainty_weighted_ensemble(ensemble_preds_ls, labels)
266
- )
260
+ (
261
+ ensemble_preds["predicted"],
262
+ ensemble_preds["uncertainty"],
263
+ ) = uncertainty_weighted_ensemble(ensemble_preds_ls, labels)
267
264
  elif method == "confidence_weighted":
268
- ensemble_preds["predicted"], ensemble_preds["confidence"] = (
269
- confidence_weighted_ensemble(ensemble_preds_ls, labels)
270
- )
265
+ (
266
+ ensemble_preds["predicted"],
267
+ ensemble_preds["confidence"],
268
+ ) = confidence_weighted_ensemble(ensemble_preds_ls, labels)
271
269
  elif method == "performance_weighted":
272
- ensemble_preds["predicted"], ensemble_preds["confidence"] = (
273
- performance_weighted_ensemble(ensemble_preds_ls, labels, weights)
274
- )
270
+ (
271
+ ensemble_preds["predicted"],
272
+ ensemble_preds["confidence"],
273
+ ) = performance_weighted_ensemble(ensemble_preds_ls, labels, weights)
275
274
  else:
276
275
  raise ValueError(f"Unknown ensemble method: {method}")
277
276
 
nkululeko/experiment.py CHANGED
@@ -5,13 +5,13 @@ import pickle
5
5
  import random
6
6
  import time
7
7
 
8
+ import audeer
9
+ import audformat
8
10
  import numpy as np
9
11
  import pandas as pd
10
12
  from sklearn.preprocessing import LabelEncoder
11
13
 
12
- import audeer
13
- import audformat
14
-
14
+ import nkululeko.glob_conf as glob_conf
15
15
  from nkululeko.data.dataset import Dataset
16
16
  from nkululeko.data.dataset_csv import Dataset_CSV
17
17
  from nkululeko.demo_predictor import Demo_predictor
@@ -19,7 +19,6 @@ from nkululeko.feat_extract.feats_analyser import FeatureAnalyser
19
19
  from nkululeko.feature_extractor import FeatureExtractor
20
20
  from nkululeko.file_checker import FileChecker
21
21
  from nkululeko.filter_data import DataFilter
22
- import nkululeko.glob_conf as glob_conf
23
22
  from nkululeko.plots import Plots
24
23
  from nkululeko.reporting.report import Report
25
24
  from nkululeko.runmanager import Runmanager
@@ -101,12 +100,15 @@ class Experiment:
101
100
  if data.got_speaker:
102
101
  self.got_speaker = True
103
102
  self.datasets.update({d: data})
104
- self.target = self.util.config_val("DATA", "target", "emotion")
103
+ self.target = self.util.config_val("DATA", "target", "none")
105
104
  glob_conf.set_target(self.target)
106
105
  # print target via debug
107
106
  self.util.debug(f"target: {self.target}")
108
107
  # print keys/column
109
108
  dbs = ",".join(list(self.datasets.keys()))
109
+ if self.target == "none":
110
+ self.util.debug(f"loaded databases {dbs}")
111
+ return
110
112
  labels = self.util.config_val("DATA", "labels", False)
111
113
  auto_labels = list(next(iter(self.datasets.values())).df[self.target].unique())
112
114
  if labels:
@@ -191,7 +193,8 @@ class Experiment:
191
193
  self.df_train, self.df_test = pd.DataFrame(), pd.DataFrame()
192
194
  for d in self.datasets.values():
193
195
  d.split()
194
- d.prepare_labels()
196
+ if self.target != "none":
197
+ d.prepare_labels()
195
198
  if d.df_train.shape[0] == 0:
196
199
  self.util.debug(f"warn: {d.name} train empty")
197
200
  self.df_train = pd.concat([self.df_train, d.df_train])
@@ -207,6 +210,8 @@ class Experiment:
207
210
  self.df_test.to_csv(storage_test)
208
211
  self.df_train.to_csv(storage_train)
209
212
 
213
+ if self.target == "none":
214
+ return
210
215
  self.util.copy_flags(self, self.df_test)
211
216
  self.util.copy_flags(self, self.df_train)
212
217
  # Try data checks
@@ -738,7 +743,6 @@ class Experiment:
738
743
  if model.is_ann():
739
744
  print("converting to onnx from torch")
740
745
  else:
741
-
742
746
  print("converting to onnx from sklearn")
743
747
  # save the rest
744
748
  f = open(filename, "wb")