nkululeko-0.94.3-py3-none-any.whl → nkululeko-0.95.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42):
  1. nkululeko/augmenting/resampler.py +5 -2
  2. nkululeko/autopredict/ap_emotion.py +36 -0
  3. nkululeko/autopredict/ap_text.py +45 -0
  4. nkululeko/autopredict/tests/__init__.py +0 -0
  5. nkululeko/autopredict/tests/test_whisper_transcriber.py +122 -0
  6. nkululeko/autopredict/whisper_transcriber.py +81 -0
  7. nkululeko/balance.py +222 -0
  8. nkululeko/constants.py +1 -1
  9. nkululeko/experiment.py +53 -3
  10. nkululeko/explore.py +32 -13
  11. nkululeko/feat_extract/feats_analyser.py +45 -17
  12. nkululeko/feat_extract/feats_emotion2vec.py +51 -26
  13. nkululeko/feat_extract/feats_praat.py +3 -3
  14. nkululeko/feat_extract/feats_praat_core.py +769 -0
  15. nkululeko/feat_extract/tests/__init__.py +1 -0
  16. nkululeko/feat_extract/tests/test_feats_opensmile.py +162 -0
  17. nkululeko/feat_extract/tests/test_feats_praat_core.py +507 -0
  18. nkululeko/glob_conf.py +9 -0
  19. nkululeko/modelrunner.py +15 -39
  20. nkululeko/models/model.py +4 -42
  21. nkululeko/models/model_tuned.py +416 -84
  22. nkululeko/models/model_xgb.py +148 -2
  23. nkululeko/models/tests/test_model_knn.py +49 -0
  24. nkululeko/models/tests/test_model_mlp.py +153 -0
  25. nkululeko/models/tests/test_model_xgb.py +33 -0
  26. nkululeko/nkululeko.py +0 -9
  27. nkululeko/plots.py +25 -19
  28. nkululeko/predict.py +8 -6
  29. nkululeko/reporting/report.py +7 -5
  30. nkululeko/reporting/reporter.py +20 -5
  31. nkululeko/test_predictor.py +7 -1
  32. nkululeko/tests/__init__.py +1 -0
  33. nkululeko/tests/test_balancing.py +270 -0
  34. nkululeko/utils/util.py +38 -6
  35. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/METADATA +1 -1
  36. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/RECORD +40 -27
  37. nkululeko/feat_extract/feats_opensmile copy.py +0 -93
  38. nkululeko/feat_extract/feinberg_praat.py +0 -628
  39. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/WHEEL +0 -0
  40. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/entry_points.txt +0 -0
  41. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/licenses/LICENSE +0 -0
  42. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/top_level.txt +0 -0
nkululeko/modelrunner.py CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
4
4
 
5
5
  from nkululeko import glob_conf
6
6
  from nkululeko.utils.util import Util
7
+ from nkululeko.balance import DataBalancer
7
8
 
8
9
 
9
10
  class Modelrunner:
@@ -143,6 +144,7 @@ class Modelrunner:
143
144
 
144
145
  def _select_model(self, model_type):
145
146
  self._check_balancing()
147
+ self._check_feature_balancing()
146
148
 
147
149
  if model_type == "svm":
148
150
  from nkululeko.models.model_svm import SVM_model
@@ -243,45 +245,19 @@ class Modelrunner:
243
245
  )
244
246
  return self.model
245
247
 
246
- def _check_balancing(self):
248
+ def _check_feature_balancing(self):
249
+ """Check and apply feature balancing using the dedicated DataBalancer class."""
247
250
  balancing = self.util.config_val("FEATS", "balancing", False)
248
251
  if balancing:
249
- orig_size = self.feats_train.shape[0]
250
- self.util.debug(f"balancing the training features with: {balancing}")
251
- if balancing == "ros":
252
- from imblearn.over_sampling import RandomOverSampler
253
-
254
- sampler = RandomOverSampler(random_state=42)
255
- X_res, y_res = sampler.fit_resample(
256
- self.feats_train, self.df_train[self.target]
257
- )
258
- elif balancing == "smote":
259
- from imblearn.over_sampling import SMOTE
260
-
261
- sampler = SMOTE(random_state=42)
262
- X_res, y_res = sampler.fit_resample(
263
- self.feats_train, self.df_train[self.target]
264
- )
265
- elif balancing == "adasyn":
266
- from imblearn.over_sampling import ADASYN
267
-
268
- sampler = ADASYN(random_state=42)
269
- X_res, y_res = sampler.fit_resample(
270
- self.feats_train, self.df_train[self.target]
271
- )
272
- else:
273
- self.util.error(
274
- f"unknown balancing algorithm: {balancing} (should be [ros|smote|adasyn])"
275
- )
276
-
277
- self.feats_train = X_res
278
- self.df_train = pd.DataFrame({self.target: y_res}, index=X_res.index)
279
- self.util.debug(
280
- f"balanced with: {balancing}, new size: {X_res.shape[0]} (was {orig_size})"
252
+ self.util.debug("Applying feature balancing using DataBalancer")
253
+
254
+ # Initialize the data balancer
255
+ balancer = DataBalancer(random_state=42)
256
+
257
+ # Apply balancing
258
+ self.df_train, self.feats_train = balancer.balance_features(
259
+ df_train=self.df_train,
260
+ feats_train=self.feats_train,
261
+ target_column=self.target,
262
+ method=balancing
281
263
  )
282
- le = glob_conf.label_encoder
283
- res = y_res.value_counts()
284
- resd = {}
285
- for i, e in enumerate(le.inverse_transform(res.index.values)):
286
- resd[e] = res.values[i]
287
- self.util.debug(f"{resd})")
nkululeko/models/model.py CHANGED
@@ -3,15 +3,11 @@ import ast
3
3
  import pickle
4
4
  import random
5
5
 
6
- from joblib import parallel_backend
7
6
  import numpy as np
8
7
  import pandas as pd
9
- from sklearn.model_selection import GridSearchCV
10
- from sklearn.model_selection import LeaveOneGroupOut
11
- from sklearn.model_selection import StratifiedKFold
12
8
  import sklearn.utils
13
-
14
- import audeer
9
+ from joblib import parallel_backend
10
+ from sklearn.model_selection import GridSearchCV, LeaveOneGroupOut, StratifiedKFold
15
11
 
16
12
  import nkululeko.glob_conf as glob_conf
17
13
  from nkululeko.reporting.reporter import Reporter
@@ -305,15 +301,8 @@ class Model:
305
301
  def get_type(self):
306
302
  return "generic"
307
303
 
308
- def predict_sample(self, features: np.ndarray) -> dict | float:
309
- """Predict a single sample using the trained model.
310
-
311
- Args:
312
- features (np.ndarray): The feature vector of the sample to predict.
313
-
314
- Returns:
315
- dict: A dictionary containing the predicted class probabilities or value.
316
- """
304
+ def predict_sample(self, features):
305
+ """Predict one sample"""
317
306
  prediction = {}
318
307
  if self.util.exp_is_classification():
319
308
  # get the class probabilities
@@ -347,30 +336,3 @@ class Model:
347
336
  self.set_id(run, epoch)
348
337
  with open(path, "rb") as handle:
349
338
  self.clf = pickle.load(handle)
350
-
351
- # next function exports the model to onnx
352
- def export_onnx(self, onnx_path, input_shape=None):
353
- """Export the trained sklearn model to ONNX format.
354
-
355
- Args:
356
- onnx_path (str): Path to save the ONNX model.
357
- input_shape (tuple, optional): Shape of the input features. If None, inferred from feats_train.
358
- """
359
- import skl2onnx
360
- from skl2onnx import convert_sklearn
361
- from skl2onnx.common.data_types import FloatTensorType
362
-
363
- if not hasattr(self, "clf"):
364
- self.util.error("No trained model found to export.")
365
- return
366
-
367
- if input_shape is None:
368
- n_features = self.feats_train.shape[1]
369
- initial_type = [("input", FloatTensorType([None, n_features]))]
370
- else:
371
- initial_type = [("input", FloatTensorType(input_shape))]
372
-
373
- onnx_model = convert_sklearn(self.clf, initial_types=initial_type)
374
- with open(audeer.path(onnx_path), "wb") as f:
375
- f.write(onnx_model.SerializeToString())
376
- self.util.debug(f"Model exported to ONNX at {onnx_path}")