PyPI - nkululeko - Versions diffs - 0.94.3__py3-none-any.whl → 0.95.1__py3-none-any.whl - Mend

nkululeko 0.94.3__py3-none-any.whl → 0.95.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

nkululeko/augmenting/resampler.py +5 -2
nkululeko/autopredict/ap_emotion.py +36 -0
nkululeko/autopredict/ap_text.py +45 -0
nkululeko/autopredict/tests/__init__.py +0 -0
nkululeko/autopredict/tests/test_whisper_transcriber.py +122 -0
nkululeko/autopredict/whisper_transcriber.py +81 -0
nkululeko/balance.py +222 -0
nkululeko/constants.py +1 -1
nkululeko/experiment.py +53 -3
nkululeko/explore.py +32 -13
nkululeko/feat_extract/feats_analyser.py +45 -17
nkululeko/feat_extract/feats_emotion2vec.py +51 -26
nkululeko/feat_extract/feats_praat.py +3 -3
nkululeko/feat_extract/feats_praat_core.py +769 -0
nkululeko/feat_extract/tests/__init__.py +1 -0
nkululeko/feat_extract/tests/test_feats_opensmile.py +162 -0
nkululeko/feat_extract/tests/test_feats_praat_core.py +507 -0
nkululeko/glob_conf.py +9 -0
nkululeko/modelrunner.py +15 -39
nkululeko/models/model.py +4 -42
nkululeko/models/model_tuned.py +416 -84
nkululeko/models/model_xgb.py +148 -2
nkululeko/models/tests/test_model_knn.py +49 -0
nkululeko/models/tests/test_model_mlp.py +153 -0
nkululeko/models/tests/test_model_xgb.py +33 -0
nkululeko/nkululeko.py +0 -9
nkululeko/plots.py +25 -19
nkululeko/predict.py +8 -6
nkululeko/reporting/report.py +7 -5
nkululeko/reporting/reporter.py +20 -5
nkululeko/test_predictor.py +7 -1
nkululeko/tests/__init__.py +1 -0
nkululeko/tests/test_balancing.py +270 -0
nkululeko/utils/util.py +38 -6
{nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/METADATA +1 -1
{nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/RECORD +40 -27
nkululeko/feat_extract/feats_opensmile copy.py +0 -93
nkululeko/feat_extract/feinberg_praat.py +0 -628
{nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/WHEEL +0 -0
{nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/entry_points.txt +0 -0
{nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/licenses/LICENSE +0 -0
{nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/top_level.txt +0 -0

nkululeko/modelrunner.py CHANGED Viewed

@@ -4,6 +4,7 @@ import pandas as pd
 from nkululeko import glob_conf
 from nkululeko.utils.util import Util
+from nkululeko.balance import DataBalancer
 class Modelrunner:
@@ -143,6 +144,7 @@ class Modelrunner:
     def _select_model(self, model_type):
         self._check_balancing()
+        self._check_feature_balancing()
         if model_type == "svm":
             from nkululeko.models.model_svm import SVM_model
@@ -243,45 +245,19 @@ class Modelrunner:
             )
         return self.model
-    def _check_balancing(self):
+    def _check_feature_balancing(self):
+        """Check and apply feature balancing using the dedicated DataBalancer class."""
         balancing = self.util.config_val("FEATS", "balancing", False)
         if balancing:
-            orig_size = self.feats_train.shape[0]
-            self.util.debug(f"balancing the training features with: {balancing}")
-            if balancing == "ros":
-                from imblearn.over_sampling import RandomOverSampler
-                sampler = RandomOverSampler(random_state=42)
-                X_res, y_res = sampler.fit_resample(
-                    self.feats_train, self.df_train[self.target]
-                )
-            elif balancing == "smote":
-                from imblearn.over_sampling import SMOTE
-                sampler = SMOTE(random_state=42)
-                X_res, y_res = sampler.fit_resample(
-                    self.feats_train, self.df_train[self.target]
-                )
-            elif balancing == "adasyn":
-                from imblearn.over_sampling import ADASYN
-                sampler = ADASYN(random_state=42)
-                X_res, y_res = sampler.fit_resample(
-                    self.feats_train, self.df_train[self.target]
-                )
-            else:
-                self.util.error(
-                    f"unknown balancing algorithm: {balancing} (should be [ros|smote|adasyn])"
-                )
-            self.feats_train = X_res
-            self.df_train = pd.DataFrame({self.target: y_res}, index=X_res.index)
-            self.util.debug(
-                f"balanced with: {balancing}, new size: {X_res.shape[0]} (was {orig_size})"
+            self.util.debug("Applying feature balancing using DataBalancer")
+            # Initialize the data balancer
+            balancer = DataBalancer(random_state=42)
+            # Apply balancing
+            self.df_train, self.feats_train = balancer.balance_features(
+                df_train=self.df_train,
+                feats_train=self.feats_train,
+                target_column=self.target,
+                method=balancing
             )
-            le = glob_conf.label_encoder
-            res = y_res.value_counts()
-            resd = {}
-            for i, e in enumerate(le.inverse_transform(res.index.values)):
-                resd[e] = res.values[i]
-            self.util.debug(f"{resd})")

nkululeko/models/model.py CHANGED Viewed

@@ -3,15 +3,11 @@ import ast
 import pickle
 import random
-from joblib import parallel_backend
 import numpy as np
 import pandas as pd
-from sklearn.model_selection import GridSearchCV
-from sklearn.model_selection import LeaveOneGroupOut
-from sklearn.model_selection import StratifiedKFold
 import sklearn.utils
-import audeer
+from joblib import parallel_backend
+from sklearn.model_selection import GridSearchCV, LeaveOneGroupOut, StratifiedKFold
 import nkululeko.glob_conf as glob_conf
 from nkululeko.reporting.reporter import Reporter
@@ -305,15 +301,8 @@ class Model:
     def get_type(self):
         return "generic"
-    def predict_sample(self, features: np.ndarray) -> dict | float:
-        """Predict a single sample using the trained model.
-        Args:
-            features (np.ndarray): The feature vector of the sample to predict.
-        Returns:
-            dict: A dictionary containing the predicted class probabilities or value.
-        """
+    def predict_sample(self, features):
+        """Predict one sample"""
         prediction = {}
         if self.util.exp_is_classification():
             # get the class probabilities
@@ -347,30 +336,3 @@ class Model:
         self.set_id(run, epoch)
         with open(path, "rb") as handle:
             self.clf = pickle.load(handle)
-    # next function exports the model to onnx
-    def export_onnx(self, onnx_path, input_shape=None):
-        """Export the trained sklearn model to ONNX format.
-        Args:
-            onnx_path (str): Path to save the ONNX model.
-            input_shape (tuple, optional): Shape of the input features. If None, inferred from feats_train.
-        """
-        import skl2onnx
-        from skl2onnx import convert_sklearn
-        from skl2onnx.common.data_types import FloatTensorType
-        if not hasattr(self, "clf"):
-            self.util.error("No trained model found to export.")
-            return
-        if input_shape is None:
-            n_features = self.feats_train.shape[1]
-            initial_type = [("input", FloatTensorType([None, n_features]))]
-        else:
-            initial_type = [("input", FloatTensorType(input_shape))]
-        onnx_model = convert_sklearn(self.clf, initial_types=initial_type)
-        with open(audeer.path(onnx_path), "wb") as f:
-            f.write(onnx_model.SerializeToString())
-        self.util.debug(f"Model exported to ONNX at {onnx_path}")

nkululeko 0.94.3__py3-none-any.whl → 0.95.1__py3-none-any.whl

nkululeko 0.94.3__py3-none-any.whl → 0.95.1__py3-none-any.whl