nkululeko 0.94.3__py3-none-any.whl → 0.95.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/augmenting/resampler.py +5 -2
- nkululeko/autopredict/ap_emotion.py +36 -0
- nkululeko/autopredict/ap_text.py +45 -0
- nkululeko/autopredict/tests/__init__.py +0 -0
- nkululeko/autopredict/tests/test_whisper_transcriber.py +122 -0
- nkululeko/autopredict/whisper_transcriber.py +81 -0
- nkululeko/balance.py +222 -0
- nkululeko/constants.py +1 -1
- nkululeko/experiment.py +53 -3
- nkululeko/explore.py +32 -13
- nkululeko/feat_extract/feats_analyser.py +45 -17
- nkululeko/feat_extract/feats_emotion2vec.py +51 -26
- nkululeko/feat_extract/feats_praat.py +3 -3
- nkululeko/feat_extract/feats_praat_core.py +769 -0
- nkululeko/feat_extract/tests/__init__.py +1 -0
- nkululeko/feat_extract/tests/test_feats_opensmile.py +162 -0
- nkululeko/feat_extract/tests/test_feats_praat_core.py +507 -0
- nkululeko/glob_conf.py +9 -0
- nkululeko/modelrunner.py +15 -39
- nkululeko/models/model.py +4 -42
- nkululeko/models/model_tuned.py +416 -84
- nkululeko/models/model_xgb.py +148 -2
- nkululeko/models/tests/test_model_knn.py +49 -0
- nkululeko/models/tests/test_model_mlp.py +153 -0
- nkululeko/models/tests/test_model_xgb.py +33 -0
- nkululeko/nkululeko.py +0 -9
- nkululeko/plots.py +25 -19
- nkululeko/predict.py +8 -6
- nkululeko/reporting/report.py +7 -5
- nkululeko/reporting/reporter.py +20 -5
- nkululeko/test_predictor.py +7 -1
- nkululeko/tests/__init__.py +1 -0
- nkululeko/tests/test_balancing.py +270 -0
- nkululeko/utils/util.py +38 -6
- {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/METADATA +1 -1
- {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/RECORD +40 -27
- nkululeko/feat_extract/feats_opensmile copy.py +0 -93
- nkululeko/feat_extract/feinberg_praat.py +0 -628
- {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/WHEEL +0 -0
- {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/entry_points.txt +0 -0
- {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/licenses/LICENSE +0 -0
- {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/top_level.txt +0 -0
nkululeko/modelrunner.py
CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
 
 from nkululeko import glob_conf
 from nkululeko.utils.util import Util
+from nkululeko.balance import DataBalancer
 
 
 class Modelrunner:
@@ -143,6 +144,7 @@ class Modelrunner:
 
     def _select_model(self, model_type):
         self._check_balancing()
+        self._check_feature_balancing()
 
         if model_type == "svm":
            from nkululeko.models.model_svm import SVM_model
@@ -243,45 +245,19 @@
             )
         return self.model
 
-    def [rest of line not preserved in this extract]
+    def _check_feature_balancing(self):
+        """Check and apply feature balancing using the dedicated DataBalancer class."""
         balancing = self.util.config_val("FEATS", "balancing", False)
         if balancing:
-            [12 removed lines not preserved in this extract]
-                sampler = SMOTE(random_state=42)
-                X_res, y_res = sampler.fit_resample(
-                    self.feats_train, self.df_train[self.target]
-                )
-            elif balancing == "adasyn":
-                from imblearn.over_sampling import ADASYN
-
-                sampler = ADASYN(random_state=42)
-                X_res, y_res = sampler.fit_resample(
-                    self.feats_train, self.df_train[self.target]
-                )
-            else:
-                self.util.error(
-                    f"unknown balancing algorithm: {balancing} (should be [ros|smote|adasyn])"
-                )
-
-            self.feats_train = X_res
-            self.df_train = pd.DataFrame({self.target: y_res}, index=X_res.index)
-            self.util.debug(
-                f"balanced with: {balancing}, new size: {X_res.shape[0]} (was {orig_size})"
+            self.util.debug("Applying feature balancing using DataBalancer")
+
+            # Initialize the data balancer
+            balancer = DataBalancer(random_state=42)
+
+            # Apply balancing
+            self.df_train, self.feats_train = balancer.balance_features(
+                df_train=self.df_train,
+                feats_train=self.feats_train,
+                target_column=self.target,
+                method=balancing
             )
-            le = glob_conf.label_encoder
-            res = y_res.value_counts()
-            resd = {}
-            for i, e in enumerate(le.inverse_transform(res.index.values)):
-                resd[e] = res.values[i]
-            self.util.debug(f"{resd})")
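The new `_check_feature_balancing()` delegates to the `DataBalancer` class added in `nkululeko/balance.py` (+222 lines, not shown in this diff). As a rough orientation, the sketch below rebuilds the call signature visible above on top of imblearn's oversamplers; the class name `DataBalancerSketch`, the supported method names (taken from the old inline ros/smote/adasyn code), and the index handling are assumptions, not the actual contents of `balance.py`.

```python
# Hypothetical sketch of a DataBalancer-style wrapper around imblearn
# oversamplers; the real nkululeko.balance.DataBalancer is not shown in
# this diff and may behave differently.
import pandas as pd
from imblearn.over_sampling import ADASYN, SMOTE, RandomOverSampler


class DataBalancerSketch:
    def __init__(self, random_state=42):
        self.random_state = random_state

    def balance_features(self, df_train, feats_train, target_column, method):
        # Method names mirror the old inline code: ros | smote | adasyn.
        samplers = {
            "ros": RandomOverSampler(random_state=self.random_state),
            "smote": SMOTE(random_state=self.random_state),
            "adasyn": ADASYN(random_state=self.random_state),
        }
        if method not in samplers:
            raise ValueError(f"unknown balancing algorithm: {method}")
        # Oversample the feature DataFrame against the target column.
        X_res, y_res = samplers[method].fit_resample(
            feats_train, df_train[target_column]
        )
        # Rebuild the label frame so labels stay aligned with the resampled
        # features, mirroring what the removed inline code did.
        df_res = pd.DataFrame({target_column: y_res}, index=X_res.index)
        return df_res, X_res
```

Judging from the `config_val("FEATS", "balancing", False)` lookup, this feature-level balancing is presumably switched on from the `[FEATS]` section of the experiment INI file (e.g. `balancing = smote`).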
nkululeko/models/model.py
CHANGED
@@ -3,15 +3,11 @@ import ast
 import pickle
 import random
 
-from joblib import parallel_backend
 import numpy as np
 import pandas as pd
-from sklearn.model_selection import GridSearchCV
-from sklearn.model_selection import LeaveOneGroupOut
-from sklearn.model_selection import StratifiedKFold
 import sklearn.utils
-
-import [rest of line not preserved in this extract]
+from joblib import parallel_backend
+from sklearn.model_selection import GridSearchCV, LeaveOneGroupOut, StratifiedKFold
 
 import nkululeko.glob_conf as glob_conf
 from nkululeko.reporting.reporter import Reporter
@@ -305,15 +301,8 @@ class Model:
     def get_type(self):
         return "generic"
 
-    def predict_sample(self, features [rest of line not preserved in this extract]
-        """Predict [rest of line not preserved in this extract]
-
-        Args:
-            features (np.ndarray): The feature vector of the sample to predict.
-
-        Returns:
-            dict: A dictionary containing the predicted class probabilities or value.
-        """
+    def predict_sample(self, features):
+        """Predict one sample"""
         prediction = {}
         if self.util.exp_is_classification():
             # get the class probabilities
@@ -347,30 +336,3 @@
         self.set_id(run, epoch)
         with open(path, "rb") as handle:
             self.clf = pickle.load(handle)
-
-    # next function exports the model to onnx
-    def export_onnx(self, onnx_path, input_shape=None):
-        """Export the trained sklearn model to ONNX format.
-
-        Args:
-            onnx_path (str): Path to save the ONNX model.
-            input_shape (tuple, optional): Shape of the input features. If None, inferred from feats_train.
-        """
-        import skl2onnx
-        from skl2onnx import convert_sklearn
-        from skl2onnx.common.data_types import FloatTensorType
-
-        if not hasattr(self, "clf"):
-            self.util.error("No trained model found to export.")
-            return
-
-        if input_shape is None:
-            n_features = self.feats_train.shape[1]
-            initial_type = [("input", FloatTensorType([None, n_features]))]
-        else:
-            initial_type = [("input", FloatTensorType(input_shape))]
-
-        onnx_model = convert_sklearn(self.clf, initial_types=initial_type)
-        with open(audeer.path(onnx_path), "wb") as f:
-            f.write(onnx_model.SerializeToString())
-        self.util.debug(f"Model exported to ONNX at {onnx_path}")
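The removed `export_onnx()` followed the standard skl2onnx conversion pattern, though it referenced `audeer.path()` without that import being visible in the extract above. For reference, the sketch below shows the same skl2onnx pattern as standalone code outside nkululeko; the demo classifier, data, and output path are illustrative only, not part of the package.

```python
# Standalone sketch of the skl2onnx export pattern used by the removed
# export_onnx(); classifier, data, and output path are illustrative only.
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
from sklearn.datasets import make_classification
from sklearn.svm import SVC

# Fit a small classifier standing in for self.clf.
X, y = make_classification(n_samples=200, n_features=20, random_state=42)
clf = SVC(probability=True, random_state=42).fit(X, y)

# Declare the input signature: float features with a dynamic batch dimension.
initial_type = [("input", FloatTensorType([None, X.shape[1]]))]

# Convert the fitted estimator and write the serialized ONNX graph to disk.
onnx_model = convert_sklearn(clf, initial_types=initial_type)
with open("model.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())
```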