PyPI - nkululeko - Versions diffs - 0.89.0__py3-none-any.whl → 0.89.2__py3-none-any.whl - Mend

nkululeko 0.89.0__py3-none-any.whl → 0.89.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

nkululeko/constants.py +1 -1
nkululeko/ensemble.py +13 -1
nkululeko/explore.py +3 -1
nkululeko/feat_extract/feats_analyser.py +24 -7
nkululeko/modelrunner.py +2 -2
nkululeko/reporting/reporter.py +4 -3
{nkululeko-0.89.0.dist-info → nkululeko-0.89.2.dist-info}/METADATA +9 -1
{nkululeko-0.89.0.dist-info → nkululeko-0.89.2.dist-info}/RECORD +11 -11
{nkululeko-0.89.0.dist-info → nkululeko-0.89.2.dist-info}/WHEEL +1 -1
{nkululeko-0.89.0.dist-info → nkululeko-0.89.2.dist-info}/LICENSE +0 -0
{nkululeko-0.89.0.dist-info → nkululeko-0.89.2.dist-info}/top_level.txt +0 -0

nkululeko/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.89.0"
+VERSION="0.89.2"
 SAMPLING_RATE = 16000

nkululeko/ensemble.py CHANGED Viewed

@@ -18,6 +18,7 @@ Raises:
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 from typing import List
 import configparser
 import time
@@ -26,7 +27,16 @@ from pathlib import Path
 import numpy as np
 import pandas as pd
-from sklearn.metrics import balanced_accuracy_score
+import matplotlib.pyplot as plt
+from sklearn.metrics import(
+    RocCurveDisplay,
+    balanced_accuracy_score,
+    classification_report,
+    auc,
+    roc_auc_score,
+    roc_curve
+)
 from nkululeko.constants import VERSION
 from nkululeko.experiment import Experiment
@@ -284,6 +294,8 @@ def ensemble_predictions(
     predicted = ensemble_preds["predicted"]
     uar = balanced_accuracy_score(truth, predicted)
     acc = (truth == predicted).mean()
+    # print classification report
+    Util("ensemble").debug(f"\n {classification_report(truth, predicted, digits=4)}")
     Util("ensemble").debug(f"{method}: UAR: {uar:.3f}, ACC: {acc:.3f}")
     return ensemble_preds

nkululeko/explore.py CHANGED Viewed

@@ -91,7 +91,9 @@ def main(src_dir):
             # these investigations need features to explore
             expr.extract_feats()
             needs_feats = True
-        # explore
+            # explore
+            expr.init_runmanager()
+            expr.runmgr.do_runs()
     expr.analyse_features(needs_feats)
     expr.store_report()
     print("DONE")

nkululeko/feat_extract/feats_analyser.py CHANGED Viewed

@@ -50,19 +50,32 @@ class FeatureAnalyser:
         name = "my_shap_values"
         if not self.util.exist_pickle(name):
+            # get model name
+            model_name = self.util.get_model_type()
+            if hasattr(model, "predict_shap"):
+                model_func = model.predict_shap
+            elif hasattr(model, "clf"):
+                model_func = model.clf.predict
+            else:
+                raise Exception("Model not supported for SHAP analysis")
+            self.util.debug(f"using SHAP explainer for {model_name} model")
             explainer = shap.Explainer(
-                model.predict_shap,
+                model_func,
                 self.features,
                 output_names=glob_conf.labels,
                 algorithm="permutation",
                 npermutations=5,
             )
             self.util.debug("computing SHAP values...")
             shap_values = explainer(self.features)
             self.util.to_pickle(shap_values, name)
         else:
             shap_values = self.util.from_pickle(name)
+        # plt.figure()
+        plt.close('all')
         plt.tight_layout()
         shap.plots.bar(shap_values)
         fig_dir = self.util.get_path("fig_dir") + "../"  # one up because of the runs
@@ -71,7 +84,8 @@ class FeatureAnalyser:
         filename = f"_SHAP_{model.name}"
         filename = f"{fig_dir}{exp_name}{filename}.{format}"
         plt.savefig(filename)
-        self.util.debug(f"plotted SHAP feature importance tp {filename}")
+        plt.close()
+        self.util.debug(f"plotted SHAP feature importance to {filename}")
     def analyse(self):
         models = ast.literal_eval(self.util.config_val("EXPL", "model", "['log_reg']"))
@@ -139,7 +153,7 @@ class FeatureAnalyser:
                 elif model_s == "svm":
                     from sklearn.svm import SVC
-                    c = float(self.util.config_val("MODEL", "C_val", "0.001"))
+                    c = float(self.util.config_val("MODEL", "C_val", "1.0"))
                     model = SVC(kernel="linear", C=c, gamma="scale")
                     result_importances[model_s] = self._get_importance(
                         model, permutation
@@ -205,7 +219,7 @@ class FeatureAnalyser:
                         model, permutation
                     )
                 elif model_s == "xgr":
-                    from xgboost import XGBClassifier
+                    from xgboost import XGBRegressor
                     model = XGBRegressor()
                     result_importances[model_s] = self._get_importance(
@@ -270,12 +284,14 @@ class FeatureAnalyser:
             )
         )
+        # print feature importance values to file and debug and save to result
+        self.util.debug(f"Importance features from {model_name}: features = \n{df_imp['feats'].values.tolist()}")
         # result file
         res_dir = self.util.get_path("res_dir")
         filename = f"_EXPL_{model_name}"
         if permutation:
             filename += "_perm"
-        filename = f"{res_dir}{self.util.get_exp_name(only_data=True)}{filename}_{model_name}.txt"
+        filename = f"{res_dir}{self.util.get_exp_name(only_data=True)}{filename}_{max_feat_num}_fi.txt"
         with open(filename, "w") as text_file:
             text_file.write(
                 "features in order of decreasing importance according to model"
@@ -283,7 +299,8 @@ class FeatureAnalyser:
             )
         df_imp.to_csv(filename, mode="a")
+        self.util.debug(f"Saved feature importance values to {filename}")
         # check if feature distributions should be plotted
         plot_feats = self.util.config_val("EXPL", "feature_distributions", False)
         if plot_feats:

nkululeko/modelrunner.py CHANGED Viewed

@@ -53,8 +53,8 @@ class Modelrunner:
             # epochs are handled by Huggingface API
             self.model.train()
             report = self.model.predict()
-            # todo: findout the best epoch, no need
-            # since oad_best_model_at_end is given in training args
+            # todo: findout the best epoch -> no need
+            # since load_best_model_at_end is given in training args
             epoch = epoch_num
             report.set_id(self.run, epoch)
             plot_name = self.util.get_plot_name() + f"_{self.run}_{epoch:03d}_cnf"

nkululeko/reporting/reporter.py CHANGED Viewed

@@ -402,7 +402,7 @@ class Reporter:
                 )
                 # print classifcation report in console
                 self.util.debug(
-                    f"\n {classification_report(self.truths, self.preds, target_names=labels)}"
+                    f"\n {classification_report(self.truths, self.preds, target_names=labels, digits=4)}"
                 )
             except ValueError as e:
                 self.util.debug(
@@ -422,16 +422,17 @@ class Reporter:
                 if len(np.unique(self.truths)) == 2:
                     fpr, tpr, _ = roc_curve(self.truths, self.preds)
                     auc_score = auc(fpr, tpr)
+                    plot_path = f"{fig_dir}{self.util.get_exp_name()}_{epoch}{self.filenameadd}_roc.{self.format}"
+                    plt.figure()
                     display = RocCurveDisplay(
                         fpr=fpr,
                         tpr=tpr,
                         roc_auc=auc_score,
                         estimator_name=f"{self.model_type} estimator",
                     )
-                    # save plot
-                    plot_path = f"{fig_dir}{self.util.get_exp_name()}_{epoch}{self.filenameadd}_roc.{self.format}"
                     display.plot(ax=None)
                     plt.savefig(plot_path)
+                    plt.close()
                     self.util.debug(f"Saved ROC curve to {plot_path}")
                     pauc_score = roc_auc_score(self.truths, self.preds, max_fpr=0.1)
                     auc_pauc = f"auc: {auc_score:.3f}, pauc: {pauc_score:.3f} from epoch: {epoch}"

{nkululeko-0.89.0.dist-info → nkululeko-0.89.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.89.0
+Version: 0.89.2
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -356,6 +356,14 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
+Version 0.89.2
+--------------
+* fix shap value calculation
+Version 0.89.1
+--------------
+* print and save result of feature importance
 Version 0.89.0
 --------------
 * added Roc plots and classification report on Debug

{nkululeko-0.89.0.dist-info → nkululeko-0.89.2.dist-info}/RECORD RENAMED Viewed

@@ -2,19 +2,19 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
 nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
 nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
-nkululeko/constants.py,sha256=YlaNUy3dWo6v0O02alzu9cMApavzOisvDjFSaYzGepo,39
+nkululeko/constants.py,sha256=WFGVylIst9Be_eHBZ9GiR43Qi4CdRySmNUzyNox6aMM,39
 nkululeko/demo.py,sha256=bLuHkeEl5rOfm7ecGHCcWATiPK7-njNbtrGljxzNzFs,5088
 nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
 nkululeko/demo_predictor.py,sha256=zs1bjhpnKuNCPLJeiyDm19ME1NEDOQT3QNeyVKJq9Yc,4882
-nkululeko/ensemble.py,sha256=cVz8hWd2m7poyS0lTIfrsha0K8U-hd6eiBWMqDOAlt8,12669
+nkululeko/ensemble.py,sha256=MayHpngGH_FTvSxUsH3NdxJd6WBAosGRFQeQ7cMjIco,12922
 nkululeko/experiment.py,sha256=L4PzoScPLG2xTyniVy9evcBy_8CIe3RTeTEUVTqiuvQ,31186
-nkululeko/explore.py,sha256=_GOgcRaPvh2xBbKPAkSJjYzgHhD_xb3ZCB6M1MPA6ao,3867
+nkululeko/explore.py,sha256=AbTVDmuDIaLfALQGvDW1yndcw2ikaEVEZ_fJVuUS070,3940
 nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
 nkululeko/feature_extractor.py,sha256=UnspIWz3XrNhKnBBhWZkH2bHvD-sROtrQVqB1JvkUyw,4088
 nkululeko/file_checker.py,sha256=LoLnL8aHpW-axMQ46qbqrManTs5otG9ShpEZuz9iRSk,3474
 nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
 nkululeko/glob_conf.py,sha256=KL9YJQTHvTztxo1vr25qRRgaPnx4NTg0XrdbovKGMmw,525
-nkululeko/modelrunner.py,sha256=cKYD9a7MRoBxfqUy3X8kf6rGTYho-33In8I9YkzMOo8,11196
+nkululeko/modelrunner.py,sha256=lJy-xM4QfDDWeL0dLTE_VIb4sYrnd_Z_yJRK3wwohQA,11199
 nkululeko/multidb.py,sha256=CCjmVsZyvydgOztFlaeBvOJH8nsvU-sPQdFAw8-q0U4,6752
 nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
 nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
@@ -51,7 +51,7 @@ nkululeko/data/dataset_csv.py,sha256=UGEpi__eT2KFS6Fop6N4HkMrzO-u5VP71gt44kwZavo
 nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/feat_extract/feats_agender.py,sha256=tMK3_qs8adylNNSR0CS1RjU9RxmpumLqmuyzmc2ZYjA,3184
 nkululeko/feat_extract/feats_agender_agender.py,sha256=19NoRT0KJ8WoZ3EabTYexXymD7bDy58-H20jYmdqjD0,3498
-nkululeko/feat_extract/feats_analyser.py,sha256=Y9hMpZ9WsQOrxTP3B1diHnzMeOgwbVpVFWVlIyhHMJs,12722
+nkululeko/feat_extract/feats_analyser.py,sha256=rSsN6kcDUv64DaTl2DvReXF3_g7CtSwiPKgMzbJPqVI,13516
 nkululeko/feat_extract/feats_ast.py,sha256=ycJn5eSVOxcEpmeHVk0FPB8q5XiTC8VSKz61L9n0Wa4,4638
 nkululeko/feat_extract/feats_auddim.py,sha256=ulP_o4SGeQDFTs8YYCGKgccARAo6-wcjPK6-hhGjmn8,3155
 nkululeko/feat_extract/feats_audmodel.py,sha256=aRGTBDKdYaTT_9xDaFZqpuyPhzxSNN_3b1PJDUHtYW4,3180
@@ -98,7 +98,7 @@ nkululeko/reporting/defines.py,sha256=IsY1YgKRMaABpylVKjBJgJ5bNCEbGCVA_E6pivraqS
 nkululeko/reporting/latex_writer.py,sha256=qiCRSmB4KOD_za4oHu5x-PhwjZohzfo8wecMOwlXZwc,1886
 nkululeko/reporting/report.py,sha256=W0rcigDdjBvxZQ3pZja_gvToILYvaZ1BFtnN2qFRfYI,1060
 nkululeko/reporting/report_item.py,sha256=siWeGNgo4bAE46YBMNcsdf3jTMTy76BO9Fi6DTvDig4,533
-nkululeko/reporting/reporter.py,sha256=xFyGj6gQ8T1WB3w3tJ0awlgQcq1e3IKXEIfl_DvOngg,19996
+nkululeko/reporting/reporter.py,sha256=oodLaNZXqPpfoRqVxTldYcx68oN35OGgy-vvbAuY-yI,20039
 nkululeko/reporting/result.py,sha256=G63a2tHCwHhM6NBJgYzsWKWJm4Yu3r4hsCHA2Km7eHU,1073
 nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=pmLHuXsaqvcdYxB4PSW9l1mbQWZZBJFhi_CGabqydas,1947
@@ -107,8 +107,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
 nkululeko/utils/stats.py,sha256=eC9dMO-by6CDnGLHDBQu-2B4-BudZNJ0nnWGhKYdUMA,2968
 nkululeko/utils/util.py,sha256=363Lgmcg6fPKCGbroX0DDyW_zcYNx-Ayqv67qdpfYcw,16710
-nkululeko-0.89.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
-nkululeko-0.89.0.dist-info/METADATA,sha256=IoIlF0i109BRy7ki2K9Heq-OWVeyhg7rModVCY2fQp0,40590
-nkululeko-0.89.0.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
-nkululeko-0.89.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
-nkululeko-0.89.0.dist-info/RECORD,,
+nkululeko-0.89.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.89.2.dist-info/METADATA,sha256=00CLy_4Wm7IktJy7dAkKrXkCMi0f1HUXCoQYMNcp2kw,40729
+nkululeko-0.89.2.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
+nkululeko-0.89.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.89.2.dist-info/RECORD,,

{nkululeko-0.89.0.dist-info → nkululeko-0.89.2.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (74.0.0)
+Generator: setuptools (74.1.2)
 Root-Is-Purelib: true
 Tag: py3-none-any

{nkululeko-0.89.0.dist-info → nkululeko-0.89.2.dist-info}/LICENSE RENAMED Viewed

File without changes

{nkululeko-0.89.0.dist-info → nkululeko-0.89.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

nkululeko 0.89.0__py3-none-any.whl → 0.89.2__py3-none-any.whl

nkululeko 0.89.0__py3-none-any.whl → 0.89.2__py3-none-any.whl