PyPI - nkululeko - Versions diffs - 0.88.12__py3-none-any.whl → 0.89.1__py3-none-any.whl - Mend

nkululeko 0.88.12__py3-none-any.whl → 0.89.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

nkululeko/constants.py +1 -1
nkululeko/ensemble.py +9 -1
nkululeko/feat_extract/feats_analyser.py +7 -4
nkululeko/models/model_tree.py +3 -1
nkululeko/reporting/reporter.py +18 -0
nkululeko/utils/util.py +1 -1
{nkululeko-0.88.12.dist-info → nkululeko-0.89.1.dist-info}/METADATA +18 -12
{nkululeko-0.88.12.dist-info → nkululeko-0.89.1.dist-info}/RECORD +11 -11
{nkululeko-0.88.12.dist-info → nkululeko-0.89.1.dist-info}/WHEEL +1 -1
{nkululeko-0.88.12.dist-info → nkululeko-0.89.1.dist-info}/LICENSE +0 -0
{nkululeko-0.88.12.dist-info → nkululeko-0.89.1.dist-info}/top_level.txt +0 -0

nkululeko/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.88.12"
+VERSION="0.89.1"
 SAMPLING_RATE = 16000

nkululeko/ensemble.py CHANGED Viewed

@@ -26,7 +26,11 @@ from pathlib import Path
 import numpy as np
 import pandas as pd
-from sklearn.metrics import balanced_accuracy_score
+from sklearn.metrics import(
+    balanced_accuracy_score,
+    classification_report,
+    f1_score
+)
 from nkululeko.constants import VERSION
 from nkululeko.experiment import Experiment
@@ -284,6 +288,10 @@ def ensemble_predictions(
     predicted = ensemble_preds["predicted"]
     uar = balanced_accuracy_score(truth, predicted)
     acc = (truth == predicted).mean()
+    # print classification report
+    Util("ensemble").debug(f"\n {classification_report(truth, predicted)}")
+    # f1 = f1_score(truth, predicted, pos_label='p')
+    # Util("ensemble").debug(f"F1: {f1:.3f}")
     Util("ensemble").debug(f"{method}: UAR: {uar:.3f}, ACC: {acc:.3f}")
     return ensemble_preds

nkululeko/feat_extract/feats_analyser.py CHANGED Viewed

@@ -139,7 +139,7 @@ class FeatureAnalyser:
                 elif model_s == "svm":
                     from sklearn.svm import SVC
-                    c = float(self.util.config_val("MODEL", "C_val", "0.001"))
+                    c = float(self.util.config_val("MODEL", "C_val", "1.0"))
                     model = SVC(kernel="linear", C=c, gamma="scale")
                     result_importances[model_s] = self._get_importance(
                         model, permutation
@@ -205,7 +205,7 @@ class FeatureAnalyser:
                         model, permutation
                     )
                 elif model_s == "xgr":
-                    from xgboost import XGBClassifier
+                    from xgboost import XGBRegressor
                     model = XGBRegressor()
                     result_importances[model_s] = self._get_importance(
@@ -270,12 +270,14 @@ class FeatureAnalyser:
             )
         )
+        # log the ranked feature names; the full importance table is written to the result file below
+        self.util.debug(f"Importance features from {model_name}: features = \n{df_imp['feats'].values.tolist()}")
         # result file
         res_dir = self.util.get_path("res_dir")
         filename = f"_EXPL_{model_name}"
         if permutation:
             filename += "_perm"
-        filename = f"{res_dir}{self.util.get_exp_name(only_data=True)}{filename}_{model_name}.txt"
+        filename = f"{res_dir}{self.util.get_exp_name(only_data=True)}{filename}_{max_feat_num}_fi.txt"
         with open(filename, "w") as text_file:
             text_file.write(
                 "features in order of decreasing importance according to model"
@@ -283,7 +285,8 @@ class FeatureAnalyser:
             )
         df_imp.to_csv(filename, mode="a")
+        self.util.debug(f"Saved feature importance values to {filename}")
         # check if feature distributions should be plotted
         plot_feats = self.util.config_val("EXPL", "feature_distributions", False)
         if plot_feats:

nkululeko/models/model_tree.py CHANGED Viewed

@@ -12,4 +12,6 @@ class Tree_model(Model):
     def __init__(self, df_train, df_test, feats_train, feats_test):
         super().__init__(df_train, df_test, feats_train, feats_test)
         self.name = "tree"
-        self.clf = DecisionTreeClassifier()  # set up the classifier
+        self.clf = DecisionTreeClassifier(
+            random_state=42
+        )  # set up the classifier

nkululeko/reporting/reporter.py CHANGED Viewed

@@ -27,6 +27,7 @@ from sklearn.metrics import (
     r2_score,
     roc_auc_score,
     roc_curve,
+    RocCurveDisplay,
 )
 import nkululeko.glob_conf as glob_conf
@@ -75,6 +76,7 @@ class Reporter:
         self.result = Result(0, 0, 0, 0, "unknown")
         self.run = run
         self.epoch = epoch
+        self.model_type = self.util.get_model_type()
         self._set_metric()
         self.filenameadd = ""
         self.cont_to_cat = False
@@ -387,6 +389,7 @@ class Reporter:
             epoch = self.epoch
         """Print all evaluation values to text file."""
         res_dir = self.util.get_path("res_dir")
+        fig_dir = self.util.get_path("fig_dir")
         file_name = f"{res_dir}{self.util.get_exp_name()}_{epoch}{self.filenameadd}.txt"
         if self.util.exp_is_classification():
             labels = glob_conf.labels
@@ -397,6 +400,10 @@ class Reporter:
                     target_names=labels,
                     output_dict=True,
                 )
+                # print classification report in console
+                self.util.debug(
+                    f"\n {classification_report(self.truths, self.preds, target_names=labels)}"
+                )
             except ValueError as e:
                 self.util.debug(
                     "Reporter: caught a ValueError when trying to get"
@@ -415,6 +422,17 @@ class Reporter:
                 if len(np.unique(self.truths)) == 2:
                     fpr, tpr, _ = roc_curve(self.truths, self.preds)
                     auc_score = auc(fpr, tpr)
+                    display = RocCurveDisplay(
+                        fpr=fpr,
+                        tpr=tpr,
+                        roc_auc=auc_score,
+                        estimator_name=f"{self.model_type} estimator",
+                    )
+                    # save plot
+                    plot_path = f"{fig_dir}{self.util.get_exp_name()}_{epoch}{self.filenameadd}_roc.{self.format}"
+                    display.plot(ax=None)
+                    plt.savefig(plot_path)
+                    self.util.debug(f"Saved ROC curve to {plot_path}")
                     pauc_score = roc_auc_score(self.truths, self.preds, max_fpr=0.1)
                     auc_pauc = f"auc: {auc_score:.3f}, pauc: {pauc_score:.3f} from epoch: {epoch}"
                     self.util.debug(auc_pauc)

nkululeko/utils/util.py CHANGED Viewed

@@ -175,7 +175,7 @@ class Util:
         """Get the experiment directory."""
         root = os.path.join(self.config["EXP"]["root"], "")
         name = self.config["EXP"]["name"]
-        dir_name = f"{root}{name}"
+        dir_name = f"{root}/{name}"
         audeer.mkdir(dir_name)
         return dir_name

{nkululeko-0.88.12.dist-info → nkululeko-0.89.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.88.12
+Version: 0.89.1
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -204,7 +204,7 @@ All of them take *--config <my_config.ini>* as an argument.
   * *configurations*: which experiments to combine
   * *--method* (optional): majority_voting, mean (default), max, sum, uncertainty, uncertainty_weighted, confidence_weighted, performance_weighted
   * *--threshold*: uncertainty threshold (1.0 means no threshold)
-  * *--weightes*: weights for performance_weighted method (could be from previous UAR, ACC)
+  * *--weights*: weights for performance_weighted method (could be from previous UAR, ACC)
   * *--outfile* (optional): name of CSV file for output (default: ensemble_result.csv)
   * *--no_labels* (optional): indicate that no ground truth is given
 * **nkululeko.multidb**: do [multiple experiments](http://blog.syntheticspeech.de/2024/01/02/nkululeko-compare-several-databases/), comparing several databases cross and in itself
@@ -220,14 +220,11 @@ All of them take *--config <my_config.ini>* as an argument.
 * **nkululeko.predict**: [predict features](http://blog.syntheticspeech.de/2023/08/16/nkululeko-how-to-predict-labels-for-your-data-from-existing-models-and-check-them/) like SNR, MOS, arousal/valence, age/gender, with DNN models
 * **nkululeko.segment**: [segment a database](http://blog.syntheticspeech.de/2023/07/14/nkululeko-segmenting-a-database/) based on VAD (voice activity detection)
 * **nkululeko.resample**: check on all [sampling rates and change](http://blog.syntheticspeech.de/2023/08/31/how-to-fix-different-sampling-rates-in-a-dataset-with-nkululeko/) to 16kHz
-* **nkululeko.nkuluflag**: a convenient module to specify configuration parameters on the command-line.
-  * usage: nkuluflag.py [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET]
-                    [--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
+* **nkululeko.nkuluflag**: a convenient module to specify configuration parameters on the command-line. Usage:
+  ```bash
+  $ python -m nkululeko.nkuluflag [-h] [--config CONFIG] [--data [DATA ...]] [--label [LABEL ...]] [--tuning_params [TUNING_PARAMS ...]] [--layers [LAYERS ...]] [--model MODEL] [--feat FEAT] [--set SET] [--with_os WITH_OS] [--target TARGET] [--epochs EPOCHS] [--runs RUNS] [--learning_rate LEARNING_RATE] [--drop DROP]
+  ```
 There's my [blog](http://blog.syntheticspeech.de/?s=nkululeko) with tutorials:
 * [Introduction](http://blog.syntheticspeech.de/2021/08/04/machine-learning-experiment-framework/)
@@ -359,8 +356,17 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
-Version 0.88.12
+Version 0.89.1
 --------------
+* print and save result of feature importance
+Version 0.89.0
+--------------
+* added ROC plots and classification report in debug output
+Version 0.88.12
+---------------
 * added n_jobs for sklearn processing
 * re_named num_workers n_jobs
@@ -833,9 +839,9 @@ Version 0.66.3
 Version 0.66.2
 --------------
-* enabled data-pathes with quotes
+* enabled data-paths with quotes
 * enabled missing category labels
-* used tgdm for progress display
+* used tqdm for progress display
 Version 0.66.1
 --------------

{nkululeko-0.88.12.dist-info → nkululeko-0.89.1.dist-info}/RECORD RENAMED Viewed

@@ -2,11 +2,11 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
 nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
 nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
-nkululeko/constants.py,sha256=7-twiCGYx2fTsGKDZex9QsUXRhaudHMRFwoV8HehRcg,40
+nkululeko/constants.py,sha256=nRA0bWrvi-5tXm8QWv4dzDE-3sujMiz26U4QgSVuck0,39
 nkululeko/demo.py,sha256=bLuHkeEl5rOfm7ecGHCcWATiPK7-njNbtrGljxzNzFs,5088
 nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
 nkululeko/demo_predictor.py,sha256=zs1bjhpnKuNCPLJeiyDm19ME1NEDOQT3QNeyVKJq9Yc,4882
-nkululeko/ensemble.py,sha256=cVz8hWd2m7poyS0lTIfrsha0K8U-hd6eiBWMqDOAlt8,12669
+nkululeko/ensemble.py,sha256=egtOFxEp7gjuM5cKBfETnhTn1-7_4zWBPEah65K1C3U,12927
 nkululeko/experiment.py,sha256=L4PzoScPLG2xTyniVy9evcBy_8CIe3RTeTEUVTqiuvQ,31186
 nkululeko/explore.py,sha256=_GOgcRaPvh2xBbKPAkSJjYzgHhD_xb3ZCB6M1MPA6ao,3867
 nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
@@ -51,7 +51,7 @@ nkululeko/data/dataset_csv.py,sha256=UGEpi__eT2KFS6Fop6N4HkMrzO-u5VP71gt44kwZavo
 nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/feat_extract/feats_agender.py,sha256=tMK3_qs8adylNNSR0CS1RjU9RxmpumLqmuyzmc2ZYjA,3184
 nkululeko/feat_extract/feats_agender_agender.py,sha256=19NoRT0KJ8WoZ3EabTYexXymD7bDy58-H20jYmdqjD0,3498
-nkululeko/feat_extract/feats_analyser.py,sha256=Y9hMpZ9WsQOrxTP3B1diHnzMeOgwbVpVFWVlIyhHMJs,12722
+nkululeko/feat_extract/feats_analyser.py,sha256=eW0v7Boybfj2gXi77MPjaLyHUQ1C42mx9hgoQeDwNac,12999
 nkululeko/feat_extract/feats_ast.py,sha256=ycJn5eSVOxcEpmeHVk0FPB8q5XiTC8VSKz61L9n0Wa4,4638
 nkululeko/feat_extract/feats_auddim.py,sha256=ulP_o4SGeQDFTs8YYCGKgccARAo6-wcjPK6-hhGjmn8,3155
 nkululeko/feat_extract/feats_audmodel.py,sha256=aRGTBDKdYaTT_9xDaFZqpuyPhzxSNN_3b1PJDUHtYW4,3180
@@ -88,7 +88,7 @@ nkululeko/models/model_mlp.py,sha256=CaR0PCRBcdCo_hhC5r9Q6IbVIApvtoRVrUdZsgzbx1M
 nkululeko/models/model_mlp_regression.py,sha256=YMHMWRlWL6iL8HdYe6rTAoAW6GwHBx3PDvysCZYj5tQ,10186
 nkululeko/models/model_svm.py,sha256=AzWksBRbIdpUuMbDnAh_YAXebewR5POj9AkB9VC40pI,1010
 nkululeko/models/model_svr.py,sha256=_YZeksqB3eBENGlg3g9RwYFlk9rQQ-XCeNBKLlGGVoE,725
-nkululeko/models/model_tree.py,sha256=rf16faUm4o2LJgkoYpeY998b8DQIvXZ73_m1IS3TnnE,417
+nkululeko/models/model_tree.py,sha256=KScDTGgkOePTZEcereB7bxQ47wIKhYI-xhTKCU4cKDk,454
 nkululeko/models/model_tree_reg.py,sha256=IgQcPTE-304HQLYSKPF8Z4ot_Ur9dH01fZjS0nXke_M,428
 nkululeko/models/model_tuned.py,sha256=vmNBkqvEH-4nnhY1REXDA9kA4vpZJzeRmGJFq7E3bLM,21340
 nkululeko/models/model_xgb.py,sha256=Thgx5ESdIok4v72mKh4plxpo4smGcKALWNCJTDScY0M,447
@@ -98,7 +98,7 @@ nkululeko/reporting/defines.py,sha256=IsY1YgKRMaABpylVKjBJgJ5bNCEbGCVA_E6pivraqS
 nkululeko/reporting/latex_writer.py,sha256=qiCRSmB4KOD_za4oHu5x-PhwjZohzfo8wecMOwlXZwc,1886
 nkululeko/reporting/report.py,sha256=W0rcigDdjBvxZQ3pZja_gvToILYvaZ1BFtnN2qFRfYI,1060
 nkululeko/reporting/report_item.py,sha256=siWeGNgo4bAE46YBMNcsdf3jTMTy76BO9Fi6DTvDig4,533
-nkululeko/reporting/reporter.py,sha256=Gt8tEiDQ9rbsYAKb-EbKGVaogPWKKoAEkwtVeiFoTSA,19119
+nkululeko/reporting/reporter.py,sha256=xFyGj6gQ8T1WB3w3tJ0awlgQcq1e3IKXEIfl_DvOngg,19996
 nkululeko/reporting/result.py,sha256=G63a2tHCwHhM6NBJgYzsWKWJm4Yu3r4hsCHA2Km7eHU,1073
 nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=pmLHuXsaqvcdYxB4PSW9l1mbQWZZBJFhi_CGabqydas,1947
@@ -106,9 +106,9 @@ nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5Ev
 nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
 nkululeko/utils/stats.py,sha256=eC9dMO-by6CDnGLHDBQu-2B4-BudZNJ0nnWGhKYdUMA,2968
-nkululeko/utils/util.py,sha256=CY45KIhpYsfYa9qXyqOYErjhS2fjYfaVzRTfOZZKzaQ,16709
-nkululeko-0.88.12.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
-nkululeko-0.88.12.dist-info/METADATA,sha256=46Vj4H4faGcNqc-dMMpyDo_BB9q6yczjYaPpKtsXgNc,40494
-nkululeko-0.88.12.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
-nkululeko-0.88.12.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
-nkululeko-0.88.12.dist-info/RECORD,,
+nkululeko/utils/util.py,sha256=363Lgmcg6fPKCGbroX0DDyW_zcYNx-Ayqv67qdpfYcw,16710
+nkululeko-0.89.1.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.89.1.dist-info/METADATA,sha256=AuVssWNRMXlseH5xSzcls--AAYLFSeEbFtHbAFT2o_o,40667
+nkululeko-0.89.1.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
+nkululeko-0.89.1.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.89.1.dist-info/RECORD,,

{nkululeko-0.88.12.dist-info → nkululeko-0.89.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (72.1.0)
+Generator: setuptools (74.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{nkululeko-0.88.12.dist-info → nkululeko-0.89.1.dist-info}/LICENSE RENAMED Viewed

File without changes

{nkululeko-0.88.12.dist-info → nkululeko-0.89.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

nkululeko 0.88.12__py3-none-any.whl → 0.89.1__py3-none-any.whl

nkululeko 0.88.12__py3-none-any.whl → 0.89.1__py3-none-any.whl