PyPI - nkululeko - Versions diffs - 0.93.13__py3-none-any.whl → 0.93.14__py3-none-any.whl - Mend

nkululeko 0.93.13__py3-none-any.whl → 0.93.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

nkululeko/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.93.13"
+VERSION="0.93.14"
 SAMPLING_RATE = 16000

nkululeko/data/dataset_csv.py CHANGED Viewed

@@ -3,19 +3,20 @@ import ast
 import os
 import os.path
-import audformat.utils
 import pandas as pd
-import nkululeko.glob_conf as glob_conf
+import audformat.utils
 from nkululeko.data.dataset import Dataset
+import nkululeko.glob_conf as glob_conf
 from nkululeko.reporting.report_item import ReportItem
 class Dataset_CSV(Dataset):
-    """Class to represent datasets stored as a csv file"""
+    """Class to represent datasets stored as a csv file."""
     def load(self):
-        """Load the dataframe with files, speakers and task labels"""
+        """Load the dataframe with files, speakers and task labels."""
         self.util.debug(f"loading {self.name}")
         self.got_target, self.got_speaker, self.got_gender = False, False, False
         data_file = self.util.config_val_data(self.name, "", "")

nkululeko/plots.py CHANGED Viewed

@@ -24,8 +24,10 @@ class Plots:
         self.format = self.util.config_val("PLOT", "format", "png")
         self.target = self.util.config_val("DATA", "target", "emotion")
         self.with_ccc = eval(self.util.config_val("PLOT", "ccc", "False"))
+        self.type_s = "samples"
     def plot_distributions_speaker(self, df):
+        self.type_s = "speaker"
         df_speakers = pd.DataFrame()
         pd.options.mode.chained_assignment = None  # default='warn'
         for s in df.speaker.unique():
@@ -301,11 +303,18 @@ class Plots:
             plot_df = plot_df.rename(columns={cont_col: self.target})
             cont_col = self.target
         dist_type = self.util.config_val("EXPL", "dist_type", "kde")
-        cats, cat_str, es = su.get_effect_size(plot_df, cat_col, cont_col)
+        max_cat, cat_str, effect_results = su.get_effect_size(
+            plot_df, cat_col, cont_col
+        )
+        self.util.debug(effect_results)
+        self.util.print_results_to_store(
+            f"cohens-d_{self.type_s}", str(effect_results) + "\n"
+        )
+        es = effect_results[max_cat]
         model_type = self.util.get_model_type()
         if dist_type == "hist" and model_type != "tree":
             ax = sns.histplot(plot_df, x=cont_col, hue=cat_col, kde=True)
-            caption = f"{ylab} {plot_df.shape[0]}. {cat_str} ({cats}):" f" {es}"
+            caption = f"{ylab} {plot_df.shape[0]}. {cat_str} ({max_cat}):" f" {es}"
             ax.set_title(caption)
             ax.set_xlabel(f"{cont_col}")
             ax.set_ylabel(f"number of {ylab}")
@@ -319,7 +328,7 @@ class Plots:
                 warn_singular=False,
             )
             ax.set(xlabel=f"{cont_col}")
-            caption = f"{ylab} {plot_df.shape[0]}. {cat_str} ({cats}):" f" {es}"
+            caption = f"{ylab} {plot_df.shape[0]}. {cat_str} ({max_cat}):" f" {es}"
             ax.figure.suptitle(caption)
         return ax, caption

nkululeko/utils/stats.py CHANGED Viewed

@@ -1,7 +1,8 @@
-import math
 from itertools import combinations
+import math
 import numpy as np
+import pandas as pd
 def check_na(a):
@@ -14,9 +15,8 @@ def check_na(a):
         return a
-def cohen_d(d1, d2):
-    """
-    Compute Cohen's d from two distributions of real valued arrays.
+def cohen_d(d1: np.array, d2: np.array) -> float:
+    """Compute Cohen's d from two distributions of real valued arrays.
     Args:
         d1: one array
@@ -50,7 +50,9 @@ def all_combinations(items_list):
     return result
-def get_effect_size(df, target, variable):
+def get_effect_size(
+    df: pd.DataFrame, target: str, variable: str
+) -> tuple[str, str, dict]:
     """Get the effect size as Cohen's D.
     Effect size is computed  from a real numbered variable on a categorical target.
@@ -68,10 +70,10 @@ def get_effect_size(df, target, variable):
     for c in categories:
         cats[c] = df[df[target] == c][variable].values
     combos = all_combinations(categories)
-    results = {}
+    results = {categories[0]: 0}
     if len(categories) == 1:
         cat_s = cohens_D_to_string(0)
-        return categories[0], cat_s, 0
+        return categories[0], cat_s, results
     else:
         for combo in combos:
             one = combo[0]
@@ -79,10 +81,10 @@ def get_effect_size(df, target, variable):
             results[f"{one}-{other}"] = cohen_d(cats[one], cats[other])
         max_cat = max(results, key=results.get)
         cat_s = cohens_D_to_string(float(results[max_cat]))
-    return max_cat, cat_s, results[max_cat]
+    return max_cat, cat_s, results
-def cohens_D_to_string(val):
+def cohens_D_to_string(val: float) -> str:
     if val < 0.2:
         rval = "no effect"
     elif val < 0.2:

nkululeko/utils/util.py CHANGED Viewed

@@ -160,6 +160,21 @@ class Util:
         pred_name = self.get_model_description()
         return f"{results_dir}/pred_{target}_{pred_name}.csv"
+    def print_results_to_store(self, name: str, contents: str) -> str:
+        """Write contents to a result file.
+        Args:
+            name (str): the (sub) name of the file_
+        Returns:
+            str: The path to the file
+        """
+        results_dir = self.get_path("res_dir")
+        pred_name = self.get_model_description()
+        path = os.path.join(results_dir, f"{name}_{pred_name}.txt")
+        with open(path, "a") as f:
+            f.write(contents)
     def is_categorical(self, pd_series):
         """Check if a dataframe column is categorical."""
         return pd_series.dtype.name == "object" or isinstance(

{nkululeko-0.93.13.dist-info → nkululeko-0.93.14.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: nkululeko
-Version: 0.93.13
+Version: 0.93.14
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt

{nkululeko-0.93.13.dist-info → nkululeko-0.93.14.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=FoMbBrfyOZd4QAw7oIHl3X6-UpsqAKWVDIolCA7qOWs,3196
 nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
 nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
-nkululeko/constants.py,sha256=B_SoEW_E21VyJqFUyh_XG4GvVYNPEsgUF31slyJ2fFY,40
+nkululeko/constants.py,sha256=o5ER1luWQ6hCEUmTnLGYzK-uGjv9VCnzzDYq2KIxo0o,40
 nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
 nkululeko/demo.py,sha256=4Yzhg6pCPBYPGJrP7JX2TysVosl_R1llpVDKc2P_gUA,4955
 nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
@@ -20,7 +20,7 @@ nkululeko/modelrunner.py,sha256=lJy-xM4QfDDWeL0dLTE_VIb4sYrnd_Z_yJRK3wwohQA,1119
 nkululeko/multidb.py,sha256=sO6OwJn8sn1-C-ig3thsIL8QMWHdV9SnJhDodKjeKrI,6876
 nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
 nkululeko/nkululeko.py,sha256=M7baIq2nAoi6dEoBL4ATEuqAs5U1fvl_hyqAl5DybAQ,2040
-nkululeko/plots.py,sha256=2G5yNR3Q3qWDt8ncKwKUZBLE-O1rbGUiG6omwfFudVk,26138
+nkululeko/plots.py,sha256=jutO1nC7EMXGEPXCivVGhgrk3I0WrYrvIWyClm7ASaE,26440
 nkululeko/predict.py,sha256=MLnHEyFmSiHLLs-HDczag8Vu3zKF5T1rXLKdZZJ6py8,2083
 nkululeko/resample.py,sha256=rn3-M1A-iwVGibfQNGyeYNa7briD24lIN9Szq_1uTJo,5194
 nkululeko/runmanager.py,sha256=AswmORVUkCIH0gTx6zEyufvFATQBS8C5TXo2erSNdVg,7611
@@ -50,7 +50,7 @@ nkululeko/autopredict/ap_valence.py,sha256=WrW4Ltqi_odW49_4QEVKkfnrcztLIVZ4cXIEH
 nkululeko/autopredict/estimate_snr.py,sha256=1k9-XadABudnsNOeFZD_Fg0E64-GUQVS7JEp82MLQS4,4995
 nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/data/dataset.py,sha256=G6RFK2msSVHxpzDm8gZSAD4GK6ieMS5fTbqVS-NOFuY,30081
-nkululeko/data/dataset_csv.py,sha256=p2b4eS5R2Q5zdOIc56NRRU2PTFXSRt0qrdHGafHkWKo,4830
+nkululeko/data/dataset_csv.py,sha256=AIbtB6pGk5BSQGIgfokZ7tEGFjmuOq5w2XumRSimVWs,4833
 nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/feat_extract/feats_agender.py,sha256=onfAQ6-xx_mFMJXEF1IX8cHBmGtGeX6weJmxbkfh1_o,3184
 nkululeko/feat_extract/feats_agender_agender.py,sha256=_YQv1qw--3uQfnyTQDCwlmPRnrhdMhgXbYK2yQtseW0,3464
@@ -110,11 +110,11 @@ nkululeko/segmenting/seg_pyannote.py,sha256=6IPbgjnGOz9juzEKDTZN3PSipX4t6Mz-DILA
 nkululeko/segmenting/seg_silero.py,sha256=ulodnvtRq5MLHDxy_RmAK4tJg6h1d-mPq-uCPFkGVKg,4258
 nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=SrrYaU7AB80MZHiV1jcB0h_zigvYLYgSVNTXV4ao38g,4593
-nkululeko/utils/stats.py,sha256=vCRzhCR0Gx5SiJyAGbj1TIto8ocGz58CM5Pr3LltagA,2948
-nkululeko/utils/util.py,sha256=wFDslqxpCVDwi6LBakIFDDy1kYsxt5G7ykE38CocmtA,16880
-nkululeko-0.93.13.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
-nkululeko-0.93.13.dist-info/METADATA,sha256=G0DPQrKRoSO4lB0NjR5hjc715sggueUA3lcokR1NyUQ,1148
-nkululeko-0.93.13.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-nkululeko-0.93.13.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
-nkululeko-0.93.13.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
-nkululeko-0.93.13.dist-info/RECORD,,
+nkululeko/utils/stats.py,sha256=3Fyx8q8BSKYmiufT6OkRug9RATWmGrr9BaX_y8jziWo,3074
+nkululeko/utils/util.py,sha256=J_dmqkOVAW63Q7IFUBj0BgygKzMXA0nORxY62-o8z_g,17360
+nkululeko-0.93.14.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.93.14.dist-info/METADATA,sha256=2cqjRLPed00dMPGG8SDMHG9k0w1gx0bItfrYsGk4rR4,1148
+nkululeko-0.93.14.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+nkululeko-0.93.14.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
+nkululeko-0.93.14.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.93.14.dist-info/RECORD,,

{nkululeko-0.93.13.dist-info → nkululeko-0.93.14.dist-info}/LICENSE RENAMED Viewed

File without changes

{nkululeko-0.93.13.dist-info → nkululeko-0.93.14.dist-info}/WHEEL RENAMED Viewed

File without changes

{nkululeko-0.93.13.dist-info → nkululeko-0.93.14.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{nkululeko-0.93.13.dist-info → nkululeko-0.93.14.dist-info}/top_level.txt RENAMED Viewed

File without changes

nkululeko 0.93.13__py3-none-any.whl → 0.93.14__py3-none-any.whl

nkululeko 0.93.13__py3-none-any.whl → 0.93.14__py3-none-any.whl