nkululeko 0.90.1__py3-none-any.whl → 0.90.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/augment.py +3 -7
- nkululeko/constants.py +1 -1
- nkululeko/demo.py +2 -3
- nkululeko/explore.py +5 -12
- nkululeko/export.py +3 -10
- nkululeko/models/model_tuned.py +34 -8
- nkululeko/nkululeko.py +7 -6
- nkululeko/predict.py +3 -10
- nkululeko/resample.py +24 -3
- nkululeko/segment.py +3 -10
- nkululeko/utils/util.py +2 -2
- {nkululeko-0.90.1.dist-info → nkululeko-0.90.3.dist-info}/METADATA +11 -2
- {nkululeko-0.90.1.dist-info → nkululeko-0.90.3.dist-info}/RECORD +17 -18
- nkululeko-0.90.3.dist-info/entry_points.txt +10 -0
- nkululeko/experiment_felix.py +0 -728
- nkululeko/resample_cli.py +0 -99
- {nkululeko-0.90.1.dist-info → nkululeko-0.90.3.dist-info}/LICENSE +0 -0
- {nkululeko-0.90.1.dist-info → nkululeko-0.90.3.dist-info}/WHEEL +0 -0
- {nkululeko-0.90.1.dist-info → nkululeko-0.90.3.dist-info}/top_level.txt +0 -0
nkululeko/augment.py
CHANGED
@@ -83,17 +83,13 @@ def doit(config_file):
|
|
83
83
|
print("DONE")
|
84
84
|
|
85
85
|
|
86
|
-
def main(
|
86
|
+
def main():
|
87
87
|
parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
|
88
88
|
parser.add_argument("--config", default="exp.ini", help="The base configuration")
|
89
89
|
args = parser.parse_args()
|
90
|
-
if args.config is not None
|
91
|
-
config_file = args.config
|
92
|
-
else:
|
93
|
-
config_file = f"{src_dir}/exp.ini"
|
90
|
+
config_file = args.config if args.config is not None else "exp.ini"
|
94
91
|
doit(config_file)
|
95
92
|
|
96
93
|
|
97
94
|
if __name__ == "__main__":
|
98
|
-
|
99
|
-
main(cwd) # use this if you want to state the config file path on command line
|
95
|
+
main()
|
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION
|
1
|
+
VERSION="0.90.3"
|
2
2
|
SAMPLING_RATE = 16000
|
nkululeko/demo.py
CHANGED
@@ -30,7 +30,7 @@ from nkululeko.experiment import Experiment
|
|
30
30
|
from nkululeko.utils.util import Util
|
31
31
|
|
32
32
|
|
33
|
-
def main(
|
33
|
+
def main():
|
34
34
|
parser = argparse.ArgumentParser(description="Call the nkululeko DEMO framework.")
|
35
35
|
parser.add_argument("--config", default="exp.ini", help="The base configuration")
|
36
36
|
parser.add_argument(
|
@@ -142,5 +142,4 @@ def main(src_dir):
|
|
142
142
|
|
143
143
|
|
144
144
|
if __name__ == "__main__":
|
145
|
-
|
146
|
-
main(cwd) # use this if you want to state the config file path on command line
|
145
|
+
main()
|
nkululeko/explore.py
CHANGED
@@ -25,33 +25,27 @@ for an `exp.ini` file in the same directory as the script.
|
|
25
25
|
|
26
26
|
import argparse
|
27
27
|
import configparser
|
28
|
-
import
|
28
|
+
from pathlib import Path
|
29
29
|
|
30
30
|
from nkululeko.constants import VERSION
|
31
31
|
from nkululeko.experiment import Experiment
|
32
32
|
from nkululeko.utils.util import Util
|
33
33
|
|
34
34
|
|
35
|
-
def main(
|
35
|
+
def main():
|
36
36
|
parser = argparse.ArgumentParser(
|
37
37
|
description="Call the nkululeko EXPLORE framework."
|
38
38
|
)
|
39
39
|
parser.add_argument("--config", default="exp.ini", help="The base configuration")
|
40
40
|
args = parser.parse_args()
|
41
|
-
if args.config is not None
|
42
|
-
config_file = args.config
|
43
|
-
else:
|
44
|
-
config_file = f"{src_dir}/exp.ini"
|
41
|
+
config_file = args.config if args.config is not None else "exp.ini"
|
45
42
|
|
46
|
-
|
47
|
-
if not os.path.isfile(config_file):
|
43
|
+
if not Path(config_file).is_file():
|
48
44
|
print(f"ERROR: no such file: {config_file}")
|
49
45
|
exit()
|
50
46
|
|
51
|
-
# load one configuration per experiment
|
52
47
|
config = configparser.ConfigParser()
|
53
48
|
config.read(config_file)
|
54
|
-
# create a new experiment
|
55
49
|
expr = Experiment(config)
|
56
50
|
module = "explore"
|
57
51
|
expr.set_module(module)
|
@@ -101,5 +95,4 @@ def main(src_dir):
|
|
101
95
|
|
102
96
|
|
103
97
|
if __name__ == "__main__":
|
104
|
-
|
105
|
-
main(cwd) # use this if you want to state the config file path on command line
|
98
|
+
main()
|
nkululeko/export.py
CHANGED
@@ -15,24 +15,18 @@ from nkululeko.experiment import Experiment
|
|
15
15
|
from nkululeko.utils.util import Util
|
16
16
|
|
17
17
|
|
18
|
-
def main(
|
18
|
+
def main():
|
19
19
|
parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
|
20
20
|
parser.add_argument("--config", default="exp.ini", help="The base configuration")
|
21
21
|
args = parser.parse_args()
|
22
|
-
if args.config is not None
|
23
|
-
config_file = args.config
|
24
|
-
else:
|
25
|
-
config_file = f"{src_dir}/exp.ini"
|
22
|
+
config_file = args.config if args.config is not None else "exp.ini"
|
26
23
|
|
27
|
-
# test if the configuration file exists
|
28
24
|
if not os.path.isfile(config_file):
|
29
25
|
print(f"ERROR: no such file: {config_file}")
|
30
26
|
exit()
|
31
27
|
|
32
|
-
# load one configuration per experiment
|
33
28
|
config = configparser.ConfigParser()
|
34
29
|
config.read(config_file)
|
35
|
-
# create a new experiment
|
36
30
|
expr = Experiment(config)
|
37
31
|
util = Util("export")
|
38
32
|
util.debug(
|
@@ -122,5 +116,4 @@ def main(src_dir):
|
|
122
116
|
|
123
117
|
|
124
118
|
if __name__ == "__main__":
|
125
|
-
|
126
|
-
main(cwd) # use this if you want to state the config file path on command line
|
119
|
+
main()
|
nkululeko/models/model_tuned.py
CHANGED
@@ -30,10 +30,16 @@ class TunedModel(BaseModel):
|
|
30
30
|
"""Constructor taking the configuration and all dataframes."""
|
31
31
|
super().__init__(df_train, df_test, feats_train, feats_test)
|
32
32
|
super().set_model_type("finetuned")
|
33
|
+
self.df_test, self.df_train, self.feats_test, self.feats_train = (
|
34
|
+
df_test,
|
35
|
+
df_train,
|
36
|
+
feats_test,
|
37
|
+
feats_train,
|
38
|
+
)
|
33
39
|
self.name = "finetuned_wav2vec2"
|
34
40
|
self.target = glob_conf.config["DATA"]["target"]
|
35
|
-
labels = glob_conf.labels
|
36
|
-
self.class_num = len(labels)
|
41
|
+
self.labels = glob_conf.labels
|
42
|
+
self.class_num = len(self.labels)
|
37
43
|
device = self.util.config_val("MODEL", "device", False)
|
38
44
|
if not device:
|
39
45
|
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
@@ -304,7 +310,7 @@ class TunedModel(BaseModel):
|
|
304
310
|
else:
|
305
311
|
self.util.error(f"criterion {criterion} not supported for classifier")
|
306
312
|
else:
|
307
|
-
|
313
|
+
criterion = self.util.config_val("MODEL", "loss", "1-ccc")
|
308
314
|
if criterion == "1-ccc":
|
309
315
|
criterion = ConcordanceCorCoeff()
|
310
316
|
elif criterion == "mse":
|
@@ -402,7 +408,7 @@ class TunedModel(BaseModel):
|
|
402
408
|
self.load(self.run, self.epoch)
|
403
409
|
|
404
410
|
def get_predictions(self):
|
405
|
-
results = []
|
411
|
+
results = [[]].pop(0)
|
406
412
|
for (file, start, end), _ in audeer.progress_bar(
|
407
413
|
self.df_test.iterrows(),
|
408
414
|
total=len(self.df_test),
|
@@ -415,18 +421,37 @@ class TunedModel(BaseModel):
|
|
415
421
|
file, duration=end - start, offset=start, always_2d=True
|
416
422
|
)
|
417
423
|
assert sr == self.sampling_rate
|
418
|
-
|
419
|
-
results.append(
|
420
|
-
|
424
|
+
prediction = self.model.predict(signal)
|
425
|
+
results.append(prediction)
|
426
|
+
# results.append(predictions.argmax())
|
427
|
+
predictions = np.asarray(results)
|
428
|
+
if self.util.exp_is_classification():
|
429
|
+
# make a dataframe for the class probabilities
|
430
|
+
proba_d = {}
|
431
|
+
for c in range(self.class_num):
|
432
|
+
proba_d[c] = []
|
433
|
+
# get the class probabilities
|
434
|
+
# predictions = self.clf.predict_proba(self.feats_test.to_numpy())
|
435
|
+
# pred = self.clf.predict(features)
|
436
|
+
for i in range(self.class_num):
|
437
|
+
proba_d[i] = list(predictions.T[i])
|
438
|
+
probas = pd.DataFrame(proba_d)
|
439
|
+
probas = probas.set_index(self.df_test.index)
|
440
|
+
predictions = probas.idxmax(axis=1).values
|
441
|
+
else:
|
442
|
+
predictions = predictions.flatten()
|
443
|
+
probas = None
|
444
|
+
return predictions, probas
|
421
445
|
|
422
446
|
def predict(self):
|
423
447
|
"""Predict the whole eval feature set"""
|
424
|
-
predictions = self.get_predictions()
|
448
|
+
predictions, probas = self.get_predictions()
|
425
449
|
report = Reporter(
|
426
450
|
self.df_test[self.target].to_numpy().astype(float),
|
427
451
|
predictions,
|
428
452
|
self.run,
|
429
453
|
self.epoch_num,
|
454
|
+
probas=probas,
|
430
455
|
)
|
431
456
|
self._plot_epoch_progression(report)
|
432
457
|
return report
|
@@ -438,6 +463,7 @@ class TunedModel(BaseModel):
|
|
438
463
|
)
|
439
464
|
with open(log_file, "r") as file:
|
440
465
|
data = file.read()
|
466
|
+
data = data.strip().replace("nan", "0")
|
441
467
|
list = ast.literal_eval(data)
|
442
468
|
epochs, vals, loss = [], [], []
|
443
469
|
for index, tp in enumerate(list):
|
nkululeko/nkululeko.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# Entry script to do a Nkululeko experiment
|
3
3
|
import argparse
|
4
4
|
import configparser
|
5
|
-
import
|
5
|
+
from pathlib import Path
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
|
@@ -13,7 +13,7 @@ from nkululeko.utils.util import Util
|
|
13
13
|
|
14
14
|
def doit(config_file):
|
15
15
|
# test if the configuration file exists
|
16
|
-
if not
|
16
|
+
if not Path(config_file).is_file():
|
17
17
|
print(f"ERROR: no such file: {config_file}")
|
18
18
|
exit()
|
19
19
|
|
@@ -57,17 +57,18 @@ def doit(config_file):
|
|
57
57
|
return result, int(np.asarray(last_epochs).min())
|
58
58
|
|
59
59
|
|
60
|
-
def main(
|
60
|
+
def main():
|
61
|
+
cwd = Path(__file__).parent.absolute()
|
61
62
|
parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
|
63
|
+
parser.add_argument("--version", action="version", version=f"Nkululeko {VERSION}")
|
62
64
|
parser.add_argument("--config", default="exp.ini", help="The base configuration")
|
63
65
|
args = parser.parse_args()
|
64
66
|
if args.config is not None:
|
65
67
|
config_file = args.config
|
66
68
|
else:
|
67
|
-
config_file =
|
69
|
+
config_file = cwd / "exp.ini"
|
68
70
|
doit(config_file)
|
69
71
|
|
70
72
|
|
71
73
|
if __name__ == "__main__":
|
72
|
-
|
73
|
-
main(cwd) # use this if you want to state the config file path on command line
|
74
|
+
main() # use this if you want to state the config file path on command line
|
nkululeko/predict.py
CHANGED
@@ -24,26 +24,20 @@ from nkululeko.experiment import Experiment
|
|
24
24
|
from nkululeko.utils.util import Util
|
25
25
|
|
26
26
|
|
27
|
-
def main(
|
27
|
+
def main():
|
28
28
|
parser = argparse.ArgumentParser(
|
29
29
|
description="Call the nkululeko PREDICT framework."
|
30
30
|
)
|
31
31
|
parser.add_argument("--config", default="exp.ini", help="The base configuration")
|
32
32
|
args = parser.parse_args()
|
33
|
-
if args.config is not None
|
34
|
-
config_file = args.config
|
35
|
-
else:
|
36
|
-
config_file = f"{src_dir}/exp.ini"
|
33
|
+
config_file = args.config if args.config is not None else "exp.ini"
|
37
34
|
|
38
|
-
# test if the configuration file exists
|
39
35
|
if not os.path.isfile(config_file):
|
40
36
|
print(f"ERROR: no such file: {config_file}")
|
41
37
|
exit()
|
42
38
|
|
43
|
-
# load one configuration per experiment
|
44
39
|
config = configparser.ConfigParser()
|
45
40
|
config.read(config_file)
|
46
|
-
# create a new experiment
|
47
41
|
expr = Experiment(config)
|
48
42
|
module = "predict"
|
49
43
|
expr.set_module(module)
|
@@ -73,5 +67,4 @@ def main(src_dir):
|
|
73
67
|
|
74
68
|
|
75
69
|
if __name__ == "__main__":
|
76
|
-
|
77
|
-
main(cwd) # use this if you want to state the config file path on command line
|
70
|
+
main()
|
nkululeko/resample.py
CHANGED
@@ -1,3 +1,25 @@
|
|
1
|
+
"""
|
2
|
+
Resample audio files or INI files (train, test, all) to change the sampling rate.
|
3
|
+
|
4
|
+
This script provides a command-line interface to resample audio files or INI files
|
5
|
+
containing train, test, and all data. It supports resampling a single file, a
|
6
|
+
directory of files, or all files specified in an INI configuration file.
|
7
|
+
|
8
|
+
The script uses the `Resampler` class from the `nkululeko.augmenting.resampler`
|
9
|
+
module to perform the resampling operation. It can optionally replace the original
|
10
|
+
audio files with the resampled versions.
|
11
|
+
|
12
|
+
The script supports the following command-line arguments:
|
13
|
+
- `--config`: The base configuration file (INI format) to use for resampling.
|
14
|
+
- `--file`: The input audio file to resample.
|
15
|
+
- `--folder`: The input directory containing audio files and subdirectories to resample.
|
16
|
+
- `--replace`: Whether to replace the original audio files with the resampled versions.
|
17
|
+
|
18
|
+
The script also supports loading configuration from an INI file, which can be used
|
19
|
+
to specify the sample selection (all, train, or test) and whether to replace the
|
20
|
+
original files.
|
21
|
+
"""
|
22
|
+
|
1
23
|
# resample.py
|
2
24
|
# change the sampling rate for audio file or INI file (train, test, all)
|
3
25
|
|
@@ -15,7 +37,7 @@ from nkululeko.utils.files import find_files
|
|
15
37
|
from nkululeko.utils.util import Util
|
16
38
|
|
17
39
|
|
18
|
-
def main(
|
40
|
+
def main():
|
19
41
|
parser = argparse.ArgumentParser(
|
20
42
|
description="Call the nkululeko RESAMPLE framework."
|
21
43
|
)
|
@@ -118,5 +140,4 @@ def main(src_dir):
|
|
118
140
|
|
119
141
|
|
120
142
|
if __name__ == "__main__":
|
121
|
-
|
122
|
-
main(cwd)
|
143
|
+
main()
|
nkululeko/segment.py
CHANGED
@@ -14,24 +14,18 @@ from nkululeko.reporting.report_item import ReportItem
|
|
14
14
|
from nkululeko.utils.util import Util
|
15
15
|
|
16
16
|
|
17
|
-
def main(
|
17
|
+
def main():
|
18
18
|
parser = argparse.ArgumentParser(description="Call the nkululeko framework.")
|
19
19
|
parser.add_argument("--config", default="exp.ini", help="The base configuration")
|
20
20
|
args = parser.parse_args()
|
21
|
-
if args.config is not None
|
22
|
-
config_file = args.config
|
23
|
-
else:
|
24
|
-
config_file = f"{src_dir}/exp.ini"
|
21
|
+
config_file = args.config if args.config is not None else "exp.ini"
|
25
22
|
|
26
|
-
# test if the configuration file exists
|
27
23
|
if not os.path.isfile(config_file):
|
28
24
|
print(f"ERROR: no such file: {config_file}")
|
29
25
|
exit()
|
30
26
|
|
31
|
-
# load one configuration per experiment
|
32
27
|
config = configparser.ConfigParser()
|
33
28
|
config.read(config_file)
|
34
|
-
# create a new experiment
|
35
29
|
expr = Experiment(config)
|
36
30
|
module = "segment"
|
37
31
|
expr.set_module(module)
|
@@ -153,5 +147,4 @@ def segment_dataframe(df):
|
|
153
147
|
|
154
148
|
|
155
149
|
if __name__ == "__main__":
|
156
|
-
|
157
|
-
main(cwd) # use this if you want to state the config file path on command line
|
150
|
+
main() # use this if you want to state the config file path on command line
|
nkululeko/utils/util.py
CHANGED
@@ -155,10 +155,10 @@ class Util:
|
|
155
155
|
return f"{store}/{self.get_exp_name()}.pkl"
|
156
156
|
|
157
157
|
def get_pred_name(self):
|
158
|
-
|
158
|
+
results_dir = self.get_path("res_dir")
|
159
159
|
target = self.get_target_name()
|
160
160
|
pred_name = self.get_model_description()
|
161
|
-
return f"{
|
161
|
+
return f"{results_dir}/pred_{target}_{pred_name}.csv"
|
162
162
|
|
163
163
|
def is_categorical(self, pd_series):
|
164
164
|
"""Check if a dataframe column is categorical."""
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.90.
|
3
|
+
Version: 0.90.3
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -68,7 +68,7 @@ A project to detect speaker characteristics by machine learning experiments with
|
|
68
68
|
|
69
69
|
The idea is to have a framework (based on e.g. sklearn and torch) that can be used to rapidly and automatically analyse audio data and explore machine learning models based on that data.
|
70
70
|
|
71
|
-
* NEW with
|
71
|
+
* NEW with nkululeko: [Ensemble learning](http://blog.syntheticspeech.de/2024/06/25/nkululeko-ensemble-classifiers-with-late-fusion/)
|
72
72
|
* NEW: [Finetune transformer-models](http://blog.syntheticspeech.de/2024/05/29/nkululeko-how-to-finetune-a-transformer-model/)
|
73
73
|
* The latest features can be seen in [the ini-file](./ini_file.md) options that are used to control Nkululeko
|
74
74
|
* Below is a [Hello World example](#helloworld) that should set you up fastly, also on [Google Colab](https://colab.research.google.com/drive/1GYNBd5cdZQ1QC3Jm58qoeMaJg3UuPhjw?usp=sharing#scrollTo=4G_SjuF9xeQf), and [with Kaggle](https://www.kaggle.com/felixburk/nkululeko-hello-world-example)
|
@@ -356,6 +356,15 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
|
|
356
356
|
Changelog
|
357
357
|
=========
|
358
358
|
|
359
|
+
Version 0.90.3
|
360
|
+
--------------
|
361
|
+
* refactorings and documentations
|
362
|
+
|
363
|
+
Version 0.90.2
|
364
|
+
--------------
|
365
|
+
* added probability output to finetuning classification models
|
366
|
+
* switched path to prob. output from "store" to "results"
|
367
|
+
|
359
368
|
Version 0.90.1
|
360
369
|
--------------
|
361
370
|
* Add balancing for finetune and update data README
|
@@ -1,17 +1,16 @@
|
|
1
1
|
nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
2
2
|
nkululeko/aug_train.py,sha256=FoMbBrfyOZd4QAw7oIHl3X6-UpsqAKWVDIolCA7qOWs,3196
|
3
|
-
nkululeko/augment.py,sha256=
|
3
|
+
nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
|
4
4
|
nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
|
5
|
-
nkululeko/constants.py,sha256=
|
5
|
+
nkululeko/constants.py,sha256=ovoltLIatbWqcR8hIDurQW7_s53A9c5ZUqgKX7rYbhA,39
|
6
6
|
nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
|
7
|
-
nkululeko/demo.py,sha256=
|
7
|
+
nkululeko/demo.py,sha256=4Yzhg6pCPBYPGJrP7JX2TysVosl_R1llpVDKc2P_gUA,4955
|
8
8
|
nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
|
9
9
|
nkululeko/demo_predictor.py,sha256=lDF-xOxRdEAclOmbepAYg-BQXQdGkHfq2n74PTIoop8,4872
|
10
10
|
nkululeko/ensemble.py,sha256=QONr-1VwMr2D0I7wjWxwGjtYzWf4v9DoI3C-fFnar7E,12862
|
11
11
|
nkululeko/experiment.py,sha256=octx5S4Y8-gAD0dXCRb6DFZwsXTYgzk06RBA3LX2SN0,31388
|
12
|
-
nkululeko/
|
13
|
-
nkululeko/
|
14
|
-
nkululeko/export.py,sha256=aqHnZPRv3dk69keY8HB5WJrhFl649X1PVbv_GlYmfH8,4634
|
12
|
+
nkululeko/explore.py,sha256=Y5lPPychnI-7fyP8zvwVb9P09fvprbUPOofOppuABYQ,3658
|
13
|
+
nkululeko/export.py,sha256=U-V4acxtuL6qKt6oAsVcM5TTeWogYUJ3GU-lA6rq6d4,4336
|
15
14
|
nkululeko/feature_extractor.py,sha256=UnspIWz3XrNhKnBBhWZkH2bHvD-sROtrQVqB1JvkUyw,4088
|
16
15
|
nkululeko/file_checker.py,sha256=xJY0Q6w47pnmgJVK5rcAKPYBrCpV7eBT4_3YBzTx-H8,3454
|
17
16
|
nkululeko/filter_data.py,sha256=5AYDtqs_GWGr4V5CbbYQkVVgCD3kq2dpKu8rF3V87NI,7224
|
@@ -20,14 +19,13 @@ nkululeko/glob_conf.py,sha256=KL9YJQTHvTztxo1vr25qRRgaPnx4NTg0XrdbovKGMmw,525
|
|
20
19
|
nkululeko/modelrunner.py,sha256=lJy-xM4QfDDWeL0dLTE_VIb4sYrnd_Z_yJRK3wwohQA,11199
|
21
20
|
nkululeko/multidb.py,sha256=mDh2Zj4zDbM-wZxib-r8LaiGqfAbh7oihgWBODj76kU,6753
|
22
21
|
nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
|
23
|
-
nkululeko/nkululeko.py,sha256=
|
22
|
+
nkululeko/nkululeko.py,sha256=M7baIq2nAoi6dEoBL4ATEuqAs5U1fvl_hyqAl5DybAQ,2040
|
24
23
|
nkululeko/plots.py,sha256=p9YyN-xAtdGBKjcA305V0KOagAzG8VG6D_Ceoa9rae4,22964
|
25
|
-
nkululeko/predict.py,sha256=
|
26
|
-
nkululeko/resample.py,sha256=
|
27
|
-
nkululeko/resample_cli.py,sha256=EJnN5t13qC4e0JVO3Rah3uJd4JRE3HM8GkoKyXsE49s,3211
|
24
|
+
nkululeko/predict.py,sha256=b35YOqovGb5PLDz0nDuhJGykEAPq2Y45R9lzxJZMuMU,2083
|
25
|
+
nkululeko/resample.py,sha256=akSAjJ3qn-O5NAyLJHVHdsK7MUZPGaZUvM2TwMSmj2M,5194
|
28
26
|
nkululeko/runmanager.py,sha256=AswmORVUkCIH0gTx6zEyufvFATQBS8C5TXo2erSNdVg,7611
|
29
27
|
nkululeko/scaler.py,sha256=7VOZ4sREMoQtahfETt9RyuR29Fb7PCwxlYVjBbdCVFc,4101
|
30
|
-
nkululeko/segment.py,sha256=
|
28
|
+
nkululeko/segment.py,sha256=lSeI1i96HZTloSqdH75FhD7VyDQ16Do99-5mhI30To8,4571
|
31
29
|
nkululeko/syllable_nuclei.py,sha256=5w_naKxNxz66a_qLkraemi2fggM-gWesiiBPS47iFcE,9931
|
32
30
|
nkululeko/test.py,sha256=1w624vo5KTzmFC8BUStGlLDmIEAFuJUz7J0W-gp7AxI,1677
|
33
31
|
nkululeko/test_predictor.py,sha256=DEHE_D3A6m6KJTrpDKceA1n655t_UZV3WQd57K4a3Ho,2863
|
@@ -95,7 +93,7 @@ nkululeko/models/model_svm.py,sha256=zP8ykLhCZTYvwSqw06XHuzq9qMBtsiYpxjUpWDAnMyA
|
|
95
93
|
nkululeko/models/model_svr.py,sha256=FEwYRdgqwgGhZdkpRnT7Ef12lklWi6GZL28PyV99xWs,726
|
96
94
|
nkululeko/models/model_tree.py,sha256=6L3PD3aIiiQz1RPWS6z3Edx4f0gnR7AOfBKOJzf0BNU,433
|
97
95
|
nkululeko/models/model_tree_reg.py,sha256=IMaQpNImoRqP8Biw1CsJevxpV_PVpKblsKtYlMW5d_U,429
|
98
|
-
nkululeko/models/model_tuned.py,sha256=
|
96
|
+
nkululeko/models/model_tuned.py,sha256=VuRyNqw3XTpQ2eHsWOJN8X-V98AN8Wqiq7UgwT5BQRU,23763
|
99
97
|
nkululeko/models/model_xgb.py,sha256=ytBaSHZH8r7VvRYdmrBrQnzRM6V4HyCJ8O-v20J8G_g,448
|
100
98
|
nkululeko/models/model_xgr.py,sha256=H01FJCRgmX2unvambMs5TTCS9sI6VDB9ip9G6rVGt2c,419
|
101
99
|
nkululeko/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -111,9 +109,10 @@ nkululeko/segmenting/seg_silero.py,sha256=CnhjKGTW5OXf-bmw4YsSJeN2yUwkY5m3xnulM_
|
|
111
109
|
nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
112
110
|
nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
|
113
111
|
nkululeko/utils/stats.py,sha256=vCRzhCR0Gx5SiJyAGbj1TIto8ocGz58CM5Pr3LltagA,2948
|
114
|
-
nkululeko/utils/util.py,sha256=
|
115
|
-
nkululeko-0.90.
|
116
|
-
nkululeko-0.90.
|
117
|
-
nkululeko-0.90.
|
118
|
-
nkululeko-0.90.
|
119
|
-
nkululeko-0.90.
|
112
|
+
nkululeko/utils/util.py,sha256=XFZdhCc_LM4EmoZ5tKKaBCQLXclcNmvHwhfT_CXB98c,16723
|
113
|
+
nkululeko-0.90.3.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
114
|
+
nkululeko-0.90.3.dist-info/METADATA,sha256=jxfoSgwFi3vXSPbJFDDizvTsvfVctfHCaProqJz_TFQ,41179
|
115
|
+
nkululeko-0.90.3.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
116
|
+
nkululeko-0.90.3.dist-info/entry_points.txt,sha256=KpQhz4HKBvYLrNooqLIc83hub76axRbYUgWzYkH3GnU,397
|
117
|
+
nkululeko-0.90.3.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
118
|
+
nkululeko-0.90.3.dist-info/RECORD,,
|
@@ -0,0 +1,10 @@
|
|
1
|
+
[console_scripts]
|
2
|
+
nkululeko.augment = nkululeko.augment:main
|
3
|
+
nkululeko.demo = nkululeko.demo:main
|
4
|
+
nkululeko.explore = nkululeko.explore:main
|
5
|
+
nkululeko.export = nkululeko.export:main
|
6
|
+
nkululeko.nkululeko = nkululeko.nkululeko:main
|
7
|
+
nkululeko.predict = nkululeko.predict:main
|
8
|
+
nkululeko.resample = nkululeko.resample:main
|
9
|
+
nkululeko.segment = nkululeko.segment:main
|
10
|
+
nkululeko.test = nkululeko.test:main
|
nkululeko/experiment_felix.py
DELETED
@@ -1,728 +0,0 @@
|
|
1
|
-
# experiment.py: Main class for an experiment (nkululeko.nkululeko)
|
2
|
-
import ast
|
3
|
-
import os
|
4
|
-
import pickle
|
5
|
-
import random
|
6
|
-
import time
|
7
|
-
|
8
|
-
import audeer
|
9
|
-
import audformat
|
10
|
-
import numpy as np
|
11
|
-
import pandas as pd
|
12
|
-
from sklearn.preprocessing import LabelEncoder
|
13
|
-
|
14
|
-
import nkululeko.glob_conf as glob_conf
|
15
|
-
from nkululeko.data.dataset import Dataset
|
16
|
-
from nkululeko.data.dataset_csv import Dataset_CSV
|
17
|
-
from nkululeko.demo_predictor import Demo_predictor
|
18
|
-
from nkululeko.feat_extract.feats_analyser import FeatureAnalyser
|
19
|
-
from nkululeko.feature_extractor import FeatureExtractor
|
20
|
-
from nkululeko.file_checker import FileChecker
|
21
|
-
from nkululeko.filter_data import DataFilter
|
22
|
-
from nkululeko.plots import Plots
|
23
|
-
from nkululeko.reporting.report import Report
|
24
|
-
from nkululeko.runmanager import Runmanager
|
25
|
-
from nkululeko.scaler import Scaler
|
26
|
-
from nkululeko.test_predictor import TestPredictor
|
27
|
-
from nkululeko.utils.util import Util
|
28
|
-
|
29
|
-
|
30
|
-
class Experiment:
|
31
|
-
"""Main class specifying an experiment"""
|
32
|
-
|
33
|
-
def __init__(self, config_obj):
|
34
|
-
"""
|
35
|
-
Parameters
|
36
|
-
----------
|
37
|
-
config_obj : a config parser object that sets the experiment parameters and being set as a global object.
|
38
|
-
"""
|
39
|
-
|
40
|
-
self.set_globals(config_obj)
|
41
|
-
self.name = glob_conf.config["EXP"]["name"]
|
42
|
-
self.root = os.path.join(glob_conf.config["EXP"]["root"], "")
|
43
|
-
self.data_dir = os.path.join(self.root, self.name)
|
44
|
-
audeer.mkdir(self.data_dir) # create the experiment directory
|
45
|
-
self.util = Util("experiment")
|
46
|
-
glob_conf.set_util(self.util)
|
47
|
-
fresh_report = eval(self.util.config_val("REPORT", "fresh", "False"))
|
48
|
-
if not fresh_report:
|
49
|
-
try:
|
50
|
-
with open(os.path.join(self.data_dir, "report.pkl"), "rb") as handle:
|
51
|
-
self.report = pickle.load(handle)
|
52
|
-
except FileNotFoundError:
|
53
|
-
self.report = Report()
|
54
|
-
else:
|
55
|
-
self.util.debug("starting a fresh report")
|
56
|
-
self.report = Report()
|
57
|
-
glob_conf.set_report(self.report)
|
58
|
-
self.loso = self.util.config_val("MODEL", "loso", False)
|
59
|
-
self.logo = self.util.config_val("MODEL", "logo", False)
|
60
|
-
self.xfoldx = self.util.config_val("MODEL", "k_fold_cross", False)
|
61
|
-
self.start = time.process_time()
|
62
|
-
|
63
|
-
def set_module(self, module):
|
64
|
-
glob_conf.set_module(module)
|
65
|
-
|
66
|
-
def store_report(self):
|
67
|
-
with open(os.path.join(self.data_dir, "report.pkl"), "wb") as handle:
|
68
|
-
pickle.dump(self.report, handle)
|
69
|
-
if eval(self.util.config_val("REPORT", "show", "False")):
|
70
|
-
self.report.print()
|
71
|
-
if self.util.config_val("REPORT", "latex", False):
|
72
|
-
self.report.export_latex()
|
73
|
-
|
74
|
-
def get_name(self):
|
75
|
-
return self.util.get_exp_name()
|
76
|
-
|
77
|
-
def set_globals(self, config_obj):
|
78
|
-
"""install a config object in the global space"""
|
79
|
-
glob_conf.init_config(config_obj)
|
80
|
-
|
81
|
-
def load_datasets(self):
|
82
|
-
"""Load all databases specified in the configuration and map the labels"""
|
83
|
-
ds = ast.literal_eval(glob_conf.config["DATA"]["databases"])
|
84
|
-
self.datasets = {}
|
85
|
-
self.got_speaker, self.got_gender, self.got_age = False, False, False
|
86
|
-
for d in ds:
|
87
|
-
ds_type = self.util.config_val_data(d, "type", "audformat")
|
88
|
-
if ds_type == "audformat":
|
89
|
-
data = Dataset(d)
|
90
|
-
elif ds_type == "csv":
|
91
|
-
data = Dataset_CSV(d)
|
92
|
-
else:
|
93
|
-
self.util.error(f"unknown data type: {ds_type}")
|
94
|
-
data.load()
|
95
|
-
data.prepare()
|
96
|
-
if data.got_gender:
|
97
|
-
self.got_gender = True
|
98
|
-
if data.got_age:
|
99
|
-
self.got_age = True
|
100
|
-
if data.got_speaker:
|
101
|
-
self.got_speaker = True
|
102
|
-
self.datasets.update({d: data})
|
103
|
-
self.target = self.util.config_val("DATA", "target", "emotion")
|
104
|
-
glob_conf.set_target(self.target)
|
105
|
-
# print target via debug
|
106
|
-
self.util.debug(f"target: {self.target}")
|
107
|
-
# print keys/column
|
108
|
-
dbs = ",".join(list(self.datasets.keys()))
|
109
|
-
labels = self.util.config_val("DATA", "labels", False)
|
110
|
-
if labels:
|
111
|
-
self.labels = ast.literal_eval(labels)
|
112
|
-
self.util.debug(f"Target labels (from config): {labels}")
|
113
|
-
else:
|
114
|
-
self.labels = list(
|
115
|
-
next(iter(self.datasets.values())).df[self.target].unique()
|
116
|
-
)
|
117
|
-
self.util.debug(f"Target labels (from database): {labels}")
|
118
|
-
glob_conf.set_labels(self.labels)
|
119
|
-
self.util.debug(f"loaded databases {dbs}")
|
120
|
-
|
121
|
-
def _import_csv(self, storage):
|
122
|
-
# df = pd.read_csv(storage, header=0, index_col=[0,1,2])
|
123
|
-
# df.index.set_levels(pd.to_timedelta(df.index.levels[1]), level=1)
|
124
|
-
# df.index.set_levels(pd.to_timedelta(df.index.levels[2]), level=2)
|
125
|
-
df = audformat.utils.read_csv(storage)
|
126
|
-
df.is_labeled = True if self.target in df else False
|
127
|
-
# print(df.head())
|
128
|
-
return df
|
129
|
-
|
130
|
-
def fill_tests(self):
|
131
|
-
"""Only fill a new test set"""
|
132
|
-
|
133
|
-
test_dbs = ast.literal_eval(glob_conf.config["DATA"]["tests"])
|
134
|
-
self.df_test = pd.DataFrame()
|
135
|
-
start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
|
136
|
-
store = self.util.get_path("store")
|
137
|
-
storage_test = f"{store}extra_testdf.csv"
|
138
|
-
if os.path.isfile(storage_test) and not start_fresh:
|
139
|
-
self.util.debug(f"reusing previously stored {storage_test}")
|
140
|
-
self.df_test = self._import_csv(storage_test)
|
141
|
-
else:
|
142
|
-
for d in test_dbs:
|
143
|
-
ds_type = self.util.config_val_data(d, "type", "audformat")
|
144
|
-
if ds_type == "audformat":
|
145
|
-
data = Dataset(d)
|
146
|
-
elif ds_type == "csv":
|
147
|
-
data = Dataset_CSV(d)
|
148
|
-
else:
|
149
|
-
self.util.error(f"unknown data type: {ds_type}")
|
150
|
-
data.load()
|
151
|
-
if data.got_gender:
|
152
|
-
self.got_gender = True
|
153
|
-
if data.got_age:
|
154
|
-
self.got_age = True
|
155
|
-
if data.got_speaker:
|
156
|
-
self.got_speaker = True
|
157
|
-
data.split()
|
158
|
-
data.prepare_labels()
|
159
|
-
self.df_test = pd.concat(
|
160
|
-
[self.df_test, self.util.make_segmented_index(data.df_test)]
|
161
|
-
)
|
162
|
-
self.df_test.is_labeled = data.is_labeled
|
163
|
-
self.df_test.got_gender = self.got_gender
|
164
|
-
self.df_test.got_speaker = self.got_speaker
|
165
|
-
# self.util.set_config_val('FEATS', 'needs_features_extraction', 'True')
|
166
|
-
# self.util.set_config_val('FEATS', 'no_reuse', 'True')
|
167
|
-
self.df_test["class_label"] = self.df_test[self.target]
|
168
|
-
self.df_test[self.target] = self.label_encoder.transform(
|
169
|
-
self.df_test[self.target]
|
170
|
-
)
|
171
|
-
self.df_test.to_csv(storage_test)
|
172
|
-
|
173
|
-
def fill_train_and_tests(self):
    """Set up train and development sets. The method should be specified in the config.

    Loads or rebuilds ``self.df_train`` / ``self.df_test`` (cached as
    traindf.csv / testdf.csv in the store), runs data checks and filters,
    encodes classification labels with a LabelEncoder, and optionally
    divides continuous targets by DATA.target_divide_by.
    """
    store = self.util.get_path("store")
    storage_test = f"{store}testdf.csv"
    storage_train = f"{store}traindf.csv"
    # NOTE(review): eval() on a config string; assumes a trusted literal
    start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
    if (
        os.path.isfile(storage_train)
        and os.path.isfile(storage_test)
        and not start_fresh
    ):
        self.util.debug(
            f"reusing previously stored {storage_test} and {storage_train}"
        )
        self.df_test = self._import_csv(storage_test)
        self.df_train = self._import_csv(storage_train)
    else:
        # rebuild both splits from all configured datasets
        self.df_train, self.df_test = pd.DataFrame(), pd.DataFrame()
        for d in self.datasets.values():
            d.split()
            d.prepare_labels()
            if d.df_train.shape[0] == 0:
                self.util.debug(f"warn: {d.name} train empty")
            self.df_train = pd.concat([self.df_train, d.df_train])
            self.util.copy_flags(d, self.df_train)
            if d.df_test.shape[0] == 0:
                self.util.debug(f"warn: {d.name} test empty")
            self.df_test = pd.concat([self.df_test, d.df_test])
            self.util.copy_flags(d, self.df_test)
        store = self.util.get_path("store")
        storage_test = f"{store}testdf.csv"
        storage_train = f"{store}traindf.csv"
        self.df_test.to_csv(storage_test)
        self.df_train.to_csv(storage_train)

    self.util.copy_flags(self, self.df_test)
    self.util.copy_flags(self, self.df_train)
    # Try data checks (e.g. missing/broken files) on both splits
    datachecker = FileChecker(self.df_train)
    self.df_train = datachecker.all_checks()
    datachecker.set_data(self.df_test)
    self.df_test = datachecker.all_checks()

    # Check for filters; DATA.filter.sample_selection decides which splits
    # the configured filters are applied to
    filter_sample_selection = self.util.config_val(
        "DATA", "filter.sample_selection", "all"
    )
    if filter_sample_selection == "all":
        datafilter = DataFilter(self.df_train)
        self.df_train = datafilter.all_filters()
        datafilter = DataFilter(self.df_test)
        self.df_test = datafilter.all_filters()
    elif filter_sample_selection == "train":
        datafilter = DataFilter(self.df_train)
        self.df_train = datafilter.all_filters()
    elif filter_sample_selection == "test":
        datafilter = DataFilter(self.df_test)
        self.df_test = datafilter.all_filters()
    else:
        self.util.error(
            "unkown filter sample selection specifier"
            f" {filter_sample_selection}, should be [all | train | test]"
        )

    # encode the labels
    if self.util.exp_is_classification():
        datatype = self.util.config_val("DATA", "type", "dummy")
        if datatype == "continuous":
            # continuous data was already binned into class_label columns
            test_cats = self.df_test["class_label"].unique()
            train_cats = self.df_train["class_label"].unique()
        else:
            if self.df_test.is_labeled:
                test_cats = self.df_test[self.target].unique()
            else:
                # if there is no target, copy a dummy label
                self.df_test = self._add_random_target(self.df_test).astype("str")
            train_cats = self.df_train[self.target].unique()
        if self.df_test.is_labeled:
            if type(test_cats) == np.ndarray:
                self.util.debug(f"Categories test (nd.array): {test_cats}")
            else:
                self.util.debug(f"Categories test (list): {list(test_cats)}")
        if type(train_cats) == np.ndarray:
            self.util.debug(f"Categories train (nd.array): {train_cats}")
        else:
            self.util.debug(f"Categories train (list): {list(train_cats)}")

        # encode the labels as numbers; fit on train only, then transform test
        self.label_encoder = LabelEncoder()
        self.df_train[self.target] = self.label_encoder.fit_transform(
            self.df_train[self.target]
        )
        self.df_test[self.target] = self.label_encoder.transform(
            self.df_test[self.target]
        )
        glob_conf.set_label_encoder(self.label_encoder)
    if self.got_speaker:
        self.util.debug(
            f"{self.df_test.speaker.nunique()} speakers in test and"
            f" {self.df_train.speaker.nunique()} speakers in train"
        )

    # optionally rescale a continuous target (e.g. age / 100)
    target_factor = self.util.config_val("DATA", "target_divide_by", False)
    if target_factor:
        self.df_test[self.target] = self.df_test[self.target] / float(target_factor)
        self.df_train[self.target] = self.df_train[self.target] / float(
            target_factor
        )
        if not self.util.exp_is_classification():
            self.df_test["class_label"] = self.df_test["class_label"] / float(
                target_factor
            )
            self.df_train["class_label"] = self.df_train["class_label"] / float(
                target_factor
            )
|
303
|
-
|
304
|
-
def _add_random_target(self, df):
    """Fill the target column of *df* with uniformly random class labels.

    Used when a test set comes without annotations but the pipeline
    still needs a target column. Returns the mutated dataframe.
    """
    labels = glob_conf.labels
    df[self.target] = [random.choice(labels) for _ in range(len(df))]
    return df
|
311
|
-
|
312
|
-
def plot_distribution(self, df_labels):
    """Plot the distribution of samples and speaker per target class and biological sex.

    Args:
        df_labels: dataframe holding the labels (and speaker column, if any).
    """
    # fix: dropped the unused local `sample_selection` (dead read of
    # EXPL.sample_selection that was never passed on)
    plot = Plots()
    plot.plot_distributions(df_labels)
    if self.got_speaker:
        plot.plot_distributions_speaker(df_labels)
|
319
|
-
|
320
|
-
def extract_test_feats(self):
    """Extract features for the extra test set (DATA.tests databases).

    Stores the result in ``self.feats_test`` and leaves the feature
    extractor in ``self.feature_extractor`` for later reuse (e.g. demo).
    """
    self.feats_test = pd.DataFrame()
    # cache name is derived from the joined test database names
    feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["tests"]))
    feats_types = self.util.config_val_list("FEATS", "type", ["os"])
    self.feature_extractor = FeatureExtractor(
        self.df_test, feats_types, feats_name, "test"
    )
    self.feats_test = self.feature_extractor.extract()
    self.util.debug(f"Test features shape:{self.feats_test.shape}")
|
329
|
-
|
330
|
-
def extract_feats(self):
    """Extract the features for train and dev sets.

    They will be stored on disk and need to be removed manually.

    The string FEATS.feats_type is read from the config, defaults to os.
    If feature extraction drops samples (e.g. unreadable audio), the
    label dataframes are shrunk to the surviving feature index.
    """
    df_train, df_test = self.df_train, self.df_test
    # cache name derives from the joined database names
    feats_name = "_".join(ast.literal_eval(glob_conf.config["DATA"]["databases"]))
    self.feats_test, self.feats_train = pd.DataFrame(), pd.DataFrame()
    feats_types = self.util.config_val_list("FEATS", "type", ["os"])
    self.feature_extractor = FeatureExtractor(
        df_train, feats_types, feats_name, "train"
    )
    self.feats_train = self.feature_extractor.extract()
    self.feature_extractor = FeatureExtractor(
        df_test, feats_types, feats_name, "test"
    )
    self.feats_test = self.feature_extractor.extract()
    self.util.debug(
        f"All features: train shape : {self.feats_train.shape}, test"
        f" shape:{self.feats_test.shape}"
    )
    # align labels with the (possibly smaller) extracted feature index
    if self.feats_train.shape[0] < self.df_train.shape[0]:
        self.util.warn(
            f"train feats ({self.feats_train.shape[0]}) != train labels"
            f" ({self.df_train.shape[0]})"
        )
        self.df_train = self.df_train[
            self.df_train.index.isin(self.feats_train.index)
        ]
        self.util.warn(f"new train labels shape: {self.df_train.shape[0]}")
    if self.feats_test.shape[0] < self.df_test.shape[0]:
        self.util.warn(
            f"test feats ({self.feats_test.shape[0]}) != test labels"
            f" ({self.df_test.shape[0]})"
        )
        self.df_test = self.df_test[self.df_test.index.isin(self.feats_test.index)]
        # fix: typo "mew" -> "new" in the warning message
        self.util.warn(f"new test labels shape: {self.df_test.shape[0]}")

    self._check_scale()
|
372
|
-
|
373
|
-
def augment(self):
    """Augment the samples selected via AUGMENT.sample_selection.

    Returns the dataframe produced by the Augmenter.
    """
    from nkululeko.augmenting.augmenter import Augmenter

    sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
    if sample_selection == "train":
        df = self.df_train
    elif sample_selection == "test":
        df = self.df_test
    elif sample_selection == "all":
        df = pd.concat([self.df_train, self.df_test])
    else:
        self.util.error(
            f"unknown augmentation selection specifier {sample_selection},"
            " should be [all | train | test]"
        )

    return Augmenter(df).augment(sample_selection)
|
395
|
-
|
396
|
-
def autopredict(self):
    """Predict labels for samples with existing models and add to the dataframe.

    The split to process comes from PREDICT.split, the list of prediction
    targets from PREDICT.targets. Each target dispatches to a dedicated
    predictor class; returns the dataframe with predicted columns added.
    """
    sample_selection = self.util.config_val("PREDICT", "split", "all")
    if sample_selection == "all":
        df = pd.concat([self.df_train, self.df_test])
    elif sample_selection == "train":
        df = self.df_train
    elif sample_selection == "test":
        df = self.df_test
    else:
        self.util.error(
            f"unknown augmentation selection specifier {sample_selection},"
            " should be [all | train | test]"
        )
    targets = self.util.config_val_list("PREDICT", "targets", ["gender"])
    # imports are local so the (potentially heavy) predictor modules are
    # only loaded when their target is actually requested
    for target in targets:
        if target == "gender":
            from nkululeko.autopredict.ap_gender import GenderPredictor

            predictor = GenderPredictor(df)
            df = predictor.predict(sample_selection)
        elif target == "age":
            from nkululeko.autopredict.ap_age import AgePredictor

            predictor = AgePredictor(df)
            df = predictor.predict(sample_selection)
        elif target == "snr":
            from nkululeko.autopredict.ap_snr import SNRPredictor

            predictor = SNRPredictor(df)
            df = predictor.predict(sample_selection)
        elif target == "mos":
            from nkululeko.autopredict.ap_mos import MOSPredictor

            predictor = MOSPredictor(df)
            df = predictor.predict(sample_selection)
        elif target == "pesq":
            from nkululeko.autopredict.ap_pesq import PESQPredictor

            predictor = PESQPredictor(df)
            df = predictor.predict(sample_selection)
        elif target == "sdr":
            from nkululeko.autopredict.ap_sdr import SDRPredictor

            predictor = SDRPredictor(df)
            df = predictor.predict(sample_selection)
        elif target == "stoi":
            from nkululeko.autopredict.ap_stoi import STOIPredictor

            predictor = STOIPredictor(df)
            df = predictor.predict(sample_selection)
        elif target == "arousal":
            from nkululeko.autopredict.ap_arousal import ArousalPredictor

            predictor = ArousalPredictor(df)
            df = predictor.predict(sample_selection)
        elif target == "valence":
            from nkululeko.autopredict.ap_valence import ValencePredictor

            predictor = ValencePredictor(df)
            df = predictor.predict(sample_selection)
        elif target == "dominance":
            from nkululeko.autopredict.ap_dominance import DominancePredictor

            predictor = DominancePredictor(df)
            df = predictor.predict(sample_selection)
        else:
            self.util.error(f"unknown auto predict target: {target}")
    return df
|
467
|
-
|
468
|
-
def random_splice(self):
    """Random-splice the samples selected via AUGMENT.sample_selection.

    Returns the dataframe produced by the Randomsplicer.
    """
    from nkululeko.augmenting.randomsplicer import Randomsplicer

    sample_selection = self.util.config_val("AUGMENT", "sample_selection", "all")
    if sample_selection == "train":
        df = self.df_train
    elif sample_selection == "test":
        df = self.df_test
    elif sample_selection == "all":
        df = pd.concat([self.df_train, self.df_test])
    else:
        self.util.error(
            f"unknown augmentation selection specifier {sample_selection},"
            " should be [all | train | test]"
        )

    return Randomsplicer(df).run(sample_selection)
|
489
|
-
|
490
|
-
def analyse_features(self, needs_feats):
    """Do a feature exploration.

    Args:
        needs_feats: if False, only label-level analyses (value counts,
            spotlight) are run and the method returns before touching
            feature dataframes.
    """
    # NOTE(review): eval() on config strings throughout; assumes trusted
    # boolean literals such as "False"
    plot_feats = eval(
        self.util.config_val("EXPL", "feature_distributions", "False")
    )
    sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
    # get the data labels
    if sample_selection == "all":
        df_labels = pd.concat([self.df_train, self.df_test])
        self.util.copy_flags(self.df_train, df_labels)
    elif sample_selection == "train":
        df_labels = self.df_train
        self.util.copy_flags(self.df_train, df_labels)
    elif sample_selection == "test":
        df_labels = self.df_test
        self.util.copy_flags(self.df_test, df_labels)
    else:
        self.util.error(
            f"unknown sample selection specifier {sample_selection}, should"
            " be [all | train | test]"
        )
    self.util.debug(f"sampling selection: {sample_selection}")
    if self.util.config_val("EXPL", "value_counts", False):
        self.plot_distribution(df_labels)

    # check if data should be shown with the spotlight data visualizer
    spotlight = eval(self.util.config_val("EXPL", "spotlight", "False"))
    if spotlight:
        self.util.debug("opening spotlight tab in web browser")
        from renumics import spotlight

        spotlight.show(df_labels.reset_index())

    if not needs_feats:
        return
    # get the feature values for the same selection as the labels
    if sample_selection == "all":
        df_feats = pd.concat([self.feats_train, self.feats_test])
    elif sample_selection == "train":
        df_feats = self.feats_train
    elif sample_selection == "test":
        df_feats = self.feats_test
    else:
        self.util.error(
            f"unknown sample selection specifier {sample_selection}, should"
            " be [all | train | test]"
        )
    feat_analyser = FeatureAnalyser(sample_selection, df_labels, df_feats)
    # check if SHAP features should be analysed
    shap = eval(self.util.config_val("EXPL", "shap", "False"))
    if shap:
        feat_analyser.analyse_shap(self.runmgr.get_best_model())

    if plot_feats:
        feat_analyser.analyse()

    # check if a scatterplot should be done
    scatter_var = eval(self.util.config_val("EXPL", "scatter", "False"))
    scatter_target = self.util.config_val(
        "EXPL", "scatter.target", "['class_label']"
    )
    if scatter_var:
        scatters = ast.literal_eval(glob_conf.config["EXPL"]["scatter"])
        scat_targets = ast.literal_eval(scatter_target)
        plots = Plots()
        for scat_target in scat_targets:
            if self.util.is_categorical(df_labels[scat_target]):
                for scatter in scatters:
                    plots.scatter_plot(df_feats, df_labels, scat_target, scatter)
            else:
                # continuous targets are binned first so the scatter plot
                # can colour by category
                self.util.debug(
                    f"{self.name}: binning continuous variable to categories"
                )
                cat_vals = self.util.continuous_to_categorical(
                    df_labels[scat_target]
                )
                df_labels[f"{scat_target}_bins"] = cat_vals.values
                for scatter in scatters:
                    plots.scatter_plot(
                        df_feats, df_labels, f"{scat_target}_bins", scatter
                    )
|
571
|
-
|
572
|
-
def _check_scale(self):
    """Scale train/test features if FEATS.scale is configured.

    The scaler is fitted via the Scaler helper and the scaled feature
    frames are persisted to the store for reuse.
    """
    scale_feats = self.util.config_val("FEATS", "scale", False)
    # log the configured scaler (False when scaling is disabled)
    self.util.debug(f"scaler: {scale_feats}")
    if scale_feats:
        self.scaler_feats = Scaler(
            self.df_train,
            self.df_test,
            self.feats_train,
            self.feats_test,
            scale_feats,
        )
        self.feats_train, self.feats_test = self.scaler_feats.scale()
        # store scaled versions
        self.util.save_to_store(self.feats_train, "feats_train_scaled")
        self.util.save_to_store(self.feats_test, "feats_test_scaled")
|
588
|
-
|
589
|
-
def init_runmanager(self):
    """Initialize the manager object for the runs."""
    run_data = (self.df_train, self.df_test, self.feats_train, self.feats_test)
    self.runmgr = Runmanager(*run_data)
|
594
|
-
|
595
|
-
def run(self):
    """Do the runs.

    Returns:
        A tuple (reports, last_epochs) with the best results of all runs
        and the last epoch per run.
    """
    self.runmgr.do_runs()

    # access the best results of all runs
    self.reports = self.runmgr.best_results
    last_epochs = self.runmgr.last_epochs
    # try to save yourself
    save = self.util.config_val("EXP", "save", False)
    if save:
        # save the experiment for future use
        self.save(self.util.get_save_name())
        # self.save_onnx(self.util.get_save_name())

    self.util.print_best_results(self.reports)

    # check if the test predictions should be saved to disk
    test_pred_file = self.util.config_val("EXP", "save_test", False)
    if test_pred_file:
        self.predict_test_and_save(test_pred_file)

    # check if the majority voting for all speakers should be plotted
    conf_mat_per_speaker_function = self.util.config_val(
        "PLOT", "combine_per_speaker", False
    )
    if conf_mat_per_speaker_function:
        self.plot_confmat_per_speaker(conf_mat_per_speaker_function)
    # report wall-clock-ish CPU time since the experiment start
    used_time = time.process_time() - self.start
    self.util.debug(f"Done, used {used_time:.3f} seconds")

    # check if a test set should be labeled by the model:
    label_data = self.util.config_val("DATA", "label_data", False)
    label_result = self.util.config_val("DATA", "label_result", False)
    if label_data and label_result:
        self.predict_test_and_save(label_result)

    return self.reports, last_epochs
|
633
|
-
|
634
|
-
def plot_confmat_per_speaker(self, function):
    """Plot a confusion matrix with predictions combined per speaker.

    Args:
        function: name of the combination function (e.g. majority vote)
            passed through to the report's plot_per_speaker.

    Not supported for cross-validation setups (loso/logo/xfold), where
    test speakers differ per fold.
    """
    if self.loso or self.logo or self.xfoldx:
        self.util.debug(
            "plot combined speaker predictions not possible for cross" " validation"
        )
        return
    best = self.get_best_report(self.reports)
    truths = best.truths
    preds = best.preds
    speakers = self.df_test.speaker.values
    # fix: route through util.debug instead of a bare print (consistent
    # with the rest of the class) and drop the stray space in the f-string
    self.util.debug(f"{len(truths)} {len(preds)} {len(speakers)}")
    df = pd.DataFrame(data={"truth": truths, "pred": preds, "speaker": speakers})
    plot_name = "result_combined_per_speaker"
    self.util.debug(
        f"plotting speaker combination ({function}) confusion matrix to"
        f" {plot_name}"
    )
    best.plot_per_speaker(df, plot_name, function)
|
654
|
-
|
655
|
-
def get_best_report(self, reports):
    """Delegate to the run manager to pick the best result among *reports*."""
    manager = self.runmgr
    return manager.get_best_result(reports)
|
657
|
-
|
658
|
-
def print_best_model(self):
    """Have the run manager print the runs that produced the best result."""
    printer = self.runmgr.print_best_result_runs
    printer()
|
660
|
-
|
661
|
-
def demo(self, file, is_list, outfile):
    """Run the demo predictor with the best trained model.

    Args:
        file: audio file (or list file) to predict on.
        is_list: whether *file* is a list of files.
        outfile: where to write the predictions.
    """
    model = self.runmgr.get_best_model()
    # fall back to None when no label encoder exists (e.g. regression)
    labelEncoder = getattr(self, "label_encoder", None)
    demo = Demo_predictor(
        model, file, is_list, self.feature_extractor, labelEncoder, outfile
    )
    demo.run_demo()
|
672
|
-
|
673
|
-
def predict_test_and_save(self, result_name):
    """Predict the test set with the best model and persist the result.

    Args:
        result_name: base name for the stored prediction file.

    Returns:
        Whatever the TestPredictor reports after storing.
    """
    best_model = self.runmgr.get_best_model()
    best_model.set_testdata(self.df_test, self.feats_test)
    predictor = TestPredictor(
        best_model, self.df_test, self.label_encoder, result_name
    )
    return predictor.predict_and_store()
|
681
|
-
|
682
|
-
def load(self, filename):
    """Load a previously pickled experiment state from *filename*.

    Updates this instance's __dict__ with the stored attributes and
    re-registers the labels with the global config.
    """
    try:
        # fix: use a context manager so the handle is closed even when
        # pickle.load raises (the old code leaked it on EOFError)
        with open(filename, "rb") as f:
            tmp_dict = pickle.load(f)
    except EOFError as eof:
        # util.error is expected to abort; tmp_dict stays undefined here
        self.util.error(f"can't open file (unknown): {eof}")
    self.__dict__.update(tmp_dict)
    glob_conf.set_labels(self.labels)
|
691
|
-
|
692
|
-
def save(self, filename):
    """Pickle the experiment state to *filename*.

    Components that cannot be pickled are dropped with a warning:
    first the trained ANN model, then (on a second attempt) the
    feature-extraction model; unpicklable local objects abort the save.
    """
    if self.runmgr.modelrunner.model.is_ann():
        # torch models are stored separately, so dropping here is safe
        self.runmgr.modelrunner.model = None
        self.util.warn(
            "Save experiment: Can't pickle the trained model so saving without it. (it should be stored anyway)"
        )
    try:
        f = open(filename, "wb")
        pickle.dump(self.__dict__, f)
        f.close()
    except (TypeError, AttributeError) as error:
        # retry once without the feature extraction model
        self.feature_extractor.feat_extractor.model = None
        f = open(filename, "wb")
        pickle.dump(self.__dict__, f)
        f.close()
        self.util.warn(
            "Save experiment: Can't pickle the feature extraction model so saving without it."
            + f"{type(error).__name__} {error}"
        )
    except RuntimeError as error:
        self.util.warn(
            "Save experiment: Can't pickle local object, NOT saving: "
            + f"{type(error).__name__} {error}"
        )
|
716
|
-
|
717
|
-
def save_onnx(self, filename):
    # export the model to onnx
    # NOTE(review): the onnx conversion itself is not implemented here —
    # both branches only print a message; the state is pickled as-is.
    model = self.runmgr.get_best_model()
    if model.is_ann():
        print("converting to onnx from torch")
    else:
        print("converting to onnx from sklearn")
    # save the rest of the experiment state
    f = open(filename, "wb")
    pickle.dump(self.__dict__, f)
    f.close()
|
nkululeko/resample_cli.py
DELETED
@@ -1,99 +0,0 @@
|
|
1
|
-
import argparse
|
2
|
-
import configparser
|
3
|
-
import os
|
4
|
-
|
5
|
-
import audformat
|
6
|
-
import pandas as pd
|
7
|
-
|
8
|
-
from nkululeko.augmenting.resampler import Resampler
|
9
|
-
from nkululeko.constants import VERSION
|
10
|
-
from nkululeko.experiment import Experiment
|
11
|
-
from nkululeko.utils.util import Util
|
12
|
-
|
13
|
-
|
14
|
-
def main(src_dir):
    """Entry point of the nkululeko RESAMPLE tool.

    Either resamples a single audio file (--file) or all samples selected
    by an experiment configuration (--config).

    Args:
        src_dir: working directory of the script.
            NOTE(review): this parameter is never used in the body —
            confirm whether it can be dropped.
    """
    parser = argparse.ArgumentParser(
        description="Call the nkululeko RESAMPLE framework."
    )
    parser.add_argument("--config", default=None, help="The base configuration")
    parser.add_argument("--file", default=None, help="The input audio file to resample")
    parser.add_argument(
        "--replace", action="store_true", help="Replace the original audio file"
    )

    args = parser.parse_args()

    if args.file is None and args.config is None:
        print("ERROR: Either --file or --config argument must be provided.")
        exit()

    if args.file is not None:
        # single-file mode: build a one-row segmented index for the file
        files = pd.Series([args.file])
        df_sample = pd.DataFrame(index=files)
        df_sample.index = audformat.utils.to_segmented_index(
            df_sample.index, allow_nat=False
        )

        # Resample the audio file
        util = Util("resampler", has_config=False)
        util.debug(f"Resampling audio file: {args.file}")
        rs = Resampler(df_sample, not_testing=True, replace=args.replace)
        rs.resample()
    else:
        # config mode: run resampling over an experiment's data
        config_file = args.config

        # Test if the configuration file exists
        if not os.path.isfile(config_file):
            print(f"ERROR: no such file: {config_file}")
            exit()

        # Load one configuration per experiment
        config = configparser.ConfigParser()
        config.read(config_file)
        # Create a new experiment
        expr = Experiment(config)
        module = "resample"
        expr.set_module(module)
        util = Util(module)
        util.debug(
            f"running {expr.name} from config {config_file}, nkululeko version"
            f" {VERSION}"
        )

        if util.config_val("EXP", "no_warnings", False):
            import warnings

            warnings.filterwarnings("ignore")

        # Load the data
        expr.load_datasets()

        # Split into train and test
        expr.fill_train_and_tests()
        util.debug(
            f"train shape : {expr.df_train.shape}, test shape:{expr.df_test.shape}"
        )

        # RESAMPLE.sample_selection picks which split(s) to resample
        sample_selection = util.config_val("RESAMPLE", "sample_selection", "all")
        if sample_selection == "all":
            df = pd.concat([expr.df_train, expr.df_test])
        elif sample_selection == "train":
            df = expr.df_train
        elif sample_selection == "test":
            df = expr.df_test
        else:
            util.error(
                f"unknown selection specifier {sample_selection}, should be [all |"
                " train | test]"
            )
        util.debug(f"resampling {sample_selection}: {df.shape[0]} samples")
        # NOTE(review): replace is passed as the string "False"/"True",
        # not a bool — confirm Resampler interprets it correctly
        replace = util.config_val("RESAMPLE", "replace", "False")
        rs = Resampler(df, replace=replace)
        rs.resample()


if __name__ == "__main__":
    cwd = os.path.dirname(os.path.abspath(__file__))
    main(cwd)
|
File without changes
|
File without changes
|
File without changes
|