PyPI - nkululeko - Versions diffs - 0.86.5__py3-none-any.whl → 0.86.7__py3-none-any.whl - Mend

nkululeko 0.86.5py3-none-any.whl → 0.86.7py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

nkululeko/constants.py +1 -1
nkululeko/data/dataset.py +8 -0
nkululeko/modelrunner.py +15 -4
nkululeko/reporting/reporter.py +12 -6
nkululeko/runmanager.py +9 -11
{nkululeko-0.86.5.dist-info → nkululeko-0.86.7.dist-info}/METADATA +9 -1
{nkululeko-0.86.5.dist-info → nkululeko-0.86.7.dist-info}/RECORD +10 -10
{nkululeko-0.86.5.dist-info → nkululeko-0.86.7.dist-info}/LICENSE +0 -0
{nkululeko-0.86.5.dist-info → nkululeko-0.86.7.dist-info}/WHEEL +0 -0
{nkululeko-0.86.5.dist-info → nkululeko-0.86.7.dist-info}/top_level.txt +0 -0

nkululeko/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.86.5"
+VERSION="0.86.7"
 SAMPLING_RATE = 16000

nkululeko/data/dataset.py CHANGED Viewed

@@ -150,6 +150,13 @@ class Dataset:
                 self.got_speaker = got_speaker2 or self.got_speaker
                 self.got_gender = got_gender2 or self.got_gender
                 self.got_age = got_age2 or self.got_age
+                if audformat.is_filewise_index(df_target.index):
+                    try:
+                        df_target = df_target.loc[df.index.get_level_values("file")]
+                        df_target = df_target.set_index(df.index)
+                    except KeyError:
+                        # best effort: ignore lookup failures (KeyError) and continue
+                        pass
                 if got_target2:
                     df[self.target] = df_target[self.target]
                 if got_speaker2:
@@ -255,6 +262,7 @@ class Dataset:
         df = pd.DataFrame()
         for table in df_files:
             source_df = db.tables[table].df
+            # check if columns should be renamed
             source_df = self._check_cols(source_df)
             # create a dataframe with the index (the filenames)
             df_local = pd.DataFrame(index=source_df.index)

nkululeko/modelrunner.py CHANGED Viewed

@@ -30,6 +30,8 @@ class Modelrunner:
         # initialize a new model
         model_type = glob_conf.config["MODEL"]["type"]
         self._select_model(model_type)
+        self.best_performance = 0
+        self.best_epoch = 0
     def do_epochs(self):
         # initialize results
@@ -51,7 +53,8 @@ class Modelrunner:
             # epochs are handled by Huggingface API
             self.model.train()
             report = self.model.predict()
-            # todo: findout the best epoch
+            # todo: find out the best epoch; no need,
+            # since load_best_model_at_end is given in training args
             epoch = epoch_num
             report.set_id(self.run, epoch)
             plot_name = self.util.get_plot_name() + f"_{self.run}_{epoch:03d}_cnf"
@@ -77,10 +80,15 @@ class Modelrunner:
                 report.set_id(self.run, epoch)
                 plot_name = self.util.get_plot_name() + f"_{self.run}_{epoch:03d}_cnf"
                 reports.append(report)
+                test_score_metric = report.get_result().get_test_result()
                 self.util.debug(
-                    f"run: {self.run} epoch: {epoch}: result: "
-                    f"{reports[-1].get_result().get_test_result()}"
+                    f"run: {self.run} epoch: {epoch}: result: {test_score_metric}"
                 )
+                # print(f"performance: {performance.split(' ')[1]}")
+                performance = float(test_score_metric.split(' ')[1])
+                if performance > self.best_performance:
+                    self.best_performance = performance
+                    self.best_epoch = epoch
                 if plot_epochs:
                     self.util.debug(f"plotting conf matrix to {plot_name}")
                     report.plot_confmatrix(plot_name, epoch)
@@ -110,11 +118,14 @@ class Modelrunner:
                             f"reached patience ({str(patience)}): early stopping"
                         )
                         break
+        # After training, report the best performance and epoch
+        best_report = reports[self.best_epoch]
+        # self.util.debug(f"Best score at epoch: {self.best_epoch}, UAR: {self.best_performance}") # move to reporter below
         if not plot_epochs:
             # Do at least one confusion matrix plot
             self.util.debug(f"plotting confusion matrix to {plot_name}")
-            reports[-1].plot_confmatrix(plot_name, epoch)
+            best_report.plot_confmatrix(plot_name, self.best_epoch)
         return reports, epoch
     def _select_model(self, model_type):

nkululeko/reporting/reporter.py CHANGED Viewed

@@ -122,7 +122,7 @@ class Reporter:
         self.truths = np.digitize(self.truths, bins) - 1
         self.preds = np.digitize(self.preds, bins) - 1
-    def plot_confmatrix(self, plot_name, epoch):
+    def plot_confmatrix(self, plot_name, epoch=None):
         if not self.util.exp_is_classification():
             self.continuous_to_categorical()
         self._plot_confmat(self.truths, self.preds, plot_name, epoch)
@@ -156,9 +156,11 @@ class Reporter:
             pred = np.digitize(pred, bins) - 1
         self._plot_confmat(truth, pred.astype("int"), plot_name, 0)
-    def _plot_confmat(self, truths, preds, plot_name, epoch):
+    def _plot_confmat(self, truths, preds, plot_name, epoch=None):
         # print(truths)
         # print(preds)
+        if epoch is None:
+            epoch = self.epoch
         fig_dir = self.util.get_path("fig_dir")
         labels = glob_conf.labels
         fig = plt.figure()  # figsize=[5, 5]
@@ -225,7 +227,7 @@ class Reporter:
         res_dir = self.util.get_path("res_dir")
         rpt = (
-            f"epoch: {epoch}, UAR: {uar_str}"
+            f"Best score at epoch: {epoch}, UAR: {uar_str}"
             + f", (+-{up_str}/{low_str}), ACC: {acc_str}"
         )
         # print(rpt)
@@ -237,7 +239,9 @@ class Reporter:
     def set_filename_add(self, my_string):
         self.filenameadd = f"_{my_string}"
-    def print_results(self, epoch):
+    def print_results(self, epoch=None):
+        if epoch is None:
+            epoch = self.epoch
         """Print all evaluation values to text file."""
         res_dir = self.util.get_path("res_dir")
         file_name = f"{res_dir}{self.util.get_exp_name()}_{epoch}{self.filenameadd}.txt"
@@ -262,12 +266,14 @@ class Reporter:
                     c_res = rpt[l]["f1-score"]
                     c_ress[i] = float(f"{c_res:.3f}")
                 self.util.debug(f"labels: {labels}")
-                f1_per_class = f"result per class (F1 score): {c_ress}"
+                f1_per_class = (
+                    f"result per class (F1 score): {c_ress} from epoch: {epoch}"
+                )
                 if len(np.unique(self.truths)) == 2:
                     fpr, tpr, _ = roc_curve(self.truths, self.preds)
                     auc_score = auc(fpr, tpr)
                     pauc_score = roc_auc_score(self.truths, self.preds, max_fpr=0.1)
-                    auc_pauc = f"auc: {auc_score:.3f}, pauc: {pauc_score:.3f}"
+                    auc_pauc = f"auc: {auc_score:.3f}, pauc: {pauc_score:.3f} from epoch: {epoch}"
                     self.util.debug(auc_pauc)
                 self.util.debug(f1_per_class)
                 rpt_str = f"{json.dumps(rpt)}\n{f1_per_class}"

nkululeko/runmanager.py CHANGED Viewed

@@ -63,8 +63,7 @@ class Runmanager:
             )
             self.reports, last_epoch = self.modelrunner.do_epochs()
             # wrap up the run
-            plot_anim_progression = self.util.config_val(
-                "PLOT", "anim_progression", 0)
+            plot_anim_progression = self.util.config_val("PLOT", "anim_progression", 0)
             if plot_anim_progression:
                 plot_name_suggest = self.util.get_exp_name()
                 plot_name = (
@@ -88,8 +87,7 @@ class Runmanager:
                     + "_epoch_progression"
                 )
                 self.util.debug(f"plotting progression to {plot_name}")
-                self.reports[-1].plot_epoch_progression(
-                    self.reports, plot_name)
+                self.reports[-1].plot_epoch_progression(self.reports, plot_name)
             # remember the best run
             best_report = self.get_best_result(self.reports)
             plot_best_model = self.util.config_val("PLOT", "best_model", False)
@@ -107,9 +105,10 @@ class Runmanager:
                 )
                 self.print_model(best_report, plot_name)
             # finally, print out the numbers for this run
-            self.reports[-1].print_results(
-                int(self.util.config_val("EXP", "epochs", 1))
-            )
+            # self.reports[-1].print_results(
+            #     int(self.util.config_val("EXP", "epochs", 1))
+            # )
+            best_report.print_results(best_report.epoch)
             self.best_results.append(best_report)
             self.last_epochs.append(last_epoch)
@@ -145,19 +144,18 @@ class Runmanager:
         )
         self.print_model(report, plot_name)
-    def print_model(self, report, plot_name):
+    def print_model(self, reporter, plot_name):
         """Print a confusion matrix for a special report.
         Args:
             reporter: the report to plot the confusion matrix and print results for
             plot_name: name of plot file
         """
-        epoch = report.epoch
         # self.load_model(report)
         # report = self.model.predict()
         self.util.debug(f"plotting conf matrix to {plot_name}")
-        report.plot_confmatrix(plot_name, epoch)
-        report.print_results(epoch)
+        reporter.plot_confmatrix(plot_name)
+        reporter.print_results()
     def load_model(self, report):
         """Load a model from disk for a specific run and epoch and evaluate it.

{nkululeko-0.86.5.dist-info → nkululeko-0.86.7.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nkululeko
-Version: 0.86.5
+Version: 0.86.7
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt
@@ -343,6 +343,14 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
 Changelog
 =========
+Version 0.86.7
+--------------
+* handles now audformat tables where the target is in a file index
+Version 0.86.6
+--------------
+* now best (not last) result is shown at end
 Version 0.86.5
 --------------
 * fix audio path detection in data csv import

{nkululeko-0.86.5.dist-info → nkululeko-0.86.7.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=YhuZnS_WVWnun9G-M6g5n6rbRxoVREz6Zh7k6qprFNQ,3194
 nkululeko/augment.py,sha256=4MG0apTAG5RgkuJrYEjGgDdbodZWi_HweSPNI1JJ5QA,3051
 nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
-nkululeko/constants.py,sha256=ctptCGup_HGCOxioUojLqMivtVfYq8CZDLHJprDr9aE,39
+nkululeko/constants.py,sha256=CscqJhC7nceHk2wmZd2bBFSeFExtr0HkXt99qpAZU4E,39
 nkululeko/demo.py,sha256=WSKr-W5uJ9DQfemK923g7Hd5V3kgAn03Er0JX1Pa45I,5142
 nkululeko/demo_feats.py,sha256=sAeGFojhEj9WEDFtG3SzPBmyYJWLF2rkbpp65m8Ujo4,2025
 nkululeko/demo_predictor.py,sha256=es56xbT8ifkS_vnrlb5NTZT54gNmeUtNlA4zVA_gnN8,4757
@@ -13,14 +13,14 @@ nkululeko/feature_extractor.py,sha256=8mssYKmo4LclVI-hiLmJEDZ0ZPyDavFG2YwtXcrGzw
 nkululeko/file_checker.py,sha256=LoLnL8aHpW-axMQ46qbqrManTs5otG9ShpEZuz9iRSk,3474
 nkululeko/filter_data.py,sha256=w-X2mhKdYr5DxDIz50E5yzO6Jmzk4jjDBoXsgOOVtcA,7222
 nkululeko/glob_conf.py,sha256=KL9YJQTHvTztxo1vr25qRRgaPnx4NTg0XrdbovKGMmw,525
-nkululeko/modelrunner.py,sha256=iCmfJxsS2UafcikjRdUqPQuqQMOYA-Ctr3et3HeNR3c,10452
+nkululeko/modelrunner.py,sha256=OU35qwP94GxW_EtL4I2-RhqB-wxbjNvp8CIHNbtnt7Q,11155
 nkululeko/multidb.py,sha256=fG3VukEWP1vreVN4gB1IRXxwwg4jLftsSEYtu0o1f78,5634
 nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
 nkululeko/nkululeko.py,sha256=Kn3s2E3yyH8cJ7z6lkMxrnqtCxTu7-qfe9Zr_ONTD5g,1968
 nkululeko/plots.py,sha256=C2mwQFK0Vxfl5ZM7CO87tULDoEf7G16ek0nU77bhOc4,23070
 nkululeko/predict.py,sha256=sF091sSSLnEWcISx9ZcULLie3tY5XeFsQJd6b3vrxFg,2409
 nkululeko/resample.py,sha256=2d9eao_0sLrGZ_KSl8OVKsPor3BkFrlmMhrpB9WelIs,4267
-nkululeko/runmanager.py,sha256=eTM1DNQKt1lxYhzt4vZyZluPXW9sWlIJHNQzex4lkJU,7624
+nkululeko/runmanager.py,sha256=Na8oPn59lRFiNMsYChRHBRgw40mBcw0Rwl2Kz1RUsA0,7614
 nkululeko/scaler.py,sha256=4nkIqoajkIkuTPK0Z02ifMN_awl6fP_i-GBYdoGYgGM,4101
 nkululeko/segment.py,sha256=YLKckX44tbvTb3LrdgYw9X4guzuF27sutl92z9DkpZU,4835
 nkululeko/syllable_nuclei.py,sha256=Sky-C__MeUDaxqHnDl2TGLLYOYvsahD35TUjWGeG31k,10047
@@ -45,7 +45,7 @@ nkululeko/autopredict/ap_stoi.py,sha256=It0Lk-ki-gohA2AzD8nkLAN2WahYvD9rPDGTQuvd
 nkululeko/autopredict/ap_valence.py,sha256=n-hctRKySzhmJtowuMOTUu0T_ld3uK5pnfOzWeWW4VM,1024
 nkululeko/autopredict/estimate_snr.py,sha256=S-bpS0xFkwWc4Ch75UrjbS8y538lQ0U3g_iLRFXureY,5048
 nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nkululeko/data/dataset.py,sha256=JGzMD6HIvkFkYBekmbmslIKc5ADaCj06T-8gpqH_kFo,27650
+nkululeko/data/dataset.py,sha256=hUD0NqWCfRaSHG8JNs1MsPb0zjUZAf8FJkg_c0ebq0Q,28046
 nkululeko/data/dataset_csv.py,sha256=dzOrbKB8t0UATAIYaKAOqHTogmYPBqskt6Hak7VjbSM,4537
 nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/feat_extract/feats_agender.py,sha256=Qm69G4kqAyTVVk7wwRgrXlNwGaDMGRYyKGpuf0vOEgM,3113
@@ -96,7 +96,7 @@ nkululeko/reporting/defines.py,sha256=IsY1YgKRMaABpylVKjBJgJ5bNCEbGCVA_E6pivraqS
 nkululeko/reporting/latex_writer.py,sha256=qiCRSmB4KOD_za4oHu5x-PhwjZohzfo8wecMOwlXZwc,1886
 nkululeko/reporting/report.py,sha256=W0rcigDdjBvxZQ3pZja_gvToILYvaZ1BFtnN2qFRfYI,1060
 nkululeko/reporting/report_item.py,sha256=siWeGNgo4bAE46YBMNcsdf3jTMTy76BO9Fi6DTvDig4,533
-nkululeko/reporting/reporter.py,sha256=II3QyeneAv8xQDBZ-qE_GJL8_WV_yXqLwBUYqrjqwPo,13938
+nkululeko/reporting/reporter.py,sha256=S9A62AxdMTEV-9XDUQNxdoevGLXBP52WiDmZ694QMV4,14161
 nkululeko/reporting/result.py,sha256=nSN5or-Py2GPRWHkWpGRh7UCi1W0er7WLEHz8fYLk-A,742
 nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=pmLHuXsaqvcdYxB4PSW9l1mbQWZZBJFhi_CGabqydas,1947
@@ -105,8 +105,8 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
 nkululeko/utils/stats.py,sha256=1yUq0FTOyqkU8TwUocJRYdJaqMU5SlOBBRUun9STo2M,2829
 nkululeko/utils/util.py,sha256=ILpfNuaeq-hy1bUkRhVrzO2wG9z9Upaozs9EBoIaMG0,14123
-nkululeko-0.86.5.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
-nkululeko-0.86.5.dist-info/METADATA,sha256=HrTVTfGh3KDsmyBFijAp5tMINdiBvHhsC8E0_YwBjwE,37848
-nkululeko-0.86.5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-nkululeko-0.86.5.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
-nkululeko-0.86.5.dist-info/RECORD,,
+nkululeko-0.86.7.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.86.7.dist-info/METADATA,sha256=t5cI43YRp3qmyJj03ACfgCbKoAuLYImDCLS1QkYbMQM,38024
+nkululeko-0.86.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+nkululeko-0.86.7.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.86.7.dist-info/RECORD,,

{nkululeko-0.86.5.dist-info → nkululeko-0.86.7.dist-info}/LICENSE RENAMED Viewed

File without changes

{nkululeko-0.86.5.dist-info → nkululeko-0.86.7.dist-info}/WHEEL RENAMED Viewed

File without changes

{nkululeko-0.86.5.dist-info → nkululeko-0.86.7.dist-info}/top_level.txt RENAMED Viewed

File without changes

nkululeko 0.86.5__py3-none-any.whl → 0.86.7__py3-none-any.whl

nkululeko 0.86.5py3-none-any.whl → 0.86.7py3-none-any.whl