PyPI - nkululeko - Versions diffs - 0.86.8__py3-none-any.whl → 0.88.0__py3-none-any.whl - Mend

nkululeko 0.86.8__py3-none-any.whl → 0.88.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

nkululeko/constants.py +1 -1
nkululeko/data/dataset_csv.py +12 -14
nkululeko/demo.py +7 -10
nkululeko/ensemble.py +158 -0
nkululeko/feat_extract/feats_ast.py +118 -0
nkululeko/feat_extract/feats_wav2vec2.py +2 -4
nkululeko/feat_extract/feats_wavlm.py +7 -4
nkululeko/feature_extractor.py +5 -9
nkululeko/modelrunner.py +5 -5
nkululeko/models/model.py +23 -3
nkululeko/models/model_cnn.py +41 -22
nkululeko/models/model_mlp.py +37 -17
nkululeko/models/model_mlp_regression.py +3 -1
nkululeko/plots.py +25 -37
nkululeko/reporting/reporter.py +69 -6
nkululeko/runmanager.py +8 -11
nkululeko/test_predictor.py +2 -9
nkululeko/utils/stats.py +11 -7
nkululeko/utils/util.py +24 -19
{nkululeko-0.86.8.dist-info → nkululeko-0.88.0.dist-info}/METADATA +22 -1
{nkululeko-0.86.8.dist-info → nkululeko-0.88.0.dist-info}/RECORD +24 -22
{nkululeko-0.86.8.dist-info → nkululeko-0.88.0.dist-info}/WHEEL +1 -1
{nkululeko-0.86.8.dist-info → nkululeko-0.88.0.dist-info}/LICENSE +0 -0
{nkululeko-0.86.8.dist-info → nkululeko-0.88.0.dist-info}/top_level.txt +0 -0

nkululeko/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.86.8"
+VERSION="0.88.0"
 SAMPLING_RATE = 16000

nkululeko/data/dataset_csv.py CHANGED Viewed

@@ -23,6 +23,9 @@ class Dataset_CSV(Dataset):
         root = os.path.dirname(data_file)
         audio_path = self.util.config_val_data(self.name, "audio_path", "./")
         df = pd.read_csv(data_file)
+        # trim all string values
+        df_obj = df.select_dtypes("object")
+        df[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())
         # special treatment for segmented dataframes with only one column:
         if "start" in df.columns and len(df.columns) == 4:
             index = audformat.segmented_index(
@@ -49,8 +52,7 @@ class Dataset_CSV(Dataset):
                     .map(lambda x: root + "/" + audio_path + "/" + x)
                     .values
                 )
-                df = df.set_index(df.index.set_levels(
-                    file_index, level="file"))
+                df = df.set_index(df.index.set_levels(file_index, level="file"))
             else:
                 if not isinstance(df, pd.DataFrame):
                     df = pd.DataFrame(df)
@@ -59,27 +61,24 @@ class Dataset_CSV(Dataset):
                         lambda x: root + "/" + audio_path + "/" + x
                     )
                 )
-        else: # absolute path is True
+        else:  # absolute path is True
             if audformat.index_type(df.index) == "segmented":
                 file_index = (
-                    df.index.levels[0]
-                    .map(lambda x: audio_path + "/" + x)
-                    .values
+                    df.index.levels[0].map(lambda x: audio_path + "/" + x).values
                 )
-                df = df.set_index(df.index.set_levels(
-                    file_index, level="file"))
+                df = df.set_index(df.index.set_levels(file_index, level="file"))
             else:
                 if not isinstance(df, pd.DataFrame):
                     df = pd.DataFrame(df)
-                df = df.set_index(df.index.to_series().apply(
-                    lambda x: audio_path + "/" + x ))
+                df = df.set_index(
+                    df.index.to_series().apply(lambda x: audio_path + "/" + x)
+                )
         self.df = df
         self.db = None
         self.got_target = True
         self.is_labeled = self.got_target
-        self.start_fresh = eval(
-            self.util.config_val("DATA", "no_reuse", "False"))
+        self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
         is_index = False
         try:
             if self.is_labeled and not "class_label" in self.df.columns:
@@ -106,8 +105,7 @@ class Dataset_CSV(Dataset):
                 f" {self.got_gender}, got age: {self.got_age}"
             )
         self.util.debug(r_string)
-        glob_conf.report.add_item(ReportItem(
-            "Data", "Loaded report", r_string))
+        glob_conf.report.add_item(ReportItem("Data", "Loaded report", r_string))
     def prepare(self):
         super().prepare()

nkululeko/demo.py CHANGED Viewed

@@ -20,20 +20,19 @@ Options:   \n
 import argparse
 import configparser
 import os
 import pandas as pd
+from transformers import pipeline
+import nkululeko.glob_conf as glob_conf
 from nkululeko.constants import VERSION
 from nkululeko.experiment import Experiment
-import nkululeko.glob_conf as glob_conf
 from nkululeko.utils.util import Util
-from transformers import pipeline
 def main(src_dir):
-    parser = argparse.ArgumentParser(
-        description="Call the nkululeko DEMO framework.")
-    parser.add_argument("--config", default="exp.ini",
-                        help="The base configuration")
+    parser = argparse.ArgumentParser(description="Call the nkululeko DEMO framework.")
+    parser.add_argument("--config", default="exp.ini", help="The base configuration")
     parser.add_argument(
         "--file", help="A file that should be processed (16kHz mono wav)"
     )
@@ -84,8 +83,7 @@ def main(src_dir):
     )
     def print_pipe(files, outfile):
-        """
-        Prints the pipeline output for a list of files, and optionally writes the results to an output file.
+        """Prints the pipeline output for a list of files, and optionally writes the results to an output file.
         Args:
             files (list): A list of file paths to process through the pipeline.
@@ -108,8 +106,7 @@ def main(src_dir):
                 f.write("\n".join(results))
     if util.get_model_type() == "finetune":
-        model_path = os.path.join(
-            util.get_exp_dir(), "models", "run_0", "torch")
+        model_path = os.path.join(util.get_exp_dir(), "models", "run_0", "torch")
         pipe = pipeline("audio-classification", model=model_path)
         if args.file is not None:
             print_pipe([args.file], args.outfile)

nkululeko/ensemble.py ADDED Viewed

@@ -0,0 +1,158 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import configparser
+import time
+from argparse import ArgumentParser
+from pathlib import Path
+import pandas as pd
+from nkululeko.constants import VERSION
+from nkululeko.experiment import Experiment
+from nkululeko.utils.util import Util
+def ensemble_predictions(config_files, method, no_labels):
+    """
+    Ensemble predictions from multiple experiments.
+    Args:
+        config_files (list): List of configuration file paths.
+        method (str): Ensemble method to use. Options are 'majority_voting', 'mean', 'max', or 'sum'.
+        no_labels (bool): Flag indicating whether the predictions have labels or not.
+    Returns:
+        pandas.DataFrame: The ensemble predictions.
+    Raises:
+        ValueError: If an unknown ensemble method is provided.
+        AssertionError: If the number of config files is less than 2 for majority voting.
+    """
+    ensemble_preds = []
+    # labels = []
+    for config_file in config_files:
+        if no_labels:
+            # for ensembling results from Nkululeko.demo
+            pred = pd.read_csv(config_file)
+            labels = pred.columns[1:-2]
+        else:
+            # for ensembling results from Nkululeko.nkululeko
+            config = configparser.ConfigParser()
+            config.read(config_file)
+            expr = Experiment(config)
+            module = "ensemble"
+            expr.set_module(module)
+            util = Util(module, has_config=True)
+            util.debug(
+                f"running {expr.name} from config {config_file}, nkululeko version"
+                f" {VERSION}"
+            )
+            # get labels
+            labels = expr.util.get_labels()
+            # load the experiment
+            # get CSV files of predictions
+            pred = expr.util.get_pred_name()
+            print(f"Loading predictions from {pred}")
+            preds = pd.read_csv(pred)
+        ensemble_preds.append(preds)
+    # pd concate
+    ensemble_preds = pd.concat(ensemble_preds, axis=1)
+    if method == "majority_voting":
+        # majority voting, get mode, works for odd number of models
+        # raise error when number of configs only two:
+        assert (
+            len(config_files) > 2
+        ), "Majority voting only works for more than two models"
+        ensemble_preds["predicted"] = ensemble_preds.mode(axis=1)[0]
+    elif method == "mean":
+        for label in labels:
+            ensemble_preds[label] = ensemble_preds[label].mean(axis=1)
+    elif method == "max":
+        for label in labels:
+            ensemble_preds[label] = ensemble_preds[label].max(axis=1)
+            # get max value from all labels to inver that labels
+    elif method == "sum":
+        for label in labels:
+            ensemble_preds[label] = ensemble_preds[label].sum(axis=1)
+    else:
+        raise ValueError(f"Unknown ensemble method: {method}")
+    # get the highest value from all labels to inver that labels
+    # replace the old first predicted column
+    ensemble_preds["predicted"] = ensemble_preds[labels].idxmax(axis=1)
+    if no_labels:
+        return ensemble_preds
+    # Drop start, end columns
+    ensemble_preds = ensemble_preds.drop(columns=["start", "end"])
+    # Drop other column except until truth
+    ensemble_preds = ensemble_preds.iloc[:, : len(labels) + 3]
+    # calculate UAR from predicted and truth columns
+    truth = ensemble_preds["truth"]
+    predicted = ensemble_preds["predicted"]
+    uar = (truth == predicted).mean()
+    Util("ensemble").debug(f"UAR: {uar:.3f}")
+    # only return until 'predicted' column
+    return ensemble_preds
+def main(src_dir):
+    parser = ArgumentParser()
+    parser.add_argument(
+        "configs",
+        nargs="+",
+        help="Paths to the configuration files of the experiments to ensemble. \
+             Can be INI files for Nkululeko.nkululeo or CSV files from Nkululeko.demo.",
+    )
+    parser.add_argument(
+        "--method",
+        default="majority_voting",
+        choices=["majority_voting", "mean", "max", "sum"],
+        help="Ensemble method to use (default: majority_voting)",
+    )
+    parser.add_argument(
+        "--outfile",
+        default="ensemble_result.csv",
+        help="Output file path for the ensemble predictions (default: ensemble_predictions.csv)",
+    )
+    # add argument if true label is not available
+    parser.add_argument(
+        "--no_labels",
+        action="store_true",
+        help="True if true labels are not available. For Nkululeko.demo results.",
+    )
+    args = parser.parse_args()
+    start = time.time()
+    ensemble_preds = ensemble_predictions(args.configs, args.method, args.no_labels)
+    # save to csv
+    ensemble_preds.to_csv(args.outfile, index=False)
+    print(f"Ensemble predictions saved to: {args.outfile}")
+    print(f"Ensemble done, used {time.time()-start:.2f} seconds")
+    print("DONE")
+if __name__ == "__main__":
+    cwd = Path(__file__).parent
+    main(cwd)

nkululeko/feat_extract/feats_ast.py ADDED Viewed

@@ -0,0 +1,118 @@
+# feats_ast.py
+import os
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn.functional as F
+import torchaudio
+from tqdm import tqdm
+from transformers import AutoProcessor, ASTModel
+import nkululeko.glob_conf as glob_conf
+from nkululeko.feat_extract.featureset import Featureset
+class Ast(Featureset):
+    """Class to extract AST (Audio Spectrogram Transformer) embeddings"""
+    def __init__(self, name, data_df, feat_type):
+        super().__init__(name, data_df, feat_type)
+        cuda = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = self.util.config_val("MODEL", "device", cuda)
+        self.model_initialized = False
+        self.feat_type = feat_type
+    def init_model(self):
+        self.util.debug("loading AST model...")
+        model_path = self.util.config_val(
+            "FEATS", "ast.model", "MIT/ast-finetuned-audioset-10-10-0.4593"
+        )
+        self.processor = AutoProcessor.from_pretrained(model_path)
+        self.model = ASTModel.from_pretrained(model_path).to(self.device)
+        print(f"initialized AST model on {self.device}")
+        self.model.eval()
+        self.model_initialized = True
+    def extract(self):
+        """Extract the features or load them from disk if present."""
+        store = self.util.get_path("store")
+        storage = f"{store}{self.name}.pkl"
+        extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
+        no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
+        if extract or no_reuse or not os.path.isfile(storage):
+            if not self.model_initialized:
+                self.init_model()
+            self.util.debug("extracting wavlm embeddings, this might take a while...")
+            emb_series = pd.Series(index=self.data_df.index, dtype=object)
+            length = len(self.data_df.index)
+            for idx, (file, start, end) in enumerate(
+                tqdm(self.data_df.index.to_list())
+            ):
+                signal, sampling_rate = torchaudio.load(
+                    file,
+                    frame_offset=int(start.total_seconds() * 16000),
+                    num_frames=int((end - start).total_seconds() * 16000),
+                )
+                # make mono if stereo
+                if signal.shape[0] == 2:
+                    signal = torch.mean(signal, dim=0, keepdim=True)
+                assert (
+                    sampling_rate == 16000
+                ), f"sampling rate should be 16000 but is {sampling_rate}"
+                emb = self.get_embeddings(signal, sampling_rate, file)
+                emb_series.iloc[idx] = emb
+            self.df = pd.DataFrame(emb_series.values.tolist(), index=self.data_df.index)
+            self.df.to_pickle(storage)
+            try:
+                glob_conf.config["DATA"]["needs_feature_extraction"] = "false"
+            except KeyError:
+                pass
+        else:
+            self.util.debug(f"reusing extracted {self.feat_type} embeddings")
+            self.df = pd.read_pickle(storage)
+            if self.df.isnull().values.any():
+                # nanrows = self.df.columns[self.df.isna().any()].tolist()
+                # print(nanrows)
+                self.util.error(
+                    f"got nan: {self.df.shape} {self.df.isnull().sum().sum()}"
+                )
+    def get_embeddings(self, signal, sampling_rate, file):
+        """Extract embeddings from raw audio signal."""
+        try:
+            inputs = self.processor(signal.numpy(), sampling_rate=sampling_rate, return_tensors="pt")
+            inputs = {k: v.to(self.device) for k, v in inputs.items()}
+            with torch.no_grad():
+                # Get the hidden states
+                outputs = self.model(**inputs)
+            # Get the hidden states from the last layer
+            last_hidden_state = outputs.last_hidden_state
+            # print(f"last_hidden_state shape: {last_hidden_state.shape}")
+            # Average pooling over the time dimension
+            embeddings = torch.mean(last_hidden_state, dim=1)
+            embeddings = embeddings.cpu().numpy()
+            # convert the same from (768,) to (1, 768)
+            # embeddings = embeddings.reshape(1, -1)
+            print(f"hs shape: {embeddings.shape}")
+        except Exception as e:
+            self.util.error(f"Error extracting embeddings for file {file}: {str(e)}, fill with")
+            return np.zeros(
+                self.model.config.hidden_size
+            )  # Return zero vector on error
+        return embeddings.ravel()
+    def extract_sample(self, signal, sr):
+        self.init_model()
+        feats = self.get_embeddings(signal, sr, "no file")
+        return feats

nkululeko/feat_extract/feats_wav2vec2.py CHANGED Viewed

@@ -47,9 +47,7 @@ class Wav2vec2(Featureset):
         config.num_hidden_layers = layer_num - hidden_layer
         self.util.debug(f"using hidden layer #{config.num_hidden_layers}")
         self.processor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
-        self.model = Wav2Vec2Model.from_pretrained(model_path, config=config).to(
-            self.device
-        )
+        self.model = Wav2Vec2Model.from_pretrained(model_path, config=config).to(self.device)
         print(f"intialized Wav2vec model on {self.device}")
         self.model.eval()
         self.model_initialized = True
@@ -90,7 +88,7 @@ class Wav2vec2(Featureset):
             self.util.debug("reusing extracted wav2vec2 embeddings")
             self.df = pd.read_pickle(storage)
             if self.df.isnull().values.any():
-                nanrows = self.df.columns[self.df.isna().any()].tolist()
+                # nanrows = self.df.columns[self.df.isna().any()].tolist()
                 # print(nanrows)
                 self.util.error(
                     f"got nan: {self.df.shape} {self.df.isnull().sum().sum()}"

nkululeko/feat_extract/feats_wavlm.py CHANGED Viewed

@@ -79,8 +79,8 @@ class Wavlm(Featureset):
             self.util.debug(f"reusing extracted {self.feat_type} embeddings")
             self.df = pd.read_pickle(storage)
             if self.df.isnull().values.any():
-                nanrows = self.df.columns[self.df.isna().any()].tolist()
-                print(nanrows)
+                # nanrows = self.df.columns[self.df.isna().any()].tolist()
+                # print(nanrows)
                 self.util.error(
                     f"got nan: {self.df.shape} {self.df.isnull().sum().sum()}"
                 )
@@ -104,11 +104,14 @@ class Wavlm(Featureset):
                 # pool result and convert to numpy
                 y = torch.mean(y, dim=1)
                 y = y.detach().cpu().numpy()
+                # print(f"hs shape: {y.shape}")
         except RuntimeError as re:
             print(str(re))
-            self.util.error(f"couldn't extract file: {file}")
+            self.util.error(f"Couldn't extract file: {file}")
-        return y.flatten()
+        return y.ravel()
     def extract_sample(self, signal, sr):
         self.init_model()

nkululeko/feature_extractor.py CHANGED Viewed

@@ -39,12 +39,10 @@ class FeatureExtractor:
         self.feats = pd.DataFrame()
         for feats_type in self.feats_types:
             store_name = f"{self.data_name}_{feats_type}"
-            self.feat_extractor = self._get_feat_extractor(
-                store_name, feats_type)
+            self.feat_extractor = self._get_feat_extractor(store_name, feats_type)
             self.feat_extractor.extract()
             self.feat_extractor.filter()
-            self.feats = pd.concat(
-                [self.feats, self.feat_extractor.df], axis=1)
+            self.feats = pd.concat([self.feats, self.feat_extractor.df], axis=1)
         return self.feats
     def extract_sample(self, signal, sr):
@@ -77,7 +75,7 @@ class FeatureExtractor:
             return TRILLset
         elif feats_type.startswith(
-            ("wav2vec2", "hubert", "wavlm", "spkrec", "whisper")
+            ("wav2vec2", "hubert", "wavlm", "spkrec", "whisper", "ast")
         ):
             return self._get_feat_extractor_by_prefix(feats_type)
@@ -107,15 +105,13 @@ class FeatureExtractor:
         prefix, _, ext = feats_type.partition("-")
         from importlib import import_module
-        module = import_module(
-            f"nkululeko.feat_extract.feats_{prefix.lower()}")
+        module = import_module(f"nkululeko.feat_extract.feats_{prefix.lower()}")
         class_name = f"{prefix.capitalize()}"
         return getattr(module, class_name)
     def _get_feat_extractor_by_name(self, feats_type):
         from importlib import import_module
-        module = import_module(
-            f"nkululeko.feat_extract.feats_{feats_type.lower()}")
+        module = import_module(f"nkululeko.feat_extract.feats_{feats_type.lower()}")
         class_name = f"{feats_type.capitalize()}Set"
         return getattr(module, class_name)

nkululeko/modelrunner.py CHANGED Viewed

@@ -85,7 +85,7 @@ class Modelrunner:
                     f"run: {self.run} epoch: {epoch}: result: {test_score_metric}"
                 )
                 # print(f"performance: {performance.split(' ')[1]}")
-                performance = float(test_score_metric.split(' ')[1])
+                performance = float(test_score_metric.split(" ")[1])
                 if performance > self.best_performance:
                     self.best_performance = performance
                     self.best_epoch = epoch
@@ -204,15 +204,15 @@ class Modelrunner:
                 self.df_train, self.df_test, self.feats_train, self.feats_test
             )
         elif model_type == "cnn":
-            from nkululeko.models.model_cnn import CNN_model
+            from nkululeko.models.model_cnn import CNNModel
-            self.model = CNN_model(
+            self.model = CNNModel(
                 self.df_train, self.df_test, self.feats_train, self.feats_test
             )
         elif model_type == "mlp":
-            from nkululeko.models.model_mlp import MLP_model
+            from nkululeko.models.model_mlp import MLPModel
-            self.model = MLP_model(
+            self.model = MLPModel(
                 self.df_train, self.df_test, self.feats_train, self.feats_test
             )
         elif model_type == "mlp_reg":

nkululeko/models/model.py CHANGED Viewed

@@ -247,8 +247,25 @@ class Model:
                 self.clf.fit(feats, labels)
     def get_predictions(self):
-        predictions = self.clf.predict(self.feats_test.to_numpy())
-        return predictions
+        #        predictions = self.clf.predict(self.feats_test.to_numpy())
+        if self.util.exp_is_classification():
+            # make a dataframe for the class probabilities
+            proba_d = {}
+            for c in self.clf.classes_:
+                proba_d[c] = []
+            # get the class probabilities
+            predictions = self.clf.predict_proba(self.feats_test.to_numpy())
+            # pred = self.clf.predict(features)
+            for i, c in enumerate(self.clf.classes_):
+                proba_d[c] = list(predictions.T[i])
+            probas = pd.DataFrame(proba_d)
+            probas = probas.set_index(self.feats_test.index)
+            predictions = probas.idxmax(axis=1).values
+        else:
+            predictions = self.clf.predict(self.feats_test.to_numpy())
+            probas = None
+        return predictions, probas
     def predict(self):
         if self.feats_test.isna().to_numpy().any():
@@ -263,13 +280,16 @@ class Model:
             )
             return report
         """Predict the whole eval feature set"""
-        predictions = self.get_predictions()
+        predictions, probas = self.get_predictions()
         report = Reporter(
             self.df_test[self.target].to_numpy().astype(float),
             predictions,
             self.run,
             self.epoch,
+            probas=probas,
         )
+        report.print_probabilities()
         return report
     def get_type(self):

nkululeko/models/model_cnn.py CHANGED Viewed

@@ -5,33 +5,40 @@ Inspired by code from Su Lei
 """
+import ast
+from collections import OrderedDict
+import numpy as np
+import pandas as pd
+from PIL import Image
+from sklearn.metrics import recall_score
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-import torchvision
-import torchvision.transforms as transforms
 from torch.utils.data import Dataset
-import ast
-import numpy as np
-from sklearn.metrics import recall_score
-from collections import OrderedDict
-from PIL import Image
-from traitlets import default
+import torchvision.transforms as transforms
-from nkululeko.utils.util import Util
 import nkululeko.glob_conf as glob_conf
+from nkululeko.losses.loss_softf1loss import SoftF1Loss
 from nkululeko.models.model import Model
 from nkululeko.reporting.reporter import Reporter
-from nkululeko.losses.loss_softf1loss import SoftF1Loss
+from nkululeko.utils.util import Util
-class CNN_model(Model):
-    """CNN = convolutional neural net"""
+class CNNModel(Model):
+    """CNN = convolutional neural net."""
     is_classifier = True
     def __init__(self, df_train, df_test, feats_train, feats_test):
-        """Constructor taking the configuration and all dataframes"""
+        """Constructor, taking all dataframes.
+        Args:
+            df_train (pd.DataFrame): The train labels.
+            df_test (pd.DataFrame): The test labels.
+            feats_train (pd.DataFrame): The train features.
+            feats_test (pd.DataFrame): The test features.
+        """
         super().__init__(df_train, df_test, feats_train, feats_test)
         super().set_model_type("ann")
         self.name = "cnn"
@@ -147,7 +154,20 @@ class CNN_model(Model):
             self.optimizer.step()
         self.loss = (np.asarray(losses)).mean()
-    def evaluate_model(self, model, loader, device):
+    def get_probas(self, logits):
+        # make a dataframe for probabilites (logits)
+        proba_d = {}
+        classes = self.df_test[self.target].unique()
+        classes.sort()
+        for c in classes:
+            proba_d[c] = []
+        for i, c in enumerate(classes):
+            proba_d[c] = list(logits.numpy().T[i])
+        probas = pd.DataFrame(proba_d)
+        probas = probas.set_index(self.df_test.index)
+        return probas
+    def evaluate(self, model, loader, device):
         logits = torch.zeros(len(loader.dataset), self.class_num)
         targets = torch.zeros(len(loader.dataset))
         model.eval()
@@ -169,14 +189,15 @@ class CNN_model(Model):
         self.loss_eval = (np.asarray(losses)).mean()
         predictions = logits.argmax(dim=1)
         uar = recall_score(targets.numpy(), predictions.numpy(), average="macro")
-        return uar, targets, predictions
+        return uar, targets, predictions, logits
     def predict(self):
-        _, truths, predictions = self.evaluate_model(
+        _, truths, predictions, logits = self.evaluate(
             self.model, self.testloader, self.device
         )
-        uar, _, _ = self.evaluate_model(self.model, self.trainloader, self.device)
-        report = Reporter(truths, predictions, self.run, self.epoch)
+        uar, _, _, _ = self.evaluate(self.model, self.trainloader, self.device)
+        probas = self.get_probas(logits)
+        report = Reporter(truths, predictions, self.run, self.epoch, probas=probas)
         try:
             report.result.loss = self.loss
         except AttributeError:  # if the model was loaded from disk the loss is unknown
@@ -189,13 +210,11 @@ class CNN_model(Model):
         return report
     def get_predictions(self):
-        _, truths, predictions = self.evaluate_model(
-            self.model, self.testloader, self.device
-        )
+        _, _, predictions, _ = self.evaluate(self.model, self.testloader, self.device)
         return predictions.numpy()
     def predict_sample(self, features):
-        """Predict one sample"""
+        """Predict one sample."""
         with torch.no_grad():
             logits = self.model(torch.from_numpy(features).to(self.device))
         a = logits.numpy()

nkululeko 0.86.8__py3-none-any.whl → 0.88.0__py3-none-any.whl

nkululeko 0.86.8__py3-none-any.whl → 0.88.0__py3-none-any.whl