pg-sui 0.2.0__py3-none-any.whl → 1.6.14.dev9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info}/METADATA +101 -79
- pg_sui-1.6.14.dev9.dist-info/RECORD +81 -0
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info}/WHEEL +1 -1
- pg_sui-1.6.14.dev9.dist-info/entry_points.txt +4 -0
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info/licenses}/LICENSE +0 -0
- pg_sui-1.6.14.dev9.dist-info/top_level.txt +1 -0
- pgsui/__init__.py +35 -54
- pgsui/_version.py +34 -0
- pgsui/cli.py +909 -0
- pgsui/data_processing/__init__.py +0 -0
- pgsui/data_processing/config.py +565 -0
- pgsui/data_processing/containers.py +1424 -0
- pgsui/data_processing/transformers.py +557 -907
- pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
- pgsui/electron/app/__main__.py +5 -0
- pgsui/electron/app/extra-resources/.gitkeep +1 -0
- pgsui/electron/app/icons/icons/1024x1024.png +0 -0
- pgsui/electron/app/icons/icons/128x128.png +0 -0
- pgsui/electron/app/icons/icons/16x16.png +0 -0
- pgsui/electron/app/icons/icons/24x24.png +0 -0
- pgsui/electron/app/icons/icons/256x256.png +0 -0
- pgsui/electron/app/icons/icons/32x32.png +0 -0
- pgsui/electron/app/icons/icons/48x48.png +0 -0
- pgsui/electron/app/icons/icons/512x512.png +0 -0
- pgsui/electron/app/icons/icons/64x64.png +0 -0
- pgsui/electron/app/icons/icons/icon.icns +0 -0
- pgsui/electron/app/icons/icons/icon.ico +0 -0
- pgsui/electron/app/main.js +227 -0
- pgsui/electron/app/package-lock.json +6894 -0
- pgsui/electron/app/package.json +51 -0
- pgsui/electron/app/preload.js +15 -0
- pgsui/electron/app/server.py +157 -0
- pgsui/electron/app/ui/logo.png +0 -0
- pgsui/electron/app/ui/renderer.js +131 -0
- pgsui/electron/app/ui/styles.css +59 -0
- pgsui/electron/app/ui/ui_shim.js +72 -0
- pgsui/electron/bootstrap.py +43 -0
- pgsui/electron/launch.py +57 -0
- pgsui/electron/package.json +14 -0
- pgsui/example_data/__init__.py +0 -0
- pgsui/example_data/phylip_files/__init__.py +0 -0
- pgsui/example_data/phylip_files/test.phy +0 -0
- pgsui/example_data/popmaps/__init__.py +0 -0
- pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
- pgsui/example_data/structure_files/__init__.py +0 -0
- pgsui/example_data/structure_files/test.pops.2row.allsites.str +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
- pgsui/impute/__init__.py +0 -0
- pgsui/impute/deterministic/imputers/allele_freq.py +725 -0
- pgsui/impute/deterministic/imputers/mode.py +844 -0
- pgsui/impute/deterministic/imputers/nmf.py +221 -0
- pgsui/impute/deterministic/imputers/phylo.py +973 -0
- pgsui/impute/deterministic/imputers/ref_allele.py +669 -0
- pgsui/impute/supervised/__init__.py +0 -0
- pgsui/impute/supervised/base.py +343 -0
- pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
- pgsui/impute/supervised/imputers/hist_gradient_boosting.py +317 -0
- pgsui/impute/supervised/imputers/random_forest.py +291 -0
- pgsui/impute/unsupervised/__init__.py +0 -0
- pgsui/impute/unsupervised/base.py +1118 -0
- pgsui/impute/unsupervised/callbacks.py +92 -262
- {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
- pgsui/impute/unsupervised/imputers/autoencoder.py +1285 -0
- pgsui/impute/unsupervised/imputers/nlpca.py +1554 -0
- pgsui/impute/unsupervised/imputers/ubp.py +1575 -0
- pgsui/impute/unsupervised/imputers/vae.py +1228 -0
- pgsui/impute/unsupervised/loss_functions.py +261 -0
- pgsui/impute/unsupervised/models/__init__.py +0 -0
- pgsui/impute/unsupervised/models/autoencoder_model.py +215 -567
- pgsui/impute/unsupervised/models/nlpca_model.py +155 -394
- pgsui/impute/unsupervised/models/ubp_model.py +180 -1106
- pgsui/impute/unsupervised/models/vae_model.py +269 -630
- pgsui/impute/unsupervised/nn_scorers.py +255 -0
- pgsui/utils/__init__.py +0 -0
- pgsui/utils/classification_viz.py +608 -0
- pgsui/utils/logging_utils.py +22 -0
- pgsui/utils/misc.py +35 -480
- pgsui/utils/plotting.py +996 -829
- pgsui/utils/pretty_metrics.py +290 -0
- pgsui/utils/scorers.py +213 -666
- pg_sui-0.2.0.dist-info/RECORD +0 -75
- pg_sui-0.2.0.dist-info/top_level.txt +0 -3
- pgsui/example_data/phylip_files/test_n10.phy +0 -118
- pgsui/example_data/phylip_files/test_n100.phy +0 -118
- pgsui/example_data/phylip_files/test_n2.phy +0 -118
- pgsui/example_data/phylip_files/test_n500.phy +0 -118
- pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
- pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
- pgsui/example_data/trees/test.iqtree +0 -376
- pgsui/example_data/trees/test.qmat +0 -5
- pgsui/example_data/trees/test.rate +0 -2033
- pgsui/example_data/trees/test.tre +0 -1
- pgsui/example_data/trees/test_n10.rate +0 -19
- pgsui/example_data/trees/test_n100.rate +0 -109
- pgsui/example_data/trees/test_n500.rate +0 -509
- pgsui/example_data/trees/test_siterates.txt +0 -2024
- pgsui/example_data/trees/test_siterates_n10.txt +0 -10
- pgsui/example_data/trees/test_siterates_n100.txt +0 -100
- pgsui/example_data/trees/test_siterates_n500.txt +0 -500
- pgsui/example_data/vcf_files/test.vcf +0 -244
- pgsui/example_data/vcf_files/test.vcf.gz +0 -0
- pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
- pgsui/impute/estimators.py +0 -1268
- pgsui/impute/impute.py +0 -1463
- pgsui/impute/simple_imputers.py +0 -1431
- pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -782
- pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1024
- pgsui/impute/unsupervised/keras_classifiers.py +0 -697
- pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
- pgsui/impute/unsupervised/neural_network_imputers.py +0 -1440
- pgsui/impute/unsupervised/neural_network_methods.py +0 -1395
- pgsui/pg_sui.py +0 -261
- pgsui/utils/sequence_tools.py +0 -407
- simulation/sim_benchmarks.py +0 -333
- simulation/sim_treeparams.py +0 -475
- test/__init__.py +0 -0
- test/pg_sui_simtest.py +0 -215
- test/pg_sui_testing.py +0 -523
- test/test.py +0 -151
- test/test_pgsui.py +0 -374
- test/test_tkc.py +0 -185
pgsui/utils/scorers.py
CHANGED
@@ -1,750 +1,297 @@

This module was rewritten. The old `Scorers` class of static and class-method helpers built around `NeuralNetworkMethods` (imported with a try/except fallback from `impute.unsupervised.neural_network_methods`) is removed, including `compute_roc_auc_micro_macro`, `compute_pr`, `check_if_tuple`, `accuracy_scorer`, `hamming_scorer`, `auc_macro`, `auc_micro`, `pr_macro`, `pr_micro`, `pr_samples`, `f1_samples`, the classmethod that assembled a dictionary of sklearn `make_scorer` objects for grid search, and the combined `scorer` method that also wrote a debugging `genotype_dist.csv`. The now-unused sklearn imports (`roc_curve`, `auc`, `hamming_loss`, `make_scorer`, `precision_recall_curve`, `multilabel_confusion_matrix`) are dropped as well. The replacement is a single `Scorer` class; the new module contents (indentation restored) are:

```python
from typing import Dict, Literal

import numpy as np
from sklearn.metrics import (
    accuracy_score,
    average_precision_score,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)
from sklearn.preprocessing import label_binarize
from snpio.utils.logging import LoggerManager
from torch import Tensor

from pgsui.utils.logging_utils import configure_logger
from pgsui.utils.misc import validate_input_type


class Scorer:
    """Class for evaluating the performance of a model using various metrics.

    This class is used to evaluate the performance of a model using various metrics, such as accuracy, F1 score, precision, recall, average precision, and ROC AUC. The class can be used to evaluate the performance of a model on a dataset with ground truth labels. The class can also be used to evaluate the performance of a model in objective mode for hyperparameter tuning.
    """

    def __init__(
        self,
        prefix: str,
        average: Literal["micro", "macro", "weighted"] = "macro",
        verbose: bool = False,
        debug: bool = False,
    ) -> None:
        """Initialize a Scorer object.

        Args:
            prefix (str): Prefix for logging messages.
            average (Literal["micro", "macro", "weighted"]): Average method for metrics. Must be one of 'micro', 'macro', or 'weighted'.
            verbose (bool): Verbosity level for logging messages. Default is False.
            debug (bool): Debug mode for logging messages. Default is False.

        Raises:
            ValueError: If the average parameter is invalid. Must be one of 'micro', 'macro', or 'weighted'.
        """
        logman = LoggerManager(
            name=__name__, prefix=prefix, debug=debug, verbose=verbose >= 1
        )
        self.logger = configure_logger(
            logman.get_logger(), verbose=verbose >= 1, debug=debug
        )

        if average not in {"micro", "macro", "weighted"}:
            msg = f"Invalid average parameter: {average}. Must be one of 'micro', 'macro', or 'weighted'."
            self.logger.error(msg)
            raise ValueError(msg)

        self.average: Literal["micro", "macro", "weighted"] = average

    def accuracy(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Calculate the accuracy of the model by comparing ground truth and predicted labels.

        Args:
            y_true (np.ndarray): Ground truth labels.
            y_pred (np.ndarray): Predicted labels.

        Returns:
            float: Accuracy score.
        """
        return float(accuracy_score(y_true, y_pred))

    def f1(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Calculate the F1 score of the model by comparing ground truth and predicted labels.

        Args:
            y_true (np.ndarray): Ground truth labels.
            y_pred (np.ndarray): Predicted labels.

        Returns:
            float: F1 score.
        """
        avg: str = self.average
        return float(f1_score(y_true, y_pred, average=avg, zero_division=0))

    def precision(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Calculate the precision of the model by comparing ground truth and predicted labels.

        Args:
            y_true (np.ndarray): Ground truth labels.
            y_pred (np.ndarray): Predicted labels.

        Returns:
            float: Precision score.
        """
        avg: str = self.average
        return float(precision_score(y_true, y_pred, average=avg, zero_division=0))

    def recall(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Calculate the recall of the model by comparing ground truth and predicted labels.

        Args:
            y_true (np.ndarray): Ground truth labels.
            y_pred (np.ndarray): Predicted labels.

        Returns:
            float: Recall score.
        """
        avg: str = self.average
        return float(recall_score(y_true, y_pred, average=avg, zero_division=0))

    def roc_auc(self, y_true: np.ndarray, y_pred_proba: np.ndarray) -> float:
        """Multiclass ROC-AUC with label targets.

        This method calculates the ROC-AUC score for multiclass classification problems. It handles both 1D integer labels and 2D one-hot/indicator matrices for the ground truth labels.

        Args:
            y_true: 1D integer labels (shape: [n]).
                If a one-hot/indicator matrix is supplied, we convert to labels.
            y_pred_proba: 2D probabilities (shape: [n, n_classes]).
        """
        y_true = np.asarray(y_true)
        y_pred_proba = np.asarray(y_pred_proba)

        if y_pred_proba.ndim == 3:
            y_pred_proba = y_pred_proba.reshape(-1, y_pred_proba.shape[-1])

        # If user passed indicator/one-hot, convert to labels.
        if y_true.ndim == 2 and y_true.shape[1] == y_pred_proba.shape[1]:
            y_true = y_true.argmax(axis=1)

        # Guard: need >1 class present for AUC
        if np.unique(y_true).size < 2:
            return 0.5

        return float(
            roc_auc_score(
                y_true,
                y_pred_proba,
                multi_class="ovr",
                average=self.average,
            )
        )

    def evaluate(
        self,
        y_true: np.ndarray | Tensor | list,
        y_pred: np.ndarray | Tensor | list,
        y_true_ohe: np.ndarray | Tensor | list,
        y_pred_proba: np.ndarray | Tensor | list,
        objective_mode: bool = False,
        tune_metric: Literal[
            "pr_macro",
            "roc_auc",
            "average_precision",
            "accuracy",
            "f1",
            "precision",
            "recall",
        ] = "pr_macro",
    ) -> Dict[str, float] | None:
        """Evaluate the model using various metrics.

        This method evaluates model performance using accuracy, F1 score, precision, recall, average precision, and ROC AUC. It can evaluate a model against ground truth labels, or run in objective mode to return only the metric used for hyperparameter tuning.

        Args:
            y_true (np.ndarray | torch.Tensor): Ground truth labels.
            y_pred (np.ndarray | torch.Tensor): Predicted labels.
            y_true_ohe (np.ndarray | torch.Tensor): One-hot encoded ground truth labels.
            y_pred_proba (np.ndarray | torch.Tensor): Predicted probabilities.
            objective_mode (bool): Whether to use objective mode for evaluation. Default is False.
            tune_metric (Literal["pr_macro", "roc_auc", "average_precision", "accuracy", "f1", "precision", "recall"]): Metric to use for tuning. Ignored if `objective_mode` is False. Default is 'pr_macro'.

        Returns:
            Dict[str, float]: Dictionary of evaluation metrics. Keys are 'accuracy', 'f1', 'precision', 'recall', 'roc_auc', 'average_precision', and 'pr_macro'.

        Raises:
            ValueError: If the input data is invalid.
            ValueError: If an invalid tune_metric is provided.
        """
        y_true = np.asarray(validate_input_type(y_true, return_type="array"))
        y_pred = np.asarray(validate_input_type(y_pred, return_type="array"))
        y_true_ohe = np.asarray(validate_input_type(y_true_ohe, return_type="array"))
        y_pred_proba = np.asarray(
            validate_input_type(y_pred_proba, return_type="array")
        )

        if not y_true.ndim < 3:
            msg = "y_true must have 1 or 2 dimensions."
            self.logger.error(msg)
            raise ValueError(msg)

        if not y_pred.ndim < 3:
            msg = "y_pred must have 1 or 2 dimensions."
            self.logger.error(msg)
            raise ValueError(msg)

        if not y_true_ohe.ndim == 2:
            msg = "y_true_ohe must have 2 dimensions."
            self.logger.error(msg)
            raise ValueError(msg)

        if y_pred_proba.ndim != 2:
            y_pred_proba = y_pred_proba.reshape(-1, y_true_ohe.shape[-1])
            self.logger.debug(f"Reshaped y_pred_proba to {y_pred_proba.shape}")

        if objective_mode:
            if tune_metric == "pr_macro":
                metrics = {"pr_macro": self.pr_macro(y_true_ohe, y_pred_proba)}
            elif tune_metric == "roc_auc":
                metrics = {"roc_auc": self.roc_auc(y_true, y_pred_proba)}
            elif tune_metric == "average_precision":
                metrics = {
                    "average_precision": self.average_precision(y_true, y_pred_proba)
                }
            elif tune_metric == "accuracy":
                metrics = {"accuracy": self.accuracy(y_true, y_pred)}
            elif tune_metric == "f1":
                metrics = {"f1": self.f1(y_true, y_pred)}
            elif tune_metric == "precision":
                metrics = {"precision": self.precision(y_true, y_pred)}
            elif tune_metric == "recall":
                metrics = {"recall": self.recall(y_true, y_pred)}
            else:
                msg = f"Invalid tune_metric provided: '{tune_metric}'."
                self.logger.error(msg)
                raise ValueError(msg)
        else:
            metrics = {
                "accuracy": self.accuracy(y_true, y_pred),
                "f1": self.f1(y_true, y_pred),
                "precision": self.precision(y_true, y_pred),
                "recall": self.recall(y_true, y_pred),
                "roc_auc": self.roc_auc(y_true, y_pred_proba),
                "average_precision": self.average_precision(y_true, y_pred_proba),
                "pr_macro": self.pr_macro(y_true_ohe, y_pred_proba),
            }

        return {k: float(v) for k, v in metrics.items()}

    def average_precision(self, y_true: np.ndarray, y_pred_proba: np.ndarray) -> float:
        """Average precision with safe multiclass handling.

        If y_true is 1D of class indices, it is binarized against the number of columns in y_pred_proba. If y_true is already one-hot or indicator, it is used as-is.

        Args:
            y_true (np.ndarray): Ground truth labels (1D class indices or 2D one-hot/indicator).
            y_pred_proba (np.ndarray): Predicted probabilities (2D array).

        Returns:
            float: Average precision score.
        """
        y_true_arr = np.asarray(y_true)
        y_proba_arr = np.asarray(y_pred_proba)

        if y_proba_arr.ndim == 3:
            y_proba_arr = y_proba_arr.reshape(-1, y_proba_arr.shape[-1])

        # If y_true already matches proba columns (one-hot / indicator)
        if y_true_arr.ndim == 2 and y_true_arr.shape[1] == y_proba_arr.shape[1]:
            y_bin = y_true_arr
        else:
            # Interpret y_true as class indices
            n_classes = y_proba_arr.shape[1]
            y_bin = label_binarize(y_true_arr.ravel(), classes=np.arange(n_classes))

        return float(average_precision_score(y_bin, y_proba_arr, average=self.average))

    def pr_macro(self, y_true_ohe: np.ndarray, y_pred_proba: np.ndarray) -> float:
        """Macro-averaged average precision (precision-recall AUC) across classes.

        Args:
            y_true_ohe (np.ndarray): One-hot encoded ground truth labels (2D array).
            y_pred_proba (np.ndarray): Predicted probabilities (2D array).

        Returns:
            float: Macro-averaged average precision score.
        """
        y_true_arr = np.asarray(y_true_ohe)
        y_proba_arr = np.asarray(y_pred_proba)

        if y_proba_arr.ndim == 3:
            y_proba_arr = y_proba_arr.reshape(-1, y_proba_arr.shape[-1])

        # Ensure 2D indicator truth
        if y_true_arr.ndim == 1:
            n_classes = y_proba_arr.shape[1]
            y_true_arr = label_binarize(y_true_arr, classes=np.arange(n_classes))

        return float(average_precision_score(y_true_arr, y_proba_arr, average="macro"))
```