PyPI - pertpy - Versions diffs - 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

pertpy/__init__.py +4 -2
pertpy/data/__init__.py +66 -1
pertpy/data/_dataloader.py +28 -26
pertpy/data/_datasets.py +261 -92
pertpy/metadata/__init__.py +6 -0
pertpy/metadata/_cell_line.py +795 -0
pertpy/metadata/_compound.py +128 -0
pertpy/metadata/_drug.py +238 -0
pertpy/metadata/_look_up.py +569 -0
pertpy/metadata/_metadata.py +70 -0
pertpy/metadata/_moa.py +125 -0
pertpy/plot/__init__.py +0 -13
pertpy/preprocessing/__init__.py +2 -0
pertpy/preprocessing/_guide_rna.py +89 -6
pertpy/tools/__init__.py +48 -15
pertpy/tools/_augur.py +329 -32
pertpy/tools/_cinemaot.py +145 -6
pertpy/tools/_coda/_base_coda.py +1237 -116
pertpy/tools/_coda/_sccoda.py +66 -36
pertpy/tools/_coda/_tasccoda.py +46 -39
pertpy/tools/_dialogue.py +180 -77
pertpy/tools/_differential_gene_expression/__init__.py +20 -0
pertpy/tools/_differential_gene_expression/_base.py +657 -0
pertpy/tools/_differential_gene_expression/_checks.py +41 -0
pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
pertpy/tools/_differential_gene_expression/_edger.py +125 -0
pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
pertpy/tools/_distances/_distance_tests.py +29 -24
pertpy/tools/_distances/_distances.py +584 -98
pertpy/tools/_enrichment.py +460 -0
pertpy/tools/_kernel_pca.py +1 -1
pertpy/tools/_milo.py +406 -49
pertpy/tools/_mixscape.py +677 -55
pertpy/tools/_perturbation_space/_clustering.py +10 -3
pertpy/tools/_perturbation_space/_comparison.py +112 -0
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +524 -0
pertpy/tools/_perturbation_space/_perturbation_space.py +146 -52
pertpy/tools/_perturbation_space/_simple.py +52 -11
pertpy/tools/_scgen/__init__.py +1 -1
pertpy/tools/_scgen/_base_components.py +2 -3
pertpy/tools/_scgen/_scgen.py +706 -0
pertpy/tools/_scgen/_utils.py +3 -5
pertpy/tools/decoupler_LICENSE +674 -0
{pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +48 -20
pertpy-0.8.0.dist-info/RECORD +57 -0
{pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
pertpy/plot/_augur.py +0 -234
pertpy/plot/_cinemaot.py +0 -81
pertpy/plot/_coda.py +0 -1001
pertpy/plot/_dialogue.py +0 -91
pertpy/plot/_guide_rna.py +0 -82
pertpy/plot/_milopy.py +0 -284
pertpy/plot/_mixscape.py +0 -594
pertpy/plot/_scgen.py +0 -337
pertpy/tools/_differential_gene_expression.py +0 -99
pertpy/tools/_metadata/__init__.py +0 -0
pertpy/tools/_metadata/_cell_line.py +0 -613
pertpy/tools/_metadata/_look_up.py +0 -342
pertpy/tools/_perturbation_space/_discriminator_classifier.py +0 -381
pertpy/tools/_scgen/_jax_scgen.py +0 -370
pertpy-0.6.0.dist-info/RECORD +0 -50
/pertpy/tools/_scgen/{_jax_scgenvae.py → _scgenvae.py} +0 -0
{pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0

pertpy/tools/_augur.py CHANGED Viewed

@@ -4,14 +4,17 @@ import random
 from collections import defaultdict
 from dataclasses import dataclass
 from math import floor, nan
-from typing import Any, Literal
+from typing import TYPE_CHECKING, Any, Literal
+import anndata as ad
+import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import scanpy as sc
 import statsmodels.api as sm
 from anndata import AnnData
 from joblib import Parallel, delayed
+from lamin_utils import logger
 from rich import print
 from rich.progress import track
 from scipy import sparse, stats
@@ -34,6 +37,10 @@ from sklearn.preprocessing import LabelEncoder
 from skmisc.loess import loess
 from statsmodels.stats.multitest import fdrcorrection
+if TYPE_CHECKING:
+    from matplotlib.axes import Axes
+    from matplotlib.figure import Figure
 @dataclass
 class Params:
@@ -121,7 +128,7 @@ class Augur:
                     _ = input[cell_type_col]
                     _ = input[label_col]
                 except KeyError:
-                    print("[bold red]No column names matching cell_type_col and label_col.")
+                    logger.error("No column names matching cell_type_col and label_col.")
             label = input[label_col] if meta is None else meta[label_col]
             cell_type = input[cell_type_col] if meta is None else meta[cell_type_col]
@@ -134,9 +141,9 @@ class Augur:
         if adata.obs["label"].dtype.name == "category":
             # filter samples according to label
             if condition_label is not None and treatment_label is not None:
-                print(f"Filtering samples with {condition_label} and {treatment_label} labels.")
-                adata = AnnData.concatenate(
-                    adata[adata.obs["label"] == condition_label], adata[adata.obs["label"] == treatment_label]
+                logger.info(f"Filtering samples with {condition_label} and {treatment_label} labels.")
+                adata = ad.concat(
+                    [adata[adata.obs["label"] == condition_label], adata[adata.obs["label"] == treatment_label]]
                 )
             label_encoder = LabelEncoder()
             adata.obs["y_"] = label_encoder.fit_transform(adata.obs["label"])
@@ -214,7 +221,9 @@ class Augur:
             >>> loaded_data = ag_rfc.load(adata)
             >>> ag_rfc.select_highly_variable(loaded_data)
             >>> features = loaded_data.var_names
-            >>> subsample = ag_rfc.sample(loaded_data, categorical=True, subsample_size=20, random_state=42, features=loaded_data.var_names)
+            >>> subsample = ag_rfc.sample(
+            ...     loaded_data, categorical=True, subsample_size=20, random_state=42, features=loaded_data.var_names
+            ... )
         """
         # export subsampling.
         random.seed(random_state)
@@ -230,7 +239,7 @@ class Augur:
                         random_state=random_state,
                     )
                 )
-            subsample = AnnData.concatenate(*label_subsamples, index_unique=None)
+            subsample = ad.concat([*label_subsamples], index_unique=None)
         else:
             subsample = sc.pp.subsample(adata[:, features], n_obs=subsample_size, copy=True, random_state=random_state)
@@ -409,8 +418,8 @@ class Augur:
         """
         if multiclass:
             return {
-                "augur_score": make_scorer(roc_auc_score, multi_class="ovo", needs_proba=True),
-                "auc": make_scorer(roc_auc_score, multi_class="ovo", needs_proba=True),
+                "augur_score": make_scorer(roc_auc_score, multi_class="ovo", response_method="predict_proba"),
+                "auc": make_scorer(roc_auc_score, multi_class="ovo", response_method="predict_proba"),
                 "accuracy": make_scorer(accuracy_score),
                 "precision": make_scorer(precision_score, average="macro", zero_division=zero_division),
                 "f1": make_scorer(f1_score, average="macro"),
@@ -418,8 +427,8 @@ class Augur:
             }
         return (
             {
-                "augur_score": make_scorer(roc_auc_score, needs_proba=True),
-                "auc": make_scorer(roc_auc_score, needs_proba=True),
+                "augur_score": make_scorer(roc_auc_score, response_method="predict_proba"),
+                "auc": make_scorer(roc_auc_score, response_method="predict_proba"),
                 "accuracy": make_scorer(accuracy_score),
                 "precision": make_scorer(precision_score, average="binary", zero_division=zero_division),
                 "f1": make_scorer(f1_score, average="binary"),
@@ -488,7 +497,7 @@ class Augur:
         # feature importances
         feature_importances = defaultdict(list)
         if isinstance(self.estimator, RandomForestClassifier) or isinstance(self.estimator, RandomForestRegressor):
-            for fold, estimator in list(zip(range(len(results["estimator"])), results["estimator"])):
+            for fold, estimator in list(zip(range(len(results["estimator"])), results["estimator"], strict=False)):
                 feature_importances["genes"].extend(x.columns.tolist())
                 feature_importances["feature_importances"].extend(estimator.feature_importances_.tolist())
                 feature_importances["subsample_idx"].extend(len(x.columns) * [subsample_idx])
@@ -497,7 +506,7 @@ class Augur:
         # standardized coefficients with Agresti method
         # cf. https://think-lab.github.io/d/205/#3
         if isinstance(self.estimator, LogisticRegression):
-            for fold, self.estimator in list(zip(range(len(results["estimator"])), results["estimator"])):
+            for fold, self.estimator in list(zip(range(len(results["estimator"])), results["estimator"], strict=False)):
                 feature_importances["genes"].extend(x.columns.tolist())
                 feature_importances["feature_importances"].extend(
                     (self.estimator.coef_ * self.estimator.coef_.std()).flatten().tolist()
@@ -548,7 +557,7 @@ class Augur:
             try:
                 sc.pp.highly_variable_genes(adata)
             except ValueError:
-                print("[bold yellow]Data not normalized. Normalizing now using scanpy log1p normalize.")
+                logger.warn("Data not normalized. Normalizing now using scanpy log1p normalize.")
                 sc.pp.log1p(adata)
                 sc.pp.highly_variable_genes(adata)
@@ -600,7 +609,7 @@ class Augur:
             var_quantile: The quantile below which features will be filtered, based on their residuals in a loess model.
             filter_negative_residuals: if `True`, filter residuals at a fixed threshold of zero, instead of `var_quantile`
             span: Smoothing factor, as a fraction of the number of points to take into account.
-                  Should be in the range (0, 1]. Defaults to 0.75
+                  Should be in the range (0, 1].
         Return:
             AnnData object with additional select_variance column in var.
@@ -692,13 +701,11 @@ class Augur:
             feature_perc: proportion of genes that are randomly selected as features for input to the classifier in each
                           subsample using the random gene filter
             var_quantile: The quantile below which features will be filtered, based on their residuals in a loess model.
-                          Defaults to 0.5.
             span: Smoothing factor, as a fraction of the number of points to take into account. Should be in the range (0, 1].
-                  Defaults to 0.75.
             filter_negative_residuals: if `True`, filter residuals at a fixed threshold of zero, instead of `var_quantile`
             n_threads: number of threads to use for parallelization
             select_variance_features: Whether to select genes based on the original Augur implementation (True)
-                                      or using scanpy's highly_variable_genes (False). Defaults to True.
+                                      or using scanpy's highly_variable_genes (False).
             key_added: Key to add results to in .uns
             augur_mode: One of 'default', 'velocity' or 'permute'. Setting augur_mode = "velocity" disables feature selection,
                         assuming feature selection has been performed by the RNA velocity procedure to produce the input matrix,
@@ -723,6 +730,7 @@ class Augur:
             >>> loaded_data = ag_rfc.load(adata)
             >>> h_adata, h_results = ag_rfc.predict(loaded_data, subsample_size=20, n_threads=4)
         """
+        adata = adata.copy()
         if augur_mode == "permute" and n_subsamples < 100:
             n_subsamples = 500
         if is_regressor(self.estimator) and len(adata.obs["y_"].unique()) <= 3:
@@ -742,8 +750,8 @@ class Augur:
             "full_results": defaultdict(list),
         }
         if select_variance_features:
-            print("[bold yellow]Set smaller span value in the case of a `segmentation fault` error.")
-            print("[bold yellow]Set larger span in case of svddc or other near singularities error.")
+            logger.warning("Set smaller span value in the case of a `segmentation fault` error.")
+            logger.warning("Set larger span in case of svddc or other near singularities error.")
         adata.obs["augur_score"] = nan
         for cell_type in track(adata.obs["cell_type"].unique(), description="Processing data..."):
             cell_type_subsample = adata[adata.obs["cell_type"] == cell_type].copy()
@@ -759,17 +767,18 @@ class Augur:
                     )
                 )
             if len(cell_type_subsample) < min_cells:
-                print(
-                    f"[bold red]Skipping {cell_type} cell type - {len(cell_type_subsample)} samples is less than min_cells {min_cells}."
+                logger.warning(
+                    f"Skipping {cell_type} cell type - {len(cell_type_subsample)} samples is less than min_cells {min_cells}."
                 )
             elif (
                 cell_type_subsample.obs.groupby(
                     ["cell_type", "label"],
+                    observed=True,
                 ).y_.count()
                 < subsample_size
             ).any():
-                print(
-                    f"[bold red]Skipping {cell_type} cell type - the number of samples for at least one class type is less than "
+                logger.warning(
+                    f"Skipping {cell_type} cell type - the number of samples for at least one class type is less than "
                     f"subsample size {subsample_size}."
                 )
             else:
@@ -804,14 +813,14 @@ class Augur:
                     * (len(results["feature_importances"]["genes"]) - len(results["feature_importances"]["cell_type"]))
                 )
-                for idx, cv in zip(range(n_subsamples), results[cell_type]):
+                for idx, cv in zip(range(n_subsamples), results[cell_type], strict=False):
                     results["full_results"]["idx"].extend([idx] * folds)
                     results["full_results"]["augur_score"].extend(cv["test_augur_score"])
                     results["full_results"]["folds"].extend(range(folds))
                 results["full_results"]["cell_type"].extend([cell_type] * folds * n_subsamples)
         # make sure one cell type worked
         if len(results) <= 2:
-            print("[bold red]No cells types had more than min_cells needed. Please adjust data or min_cells parameter.")
+            logger.warning("No cells types had more than min_cells needed. Please adjust data or min_cells parameter.")
         results["summary_metrics"] = pd.DataFrame(results["summary_metrics"])
         results["feature_importances"] = pd.DataFrame(results["feature_importances"])
@@ -840,7 +849,7 @@ class Augur:
             augur2: Augurpy results from condition 2, obtained from `predict()[1]`
             permuted1: permuted Augurpy results from condition 1, obtained from `predict()` with argument `augur_mode=permute`
             permuted2: permuted Augurpy results from condition 2, obtained from `predict()` with argument `augur_mode=permute`
-            n_subsamples: number of subsamples to pool when calculating the mean augur score for each permutation; Defaults to 50.
+            n_subsamples: number of subsamples to pool when calculating the mean augur score for each permutation.
             n_permutations: the total number of mean augur scores to calculate from a background distribution
         Returns:
@@ -869,28 +878,31 @@ class Augur:
             & set(permuted_results1["summary_metrics"].columns)
             & set(permuted_results2["summary_metrics"].columns)
         )
+        cell_types_list = list(cell_types)
         # mean augur scores
         augur_score1 = (
             augur_results1["summary_metrics"]
-            .loc["mean_augur_score", cell_types]
+            .loc["mean_augur_score", cell_types_list]
             .reset_index()
             .rename(columns={"index": "cell_type"})
         )
         augur_score2 = (
             augur_results2["summary_metrics"]
-            .loc["mean_augur_score", cell_types]
+            .loc["mean_augur_score", cell_types_list]
             .reset_index()
             .rename(columns={"index": "cell_type"})
         )
         # mean permuted scores over cross validation runs
         permuted_cv_augur1 = (
-            permuted_results1["full_results"][permuted_results1["full_results"]["cell_type"].isin(cell_types)]
+            permuted_results1["full_results"][permuted_results1["full_results"]["cell_type"].isin(cell_types_list)]
             .groupby(["cell_type", "idx"], as_index=False)
             .mean()
         )
         permuted_cv_augur2 = (
-            permuted_results2["full_results"][permuted_results2["full_results"]["cell_type"].isin(cell_types)]
+            permuted_results2["full_results"][permuted_results2["full_results"]["cell_type"].isin(cell_types_list)]
             .groupby(["cell_type", "idx"], as_index=False)
             .mean()
         )
@@ -901,7 +913,7 @@ class Augur:
         # draw mean aucs for permute1 and permute2
         for celltype in permuted_cv_augur1["cell_type"].unique():
             df1 = permuted_cv_augur1[permuted_cv_augur1["cell_type"] == celltype]
-            df2 = permuted_cv_augur2[permuted_cv_augur1["cell_type"] == celltype]
+            df2 = permuted_cv_augur2[permuted_cv_augur2["cell_type"] == celltype]
             for permutation_idx in range(n_permutations):
                 # subsample
                 sample1 = df1.sample(n=n_subsamples, random_state=permutation_idx, axis="index")
@@ -961,3 +973,288 @@ class Augur:
         delta["padj"] = fdrcorrection(delta["pval"])[1]
         return delta
+    def plot_dp_scatter(
+        self,
+        results: pd.DataFrame,
+        top_n: int | None = None,
+        return_fig: bool | None = None,
+        ax: Axes | None = None,
+        show: bool | None = None,
+        save: str | bool | None = None,
+    ) -> Axes | Figure | None:
+        """Plot scatterplot of differential prioritization.
+        Args:
+            results: Results after running differential prioritization. The columns `mean_augur_score1`,
+                     `mean_augur_score2`, `z`, `pval` and `cell_type` are read.
+            top_n: optionally, the number of cell types with the smallest p-values to label in the plot.
+                   If `None`, every cell type is labelled.
+            return_fig: if `True`, returns the figure that holds the plot.
+            ax: optionally, axes used to draw plot. A new figure and axes are created if `None`.
+            show: if `True`, displays the plot with `plt.show()`.
+            save: optionally, the path the figure is written to.
+        Returns:
+            The figure if `return_fig` is set. Otherwise the axes of the plot if neither `show` nor `save` is set,
+            and `None` in all remaining cases.
+        Examples:
+            >>> import pertpy as pt
+            >>> adata = pt.dt.bhattacherjee()
+            >>> ag_rfc = pt.tl.Augur("random_forest_classifier")
+            >>> data_15 = ag_rfc.load(adata, condition_label="Maintenance_Cocaine", treatment_label="withdraw_15d_Cocaine")
+            >>> adata_15, results_15 = ag_rfc.predict(data_15, random_state=None, n_threads=4)
+            >>> adata_15_permute, results_15_permute = ag_rfc.predict(
+            ...     data_15, augur_mode="permute", n_subsamples=100, random_state=None, n_threads=4
+            ... )
+            >>> data_48 = ag_rfc.load(adata, condition_label="Maintenance_Cocaine", treatment_label="withdraw_48h_Cocaine")
+            >>> adata_48, results_48 = ag_rfc.predict(data_48, random_state=None, n_threads=4)
+            >>> adata_48_permute, results_48_permute = ag_rfc.predict(
+            ...     data_48, augur_mode="permute", n_subsamples=100, random_state=None, n_threads=4
+            ... )
+            >>> pvals = ag_rfc.predict_differential_prioritization(
+            ...     augur_results1=results_15,
+            ...     augur_results2=results_48,
+            ...     permuted_results1=results_15_permute,
+            ...     permuted_results2=results_48_permute,
+            ... )
+            >>> ag_rfc.plot_dp_scatter(pvals)
+        Preview:
+            .. image:: /_static/docstring_previews/augur_dp_scatter.png
+        """
+        x = results["mean_augur_score1"]
+        y = results["mean_augur_score2"]
+        if ax is None:
+            _, ax = plt.subplots()
+        # Resolve the figure from the axes so a caller-supplied `ax` is honoured
+        # even when it does not belong to pyplot's current figure.
+        fig = ax.get_figure()
+        scatter = ax.scatter(x, y, c=results.z, cmap="Greens")
+        # adding optional labels
+        top_n_index = results.sort_values(by="pval").index[:top_n]
+        for idx in top_n_index:
+            ax.annotate(
+                results.loc[idx, "cell_type"],
+                (results.loc[idx, "mean_augur_score1"], results.loc[idx, "mean_augur_score2"]),
+            )
+        # add diagonal: span the union of both axis ranges. Taking `max()` of the two
+        # limit tuples would compare them lexicographically and keep only one range.
+        bounds = (*ax.get_xlim(), *ax.get_ylim())
+        limits = (min(bounds), max(bounds))
+        ax.plot(limits, limits, ls="--", c=".3")
+        # formatting and details
+        ax.set_xlabel("Augur scores 1")
+        ax.set_ylabel("Augur scores 2")
+        legend1 = ax.legend(*scatter.legend_elements(), loc="center left", title="z-scores", bbox_to_anchor=(1, 0.5))
+        ax.add_artist(legend1)
+        if save:
+            fig.savefig(save, bbox_inches="tight")
+        if show:
+            plt.show()
+        if return_fig:
+            return fig
+        if not (show or save):
+            return ax
+        return None
+    def plot_important_features(
+        self,
+        data: dict[str, Any] | AnnData,
+        key: str = "augurpy_results",
+        top_n: int = 10,
+        return_fig: bool | None = None,
+        ax: Axes | None = None,
+        show: bool | None = None,
+        save: str | bool | None = None,
+    ) -> Axes | Figure | None:
+        """Plot a lollipop plot of the n features with largest feature importances.
+        Args:
+            data: results after running `predict()` as dictionary or the AnnData object.
+            key: Key in `.uns` of the AnnData object under which the results are stored.
+                 Only used if `data` is an AnnData object.
+            top_n: n number feature importance values to plot.
+            return_fig: if `True`, returns the figure that holds the plot.
+            ax: optionally, axes used to draw plot. A new figure and axes are created if `None`.
+            show: if `True`, displays the plot with `plt.show()`.
+            save: optionally, the path the figure is written to.
+        Returns:
+            The figure if `return_fig` is set. Otherwise the axes of the plot if neither `show` nor `save` is set,
+            and `None` in all remaining cases.
+        Examples:
+            >>> import pertpy as pt
+            >>> adata = pt.dt.sc_sim_augur()
+            >>> ag_rfc = pt.tl.Augur("random_forest_classifier")
+            >>> loaded_data = ag_rfc.load(adata)
+            >>> v_adata, v_results = ag_rfc.predict(
+            ...     loaded_data, subsample_size=20, select_variance_features=True, n_threads=4
+            ... )
+            >>> ag_rfc.plot_important_features(v_results)
+        Preview:
+            .. image:: /_static/docstring_previews/augur_important_features.png
+        """
+        results = data.uns[key] if isinstance(data, AnnData) else data
+        # mean importance per gene, ascending, keeping the `top_n` largest at the end
+        n_features = (
+            results["feature_importances"]
+            .groupby("genes", as_index=False)
+            .feature_importances.mean()
+            .sort_values(by="feature_importances")[-top_n:]
+        )
+        if ax is None:
+            _, ax = plt.subplots()
+        # Resolve the figure from the axes so a caller-supplied `ax` is honoured
+        # even when it does not belong to pyplot's current figure.
+        fig = ax.get_figure()
+        # Size the y positions by the rows actually selected: fewer than `top_n`
+        # genes may be available, and `hlines` needs matching lengths.
+        y_axes_range = range(1, len(n_features) + 1)
+        ax.hlines(
+            y_axes_range,
+            xmin=0,
+            xmax=n_features["feature_importances"],
+        )
+        ax.plot(n_features["feature_importances"], y_axes_range, "o")
+        ax.set_xlabel("Mean Feature Importance")
+        ax.set_ylabel("Gene")
+        ax.set_yticks(list(y_axes_range))
+        ax.set_yticklabels(n_features["genes"].tolist())
+        if save:
+            fig.savefig(save, bbox_inches="tight")
+        if show:
+            plt.show()
+        if return_fig:
+            return fig
+        if not (show or save):
+            return ax
+        return None
+    def plot_lollipop(
+        self,
+        data: dict[str, Any] | AnnData,
+        key: str = "augurpy_results",
+        return_fig: bool | None = None,
+        ax: Axes | None = None,
+        show: bool | None = None,
+        save: str | bool | None = None,
+    ) -> Axes | Figure | None:
+        """Plot a lollipop plot of the mean augur values.
+        Args:
+            data: results after running `predict()` as dictionary or the AnnData object.
+            key: Key in `.uns` of the AnnData object under which the results are stored.
+                 Only used if `data` is an AnnData object.
+            return_fig: if `True`, returns the figure that holds the plot.
+            ax: optionally, axes used to draw plot. A new figure and axes are created if `None`.
+            show: if `True`, displays the plot with `plt.show()`.
+            save: optionally, the path the figure is written to.
+        Returns:
+            The figure if `return_fig` is set. Otherwise the axes of the plot if neither `show` nor `save` is set,
+            and `None` in all remaining cases.
+        Examples:
+            >>> import pertpy as pt
+            >>> adata = pt.dt.sc_sim_augur()
+            >>> ag_rfc = pt.tl.Augur("random_forest_classifier")
+            >>> loaded_data = ag_rfc.load(adata)
+            >>> v_adata, v_results = ag_rfc.predict(
+            ...     loaded_data, subsample_size=20, select_variance_features=True, n_threads=4
+            ... )
+            >>> ag_rfc.plot_lollipop(v_results)
+        Preview:
+            .. image:: /_static/docstring_previews/augur_lollipop.png
+        """
+        results = data.uns[key] if isinstance(data, AnnData) else data
+        if ax is None:
+            _, ax = plt.subplots()
+        # Resolve the figure from the axes so a caller-supplied `ax` is honoured
+        # even when it does not belong to pyplot's current figure.
+        fig = ax.get_figure()
+        # order the cell types (columns) by their mean augur score once and reuse the result
+        sorted_metrics = results["summary_metrics"].sort_values("mean_augur_score", axis=1)
+        mean_scores = sorted_metrics.loc["mean_augur_score"]
+        y_axes_range = range(1, len(sorted_metrics.columns) + 1)
+        ax.hlines(
+            y_axes_range,
+            xmin=0,
+            xmax=mean_scores,
+        )
+        ax.plot(
+            mean_scores,
+            y_axes_range,
+            "o",
+        )
+        ax.set_xlabel("Mean Augur Score")
+        ax.set_ylabel("Cell Type")
+        ax.set_yticks(list(y_axes_range))
+        ax.set_yticklabels(sorted_metrics.columns.tolist())
+        if save:
+            fig.savefig(save, bbox_inches="tight")
+        if show:
+            plt.show()
+        if return_fig:
+            return fig
+        if not (show or save):
+            return ax
+        return None
+    def plot_scatterplot(
+        self,
+        results1: dict[str, Any],
+        results2: dict[str, Any],
+        top_n: int | None = None,
+        return_fig: bool | None = None,
+        show: bool | None = None,
+        save: str | bool | None = None,
+    ) -> Axes | Figure | None:
+        """Create scatterplot with two augur results.
+        Only cell types that occur in the summary metrics of both results are plotted.
+        Args:
+            results1: results after running `predict()`
+            results2: results after running `predict()`
+            top_n: optionally, the number of cell types to label in the plot, ranked by the largest difference
+                   `results1 - results2` of the mean augur scores. If `None`, every cell type is labelled.
+            return_fig: if `True`, returns the figure that holds the plot.
+            show: if `True`, displays the plot with `plt.show()`.
+            save: optionally, the path the figure is written to.
+        Returns:
+            The figure if `return_fig` is set. Otherwise the axes of the plot if neither `show` nor `save` is set,
+            and `None` in all remaining cases.
+        Examples:
+            >>> import pertpy as pt
+            >>> adata = pt.dt.sc_sim_augur()
+            >>> ag_rfc = pt.tl.Augur("random_forest_classifier")
+            >>> loaded_data = ag_rfc.load(adata)
+            >>> h_adata, h_results = ag_rfc.predict(loaded_data, subsample_size=20, n_threads=4)
+            >>> v_adata, v_results = ag_rfc.predict(
+            ...     loaded_data, subsample_size=20, select_variance_features=True, n_threads=4
+            ... )
+            >>> ag_rfc.plot_scatterplot(v_results, h_results)
+        Preview:
+            .. image:: /_static/docstring_previews/augur_scatterplot.png
+        """
+        metrics1 = results1["summary_metrics"]
+        metrics2 = results2["summary_metrics"]
+        # Keep only cell types present in both results (in the order of `results1`),
+        # so a cell type missing from `results2` no longer raises a KeyError.
+        cell_types = [cell_type for cell_type in metrics1.columns if cell_type in metrics2.columns]
+        scores1 = metrics1.loc["mean_augur_score", cell_types]
+        scores2 = metrics2.loc["mean_augur_score", cell_types]
+        fig, ax = plt.subplots()
+        ax.scatter(scores1, scores2)
+        # adding optional labels
+        top_n_cell_types = (scores1 - scores2).sort_values(ascending=False).index[:top_n]
+        for txt in top_n_cell_types:
+            ax.annotate(txt, (scores1.loc[txt], scores2.loc[txt]))
+        # adding diagonal: span the union of both axis ranges. Taking `max()` of the two
+        # limit tuples would compare them lexicographically and keep only one range.
+        bounds = (*ax.get_xlim(), *ax.get_ylim())
+        limits = (min(bounds), max(bounds))
+        ax.plot(limits, limits, ls="--", c=".3")
+        ax.set_xlabel("Augur scores 1")
+        ax.set_ylabel("Augur scores 2")
+        if save:
+            fig.savefig(save, bbox_inches="tight")
+        if show:
+            plt.show()
+        if return_fig:
+            return fig
+        if not (show or save):
+            return ax
+        return None

pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl