PyPI - pertpy - Versions diffs - 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl - Mend

pertpy 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

pertpy/__init__.py +3 -2
pertpy/data/__init__.py +5 -1
pertpy/data/_dataloader.py +2 -4
pertpy/data/_datasets.py +203 -92
pertpy/metadata/__init__.py +4 -0
pertpy/metadata/_cell_line.py +826 -0
pertpy/metadata/_compound.py +129 -0
pertpy/metadata/_drug.py +242 -0
pertpy/metadata/_look_up.py +582 -0
pertpy/metadata/_metadata.py +73 -0
pertpy/metadata/_moa.py +129 -0
pertpy/plot/__init__.py +1 -9
pertpy/plot/_augur.py +53 -116
pertpy/plot/_coda.py +277 -677
pertpy/plot/_guide_rna.py +17 -35
pertpy/plot/_milopy.py +59 -134
pertpy/plot/_mixscape.py +152 -391
pertpy/preprocessing/_guide_rna.py +88 -4
pertpy/tools/__init__.py +8 -13
pertpy/tools/_augur.py +315 -17
pertpy/tools/_cinemaot.py +143 -4
pertpy/tools/_coda/_base_coda.py +1210 -65
pertpy/tools/_coda/_sccoda.py +50 -21
pertpy/tools/_coda/_tasccoda.py +27 -19
pertpy/tools/_dialogue.py +164 -56
pertpy/tools/_differential_gene_expression.py +240 -14
pertpy/tools/_distances/_distance_tests.py +8 -8
pertpy/tools/_distances/_distances.py +184 -34
pertpy/tools/_enrichment.py +465 -0
pertpy/tools/_milo.py +345 -11
pertpy/tools/_mixscape.py +668 -50
pertpy/tools/_perturbation_space/_clustering.py +5 -1
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +526 -0
pertpy/tools/_perturbation_space/_perturbation_space.py +135 -43
pertpy/tools/_perturbation_space/_simple.py +51 -10
pertpy/tools/_scgen/__init__.py +1 -1
pertpy/tools/_scgen/_scgen.py +701 -0
pertpy/tools/_scgen/_utils.py +1 -3
pertpy/tools/decoupler_LICENSE +674 -0
{pertpy-0.6.0.dist-info → pertpy-0.7.0.dist-info}/METADATA +31 -12
pertpy-0.7.0.dist-info/RECORD +53 -0
{pertpy-0.6.0.dist-info → pertpy-0.7.0.dist-info}/WHEEL +1 -1
pertpy/plot/_cinemaot.py +0 -81
pertpy/plot/_dialogue.py +0 -91
pertpy/plot/_scgen.py +0 -337
pertpy/tools/_metadata/__init__.py +0 -0
pertpy/tools/_metadata/_cell_line.py +0 -613
pertpy/tools/_metadata/_look_up.py +0 -342
pertpy/tools/_perturbation_space/_discriminator_classifier.py +0 -381
pertpy/tools/_scgen/_jax_scgen.py +0 -370
pertpy-0.6.0.dist-info/RECORD +0 -50
/pertpy/tools/_scgen/{_jax_scgenvae.py → _scgenvae.py} +0 -0
{pertpy-0.6.0.dist-info → pertpy-0.7.0.dist-info}/licenses/LICENSE +0 -0

pertpy/preprocessing/_guide_rna.py CHANGED Viewed

@@ -1,12 +1,16 @@
 from __future__ import annotations
+import uuid
 from typing import TYPE_CHECKING
 import numpy as np
+import pandas as pd
+import scanpy as sc
 import scipy
 if TYPE_CHECKING:
     from anndata import AnnData
+    from matplotlib.axes import Axes
 class GuideAssignment:
@@ -39,7 +43,7 @@ class GuideAssignment:
             >>> import pertpy as pt
             >>> mdata = pt.data.papalexi_2021()
-            >>> gdo = mdata.mod['gdo']
+            >>> gdo = mdata.mod["gdo"]
             >>> ga = pt.pp.GuideAssignment()
             >>> ga.assign_by_threshold(gdo, assignment_threshold=5)
         """
@@ -71,7 +75,6 @@ class GuideAssignment:
         Args:
             adata: Annotated data matrix containing gRNA values
-                   assignment_threshold: If a gRNA is available for at least `assignment_threshold`, it will be recognized as assigned.
             assignment_threshold: The count threshold that is required for an assignment to be viable.
             layer: Key to the layer containing raw count values of the gRNAs.
                    adata.X is used if layer is None. Expects count data.
@@ -83,8 +86,8 @@ class GuideAssignment:
             Each cell is assigned to the most expressed gRNA if it has at least 5 counts.
             >>> import pertpy as pt
-            >>> mdata = pt.data.papalexi_2021()
-            >>> gdo = mdata.mod['gdo']
+            >>> mdata = pt.dt.papalexi_2021()
+            >>> gdo = mdata.mod["gdo"]
             >>> ga = pt.pp.GuideAssignment()
             >>> ga.assign_to_max_guide(gdo, assignment_threshold=5)
         """
@@ -103,3 +106,84 @@ class GuideAssignment:
         adata.obs[output_key] = assigned_grna
         return None
+    def plot_heatmap(
+        self,
+        adata: AnnData,
+        layer: str | None = None,
+        order_by: np.ndarray | str | None = None,
+        key_to_save_order: str | None = None,
+        **kwargs,
+    ) -> list[Axes]:
+        """Heatmap plotting of guide RNA expression matrix.
+        Assuming guides have sparse expression, this function reorders cells
+        and plots guide RNA expression so that a nice sparse representation is achieved.
+        The cell ordering can be stored and reused in future plots to obtain consistent
+        plots before and after analysis of the guide RNA expression.
+        Note: This function expects a log-normalized or binary data.
+        A temporary column is added to adata.obs for the duration of the plot and is always removed afterwards.
+        Args:
+            adata: Annotated data matrix containing gRNA values
+            layer: Key to the layer containing log normalized count values of the gRNAs.
+                   adata.X is used if layer is None.
+            order_by: The order of cells in y axis. Defaults to None.
+                      If None, cells are sorted by the index of their maximal guide; cells whose
+                      row maximum equals their row minimum are placed first.
+                      If a string is provided, np.argsort(adata.obs[order_by]) will be used as the order.
+                      If a numpy array is provided, the array will be used for ordering.
+            key_to_save_order: The obs key to save cell orders in the current plot. Only saves if not None.
+            kwargs: Are passed to sc.pl.heatmap.
+        Returns:
+            The return value of sc.pl.heatmap (a list of Axes). Alternatively you can pass save or show
+            parameters as they will be passed to sc.pl.heatmap.
+            Order of cells in the y-axis will be saved on adata.obs[key_to_save_order] if provided.
+        Examples:
+            Each cell is assigned to gRNA that occurs at least 5 times in the respective cell, which is then
+            visualized using a heatmap.
+            >>> import pertpy as pt
+            >>> mdata = pt.dt.papalexi_2021()
+            >>> gdo = mdata.mod["gdo"]
+            >>> ga = pt.pp.GuideAssignment()
+            >>> ga.assign_by_threshold(gdo, assignment_threshold=5)
+            >>> ga.plot_heatmap(gdo)
+        """
+        def _flat(values) -> np.ndarray:
+            # Row-wise reductions on scipy sparse containers may return sparse or np.matrix
+            # objects; convert explicitly instead of relying on the deprecated `.A` attribute.
+            if scipy.sparse.issparse(values):
+                values = values.toarray()
+            # ravel (rather than squeeze) keeps a 1-d result even for a single cell.
+            return np.asarray(values).ravel()
+        data = adata.X if layer is None else adata.layers[layer]
+        if order_by is None:
+            row_max = _flat(data.max(axis=1))
+            row_min = _flat(data.min(axis=1))
+            row_argmax = _flat(data.argmax(axis=1))
+            # Cells with a constant row have no dominant guide and get index -1.
+            max_guide_index = np.where(row_max != row_min, row_argmax, -1)
+            order = np.argsort(max_guide_index)
+        elif isinstance(order_by, str):
+            order = np.argsort(adata.obs[order_by])
+        else:
+            order = order_by
+        if key_to_save_order is not None:
+            # NOTE(review): `order` is the positional permutation used for plotting; passing this key back
+            # through `order_by` applies np.argsort to it - confirm that round trip reproduces this ordering.
+            adata.obs[key_to_save_order] = pd.Categorical(order)
+        # sc.pl.heatmap requires a groupby column; use a throw-away single-category one.
+        # It is created directly before the try block so that it cannot leak into adata.obs.
+        temp_col_name = f"_tmp_pertpy_grna_plot_{uuid.uuid4()}"
+        adata.obs[temp_col_name] = pd.Categorical(["" for _ in range(adata.shape[0])])
+        try:
+            axis_group = sc.pl.heatmap(
+                adata[order, :],
+                var_names=adata.var.index.tolist(),
+                groupby=temp_col_name,
+                cmap="viridis",
+                use_raw=False,
+                dendrogram=False,
+                layer=layer,
+                **kwargs,
+            )
+        finally:
+            del adata.obs[temp_col_name]
+        return axis_group

pertpy/tools/__init__.py CHANGED Viewed

@@ -1,24 +1,19 @@
-from rich import print
 from pertpy.tools._augur import Augur
 from pertpy.tools._cinemaot import Cinemaot
+from pertpy.tools._coda._sccoda import Sccoda
+from pertpy.tools._coda._tasccoda import Tasccoda
 from pertpy.tools._dialogue import Dialogue
 from pertpy.tools._differential_gene_expression import DifferentialGeneExpression
 from pertpy.tools._distances._distance_tests import DistanceTest
 from pertpy.tools._distances._distances import Distance
-from pertpy.tools._metadata._cell_line import CellLineMetaData
+from pertpy.tools._enrichment import Enrichment
 from pertpy.tools._milo import Milo
 from pertpy.tools._mixscape import Mixscape
 from pertpy.tools._perturbation_space._clustering import ClusteringSpace
-from pertpy.tools._perturbation_space._discriminator_classifier import DiscriminatorClassifierSpace
+from pertpy.tools._perturbation_space._discriminator_classifiers import (
+    DiscriminatorClassifierSpace,
+    LRClassifierSpace,
+    MLPClassifierSpace,
+)
 from pertpy.tools._perturbation_space._simple import CentroidSpace, DBSCANSpace, KMeansSpace, PseudobulkSpace
 from pertpy.tools._scgen import SCGEN
-try:
-    from pertpy.tools._coda._sccoda import Sccoda
-    from pertpy.tools._coda._tasccoda import Tasccoda
-except ImportError as e:
-    if "ete3" in str(e):
-        print("[bold yellow]To use sccoda or tasccoda please install ete3 with [green]pip install ete3")
-    else:
-        raise e

pertpy/tools/_augur.py CHANGED Viewed

@@ -4,8 +4,10 @@ import random
 from collections import defaultdict
 from dataclasses import dataclass
 from math import floor, nan
-from typing import Any, Literal
+from typing import TYPE_CHECKING, Any, Literal
+import anndata as ad
+import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import scanpy as sc
@@ -34,6 +36,10 @@ from sklearn.preprocessing import LabelEncoder
 from skmisc.loess import loess
 from statsmodels.stats.multitest import fdrcorrection
+if TYPE_CHECKING:
+    from matplotlib.axes import Axes
+    from matplotlib.figure import Figure
 @dataclass
 class Params:
@@ -135,8 +141,8 @@ class Augur:
             # filter samples according to label
             if condition_label is not None and treatment_label is not None:
                 print(f"Filtering samples with {condition_label} and {treatment_label} labels.")
-                adata = AnnData.concatenate(
-                    adata[adata.obs["label"] == condition_label], adata[adata.obs["label"] == treatment_label]
+                adata = ad.concat(
+                    [adata[adata.obs["label"] == condition_label], adata[adata.obs["label"] == treatment_label]]
                 )
             label_encoder = LabelEncoder()
             adata.obs["y_"] = label_encoder.fit_transform(adata.obs["label"])
@@ -214,7 +220,9 @@ class Augur:
             >>> loaded_data = ag_rfc.load(adata)
             >>> ag_rfc.select_highly_variable(loaded_data)
             >>> features = loaded_data.var_names
-            >>> subsample = ag_rfc.sample(loaded_data, categorical=True, subsample_size=20, random_state=42, features=loaded_data.var_names)
+            >>> subsample = ag_rfc.sample(
+            ...     loaded_data, categorical=True, subsample_size=20, random_state=42, features=loaded_data.var_names
+            ... )
         """
         # export subsampling.
         random.seed(random_state)
@@ -230,7 +238,7 @@ class Augur:
                         random_state=random_state,
                     )
                 )
-            subsample = AnnData.concatenate(*label_subsamples, index_unique=None)
+            subsample = ad.concat([*label_subsamples], index_unique=None)
         else:
             subsample = sc.pp.subsample(adata[:, features], n_obs=subsample_size, copy=True, random_state=random_state)
@@ -409,8 +417,8 @@ class Augur:
         """
         if multiclass:
             return {
-                "augur_score": make_scorer(roc_auc_score, multi_class="ovo", needs_proba=True),
-                "auc": make_scorer(roc_auc_score, multi_class="ovo", needs_proba=True),
+                "augur_score": make_scorer(roc_auc_score, multi_class="ovo", response_method="predict_proba"),
+                "auc": make_scorer(roc_auc_score, multi_class="ovo", response_method="predict_proba"),
                 "accuracy": make_scorer(accuracy_score),
                 "precision": make_scorer(precision_score, average="macro", zero_division=zero_division),
                 "f1": make_scorer(f1_score, average="macro"),
@@ -418,8 +426,8 @@ class Augur:
             }
         return (
             {
-                "augur_score": make_scorer(roc_auc_score, needs_proba=True),
-                "auc": make_scorer(roc_auc_score, needs_proba=True),
+                "augur_score": make_scorer(roc_auc_score, response_method="predict_proba"),
+                "auc": make_scorer(roc_auc_score, response_method="predict_proba"),
                 "accuracy": make_scorer(accuracy_score),
                 "precision": make_scorer(precision_score, average="binary", zero_division=zero_division),
                 "f1": make_scorer(f1_score, average="binary"),
@@ -488,7 +496,7 @@ class Augur:
         # feature importances
         feature_importances = defaultdict(list)
         if isinstance(self.estimator, RandomForestClassifier) or isinstance(self.estimator, RandomForestRegressor):
-            for fold, estimator in list(zip(range(len(results["estimator"])), results["estimator"])):
+            for fold, estimator in list(zip(range(len(results["estimator"])), results["estimator"], strict=False)):
                 feature_importances["genes"].extend(x.columns.tolist())
                 feature_importances["feature_importances"].extend(estimator.feature_importances_.tolist())
                 feature_importances["subsample_idx"].extend(len(x.columns) * [subsample_idx])
@@ -497,7 +505,7 @@ class Augur:
         # standardized coefficients with Agresti method
         # cf. https://think-lab.github.io/d/205/#3
         if isinstance(self.estimator, LogisticRegression):
-            for fold, self.estimator in list(zip(range(len(results["estimator"])), results["estimator"])):
+            for fold, self.estimator in list(zip(range(len(results["estimator"])), results["estimator"], strict=False)):
                 feature_importances["genes"].extend(x.columns.tolist())
                 feature_importances["feature_importances"].extend(
                     (self.estimator.coef_ * self.estimator.coef_.std()).flatten().tolist()
@@ -723,6 +731,7 @@ class Augur:
             >>> loaded_data = ag_rfc.load(adata)
             >>> h_adata, h_results = ag_rfc.predict(loaded_data, subsample_size=20, n_threads=4)
         """
+        adata = adata.copy()
         if augur_mode == "permute" and n_subsamples < 100:
             n_subsamples = 500
         if is_regressor(self.estimator) and len(adata.obs["y_"].unique()) <= 3:
@@ -765,6 +774,7 @@ class Augur:
             elif (
                 cell_type_subsample.obs.groupby(
                     ["cell_type", "label"],
+                    observed=True,
                 ).y_.count()
                 < subsample_size
             ).any():
@@ -804,7 +814,7 @@ class Augur:
                     * (len(results["feature_importances"]["genes"]) - len(results["feature_importances"]["cell_type"]))
                 )
-                for idx, cv in zip(range(n_subsamples), results[cell_type]):
+                for idx, cv in zip(range(n_subsamples), results[cell_type], strict=False):
                     results["full_results"]["idx"].extend([idx] * folds)
                     results["full_results"]["augur_score"].extend(cv["test_augur_score"])
                     results["full_results"]["folds"].extend(range(folds))
@@ -869,28 +879,31 @@ class Augur:
             & set(permuted_results1["summary_metrics"].columns)
             & set(permuted_results2["summary_metrics"].columns)
         )
+        cell_types_list = list(cell_types)
         # mean augur scores
         augur_score1 = (
             augur_results1["summary_metrics"]
-            .loc["mean_augur_score", cell_types]
+            .loc["mean_augur_score", cell_types_list]
             .reset_index()
             .rename(columns={"index": "cell_type"})
         )
         augur_score2 = (
             augur_results2["summary_metrics"]
-            .loc["mean_augur_score", cell_types]
+            .loc["mean_augur_score", cell_types_list]
             .reset_index()
             .rename(columns={"index": "cell_type"})
         )
         # mean permuted scores over cross validation runs
         permuted_cv_augur1 = (
-            permuted_results1["full_results"][permuted_results1["full_results"]["cell_type"].isin(cell_types)]
+            permuted_results1["full_results"][permuted_results1["full_results"]["cell_type"].isin(cell_types_list)]
             .groupby(["cell_type", "idx"], as_index=False)
             .mean()
         )
         permuted_cv_augur2 = (
-            permuted_results2["full_results"][permuted_results2["full_results"]["cell_type"].isin(cell_types)]
+            permuted_results2["full_results"][permuted_results2["full_results"]["cell_type"].isin(cell_types_list)]
             .groupby(["cell_type", "idx"], as_index=False)
             .mean()
         )
@@ -901,7 +914,7 @@ class Augur:
         # draw mean aucs for permute1 and permute2
         for celltype in permuted_cv_augur1["cell_type"].unique():
             df1 = permuted_cv_augur1[permuted_cv_augur1["cell_type"] == celltype]
-            df2 = permuted_cv_augur2[permuted_cv_augur1["cell_type"] == celltype]
+            df2 = permuted_cv_augur2[permuted_cv_augur2["cell_type"] == celltype]
             for permutation_idx in range(n_permutations):
                 # subsample
                 sample1 = df1.sample(n=n_subsamples, random_state=permutation_idx, axis="index")
@@ -961,3 +974,288 @@ class Augur:
         delta["padj"] = fdrcorrection(delta["pval"])[1]
         return delta
+    def plot_dp_scatter(
+        self,
+        results: pd.DataFrame,
+        top_n: int | None = None,
+        return_fig: bool | None = None,
+        ax: Axes | None = None,
+        show: bool | None = None,
+        save: str | bool | None = None,
+    ) -> Axes | Figure | None:
+        """Plot scatterplot of differential prioritization.
+        Args:
+            results: Results after running differential prioritization. The columns `mean_augur_score1`,
+                     `mean_augur_score2`, `z`, `pval` and `cell_type` are read.
+            top_n: optionally, the number of cell types with the smallest p-values to label in the plot.
+                   If None, all cell types are labelled.
+            return_fig: if truthy, the figure that contains the plot is returned.
+            ax: optionally, axes used to draw plot. A new figure is created if None.
+            show: if truthy, `plt.show()` is called.
+            save: if truthy, it is passed to `savefig` as the target of the figure.
+        Returns:
+            The figure if `return_fig` is truthy, otherwise the Axes of the plot if neither `show`
+            nor `save` is set, otherwise None.
+        Examples:
+            >>> import pertpy as pt
+            >>> adata = pt.dt.bhattacherjee()
+            >>> ag_rfc = pt.tl.Augur("random_forest_classifier")
+            >>> data_15 = ag_rfc.load(adata, condition_label="Maintenance_Cocaine", treatment_label="withdraw_15d_Cocaine")
+            >>> adata_15, results_15 = ag_rfc.predict(data_15, random_state=None, n_threads=4)
+            >>> adata_15_permute, results_15_permute = ag_rfc.predict(
+            ...     data_15, augur_mode="permute", n_subsamples=100, random_state=None, n_threads=4
+            ... )
+            >>> data_48 = ag_rfc.load(adata, condition_label="Maintenance_Cocaine", treatment_label="withdraw_48h_Cocaine")
+            >>> adata_48, results_48 = ag_rfc.predict(data_48, random_state=None, n_threads=4)
+            >>> adata_48_permute, results_48_permute = ag_rfc.predict(
+            ...     data_48, augur_mode="permute", n_subsamples=100, random_state=None, n_threads=4
+            ... )
+            >>> pvals = ag_rfc.predict_differential_prioritization(
+            ...     augur_results1=results_15,
+            ...     augur_results2=results_48,
+            ...     permuted_results1=results_15_permute,
+            ...     permuted_results2=results_48_permute,
+            ... )
+            >>> ag_rfc.plot_dp_scatter(pvals)
+        Preview:
+            .. image:: /_static/docstring_previews/augur_dp_scatter.png
+        """
+        if ax is None:
+            _, ax = plt.subplots()
+        # Work on the figure that owns `ax` instead of pyplot's "current" figure, so that a
+        # caller-provided axes is labelled, saved and returned correctly.
+        fig = ax.get_figure()
+        x = results["mean_augur_score1"]
+        y = results["mean_augur_score2"]
+        scatter = ax.scatter(x, y, c=results.z, cmap="Greens")
+        # adding optional labels
+        top_n_index = results.sort_values(by="pval").index[:top_n]
+        for idx in top_n_index:
+            ax.annotate(
+                results.loc[idx, "cell_type"],
+                (results.loc[idx, "mean_augur_score1"], results.loc[idx, "mean_augur_score2"]),
+            )
+        # add diagonal; tuple comparison selects the axis range with the larger lower bound
+        limits = max(ax.get_xlim(), ax.get_ylim())
+        ax.plot(limits, limits, ls="--", c=".3")
+        # formatting and details
+        ax.set_xlabel("Augur scores 1")
+        ax.set_ylabel("Augur scores 2")
+        legend1 = ax.legend(*scatter.legend_elements(), loc="center left", title="z-scores", bbox_to_anchor=(1, 0.5))
+        ax.add_artist(legend1)
+        if save:
+            fig.savefig(save, bbox_inches="tight")
+        if show:
+            plt.show()
+        if return_fig:
+            return fig
+        if not (show or save):
+            return ax
+        return None
+    def plot_important_features(
+        self,
+        data: dict[str, Any] | AnnData,
+        key: str = "augurpy_results",
+        top_n: int = 10,
+        return_fig: bool | None = None,
+        ax: Axes | None = None,
+        show: bool | None = None,
+        save: str | bool | None = None,
+    ) -> Axes | Figure | None:
+        """Plot a lollipop plot of the n features with largest feature importances.
+        Args:
+            data: results after running `predict()` as dictionary or the AnnData object.
+            key: Key in `.uns` of the AnnData object under which the results are stored.
+            top_n: n number feature importance values to plot. Default is 10.
+                   Fewer features are plotted if fewer are available.
+            return_fig: if truthy, the figure that contains the plot is returned.
+            ax: optionally, axes used to draw plot. A new figure is created if None.
+            show: if truthy, `plt.show()` is called.
+            save: if truthy, it is passed to `savefig` as the target of the figure.
+        Returns:
+            The figure if `return_fig` is truthy, otherwise the Axes of the plot if neither `show`
+            nor `save` is set, otherwise None.
+        Examples:
+            >>> import pertpy as pt
+            >>> adata = pt.dt.sc_sim_augur()
+            >>> ag_rfc = pt.tl.Augur("random_forest_classifier")
+            >>> loaded_data = ag_rfc.load(adata)
+            >>> v_adata, v_results = ag_rfc.predict(
+            ...     loaded_data, subsample_size=20, select_variance_features=True, n_threads=4
+            ... )
+            >>> ag_rfc.plot_important_features(v_results)
+        Preview:
+            .. image:: /_static/docstring_previews/augur_important_features.png
+        """
+        results = data.uns[key] if isinstance(data, AnnData) else data
+        # mean importance per gene, ascending, keeping the `top_n` largest
+        n_features = (
+            results["feature_importances"]
+            .groupby("genes", as_index=False)
+            .feature_importances.mean()
+            .sort_values(by="feature_importances")[-top_n:]
+        )
+        if ax is None:
+            _, ax = plt.subplots()
+        # Use the figure that owns `ax` rather than pyplot's "current" figure.
+        fig = ax.get_figure()
+        # Size the y positions by the rows actually selected; there may be fewer than `top_n` genes.
+        y_axes_range = list(range(1, len(n_features) + 1))
+        ax.hlines(
+            y_axes_range,
+            xmin=0,
+            xmax=n_features["feature_importances"],
+        )
+        ax.plot(n_features["feature_importances"], y_axes_range, "o")
+        ax.set_xlabel("Mean Feature Importance")
+        ax.set_ylabel("Gene")
+        ax.set_yticks(y_axes_range)
+        ax.set_yticklabels(n_features["genes"])
+        if save:
+            fig.savefig(save, bbox_inches="tight")
+        if show:
+            plt.show()
+        if return_fig:
+            return fig
+        if not (show or save):
+            return ax
+        return None
+    def plot_lollipop(
+        self,
+        data: dict[str, Any] | AnnData,
+        key: str = "augurpy_results",
+        return_fig: bool | None = None,
+        ax: Axes | None = None,
+        show: bool | None = None,
+        save: str | bool | None = None,
+    ) -> Axes | Figure | None:
+        """Plot a lollipop plot of the mean augur values.
+        Args:
+            data: results after running `predict()` as dictionary or the AnnData object.
+            key: Key in `.uns` of the AnnData object under which the results are stored.
+            return_fig: if truthy, the figure that contains the plot is returned.
+            ax: optionally, axes used to draw plot. A new figure is created if None.
+            show: if truthy, `plt.show()` is called.
+            save: if truthy, it is passed to `savefig` as the target of the figure.
+        Returns:
+            The figure if `return_fig` is truthy, otherwise the Axes of the plot if neither `show`
+            nor `save` is set, otherwise None.
+        Examples:
+            >>> import pertpy as pt
+            >>> adata = pt.dt.sc_sim_augur()
+            >>> ag_rfc = pt.tl.Augur("random_forest_classifier")
+            >>> loaded_data = ag_rfc.load(adata)
+            >>> v_adata, v_results = ag_rfc.predict(
+            ...     loaded_data, subsample_size=20, select_variance_features=True, n_threads=4
+            ... )
+            >>> ag_rfc.plot_lollipop(v_results)
+        Preview:
+            .. image:: /_static/docstring_previews/augur_lollipop.png
+        """
+        results = data.uns[key] if isinstance(data, AnnData) else data
+        if ax is None:
+            _, ax = plt.subplots()
+        # Use the figure that owns `ax` rather than pyplot's "current" figure.
+        fig = ax.get_figure()
+        # Sort the cell types (columns) by their mean augur score once and reuse the result;
+        # the index of this row holds the cell type names in plotting order.
+        sorted_scores = results["summary_metrics"].sort_values("mean_augur_score", axis=1).loc["mean_augur_score"]
+        y_axes_range = list(range(1, len(sorted_scores) + 1))
+        ax.hlines(
+            y_axes_range,
+            xmin=0,
+            xmax=sorted_scores,
+        )
+        ax.plot(sorted_scores, y_axes_range, "o")
+        ax.set_xlabel("Mean Augur Score")
+        ax.set_ylabel("Cell Type")
+        ax.set_yticks(y_axes_range)
+        ax.set_yticklabels(sorted_scores.index)
+        if save:
+            fig.savefig(save, bbox_inches="tight")
+        if show:
+            plt.show()
+        if return_fig:
+            return fig
+        if not (show or save):
+            return ax
+        return None
+    def plot_scatterplot(
+        self,
+        results1: dict[str, Any],
+        results2: dict[str, Any],
+        top_n: int | None = None,
+        return_fig: bool | None = None,
+        show: bool | None = None,
+        save: str | bool | None = None,
+    ) -> Axes | Figure | None:
+        """Create scatterplot with two augur results.
+        Args:
+            results1: results after running `predict()`
+            results2: results after running `predict()`
+            top_n: optionally, the number of cell types with the largest score difference
+                   (results1 minus results2) to label in the plot. If None, all cell types are labelled.
+            return_fig: if truthy, the figure of the plot is returned.
+            show: if truthy, `plt.show()` is called.
+            save: if truthy, it is passed to `savefig` as the target of the figure.
+        Returns:
+            The figure if `return_fig` is truthy, otherwise the Axes of the plot if neither `show`
+            nor `save` is set, otherwise None.
+        Examples:
+            >>> import pertpy as pt
+            >>> adata = pt.dt.sc_sim_augur()
+            >>> ag_rfc = pt.tl.Augur("random_forest_classifier")
+            >>> loaded_data = ag_rfc.load(adata)
+            >>> h_adata, h_results = ag_rfc.predict(loaded_data, subsample_size=20, n_threads=4)
+            >>> v_adata, v_results = ag_rfc.predict(
+            ...     loaded_data, subsample_size=20, select_variance_features=True, n_threads=4
+            ... )
+            >>> ag_rfc.plot_scatterplot(v_results, h_results)
+        Preview:
+            .. image:: /_static/docstring_previews/augur_scatterplot.png
+        """
+        # The cell types of the first result define which points are drawn.
+        cell_types = results1["summary_metrics"].columns
+        fig, ax = plt.subplots()
+        ax.scatter(
+            results1["summary_metrics"].loc["mean_augur_score", cell_types],
+            results2["summary_metrics"].loc["mean_augur_score", cell_types],
+        )
+        # adding optional labels
+        top_n_cell_types = (
+            (results1["summary_metrics"].loc["mean_augur_score"] - results2["summary_metrics"].loc["mean_augur_score"])
+            .sort_values(ascending=False)
+            .index[:top_n]
+        )
+        for txt in top_n_cell_types:
+            ax.annotate(
+                txt,
+                (
+                    results1["summary_metrics"].loc["mean_augur_score", txt],
+                    results2["summary_metrics"].loc["mean_augur_score", txt],
+                ),
+            )
+        # adding diagonal; tuple comparison selects the axis range with the larger lower bound
+        limits = max(ax.get_xlim(), ax.get_ylim())
+        ax.plot(limits, limits, ls="--", c=".3")
+        ax.set_xlabel("Augur scores 1")
+        ax.set_ylabel("Augur scores 2")
+        if save:
+            fig.savefig(save, bbox_inches="tight")
+        if show:
+            plt.show()
+        if return_fig:
+            return fig
+        if not (show or save):
+            return ax
+        return None

pertpy 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

pertpy 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl