PyPI - pertpy - Versions diffs - 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl - Mend

pertpy 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53)

pertpy/__init__.py +3 -2
pertpy/data/__init__.py +5 -1
pertpy/data/_dataloader.py +2 -4
pertpy/data/_datasets.py +203 -92
pertpy/metadata/__init__.py +4 -0
pertpy/metadata/_cell_line.py +826 -0
pertpy/metadata/_compound.py +129 -0
pertpy/metadata/_drug.py +242 -0
pertpy/metadata/_look_up.py +582 -0
pertpy/metadata/_metadata.py +73 -0
pertpy/metadata/_moa.py +129 -0
pertpy/plot/__init__.py +1 -9
pertpy/plot/_augur.py +53 -116
pertpy/plot/_coda.py +277 -677
pertpy/plot/_guide_rna.py +17 -35
pertpy/plot/_milopy.py +59 -134
pertpy/plot/_mixscape.py +152 -391
pertpy/preprocessing/_guide_rna.py +88 -4
pertpy/tools/__init__.py +8 -13
pertpy/tools/_augur.py +315 -17
pertpy/tools/_cinemaot.py +143 -4
pertpy/tools/_coda/_base_coda.py +1210 -65
pertpy/tools/_coda/_sccoda.py +50 -21
pertpy/tools/_coda/_tasccoda.py +27 -19
pertpy/tools/_dialogue.py +164 -56
pertpy/tools/_differential_gene_expression.py +240 -14
pertpy/tools/_distances/_distance_tests.py +8 -8
pertpy/tools/_distances/_distances.py +184 -34
pertpy/tools/_enrichment.py +465 -0
pertpy/tools/_milo.py +345 -11
pertpy/tools/_mixscape.py +668 -50
pertpy/tools/_perturbation_space/_clustering.py +5 -1
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +526 -0
pertpy/tools/_perturbation_space/_perturbation_space.py +135 -43
pertpy/tools/_perturbation_space/_simple.py +51 -10
pertpy/tools/_scgen/__init__.py +1 -1
pertpy/tools/_scgen/_scgen.py +701 -0
pertpy/tools/_scgen/_utils.py +1 -3
pertpy/tools/decoupler_LICENSE +674 -0
{pertpy-0.6.0.dist-info → pertpy-0.7.0.dist-info}/METADATA +31 -12
pertpy-0.7.0.dist-info/RECORD +53 -0
{pertpy-0.6.0.dist-info → pertpy-0.7.0.dist-info}/WHEEL +1 -1
pertpy/plot/_cinemaot.py +0 -81
pertpy/plot/_dialogue.py +0 -91
pertpy/plot/_scgen.py +0 -337
pertpy/tools/_metadata/__init__.py +0 -0
pertpy/tools/_metadata/_cell_line.py +0 -613
pertpy/tools/_metadata/_look_up.py +0 -342
pertpy/tools/_perturbation_space/_discriminator_classifier.py +0 -381
pertpy/tools/_scgen/_jax_scgen.py +0 -370
pertpy-0.6.0.dist-info/RECORD +0 -50
pertpy/tools/_scgen/{_jax_scgenvae.py → _scgenvae.py} +0 -0
{pertpy-0.6.0.dist-info → pertpy-0.7.0.dist-info}/licenses/LICENSE +0 -0

pertpy/metadata/_moa.py ADDED Viewed

@@ -0,0 +1,129 @@
+from __future__ import annotations
+from pathlib import Path
+from typing import TYPE_CHECKING
+import numpy as np
+import pandas as pd
+from rich import print
+from scanpy import settings
+from pertpy.data._dataloader import _download
+from ._look_up import LookUp
+from ._metadata import MetaData
+if TYPE_CHECKING:
+    from anndata import AnnData
+class Moa(MetaData):
+    """Utilities to fetch metadata for mechanism of action studies."""
+    def __init__(self):
+        self.clue = None
+    def _download_clue(self) -> None:
+        clue_path = Path(settings.cachedir) / "repurposing_drugs_20200324.txt"
+        if not Path(clue_path).exists():
+            print("[bold yellow]No metadata file was found for clue. Starting download now.")
+            _download(
+                url="https://s3.amazonaws.com/data.clue.io/repurposing/downloads/repurposing_drugs_20200324.txt",
+                output_file_name="repurposing_drugs_20200324.txt",
+                output_path=settings.cachedir,
+                block_size=4096,
+                is_zip=False,
+            )
+        self.clue = pd.read_csv(clue_path, sep="	", skiprows=9)
+        self.clue = self.clue[["pert_iname", "moa", "target"]]
+    def annotate(
+        self,
+        adata: AnnData,
+        query_id: str = "perturbation",
+        target: str | None = None,
+        verbosity: int | str = 5,
+        copy: bool = False,
+    ) -> AnnData:
+        """Annotate cells affected by perturbations by mechanism of action.
+        For each cell, we fetch the mechanism of action and molecular targets of the compounds sourced from clue.io.
+        Args:
+            adata: The data object to annotate.
+            query_id: The column of `.obs` with the name of a perturbagen. Defaults to 'perturbation'.
+            target: The column of `.obs` with target information. If set to None, all MoAs are retrieved without comparing molecular targets.
+                    Defaults to None.
+            verbosity: The number of unmatched identifiers to print, can be either non-negative values or 'all'.
+                       Defaults to 5.
+            copy: Determines whether a copy of the `adata` is returned. Defaults to False.
+        Returns:
+            Returns an AnnData object with MoA annotation.
+        """
+        if copy:
+            adata = adata.copy()
+        if query_id not in adata.obs.columns:
+            raise ValueError(f"The requested query_id {query_id} is not in `adata.obs`.\n" "Please check again.")
+        if self.clue is None:
+            self._download_clue()
+        identifier_num_all = len(adata.obs[query_id].unique())
+        not_matched_identifiers = list(set(adata.obs[query_id].str.lower()) - set(self.clue["pert_iname"].str.lower()))
+        self._warn_unmatch(
+            total_identifiers=identifier_num_all,
+            unmatched_identifiers=not_matched_identifiers,
+            query_id=query_id,
+            reference_id="pert_iname",
+            metadata_type="moa",
+            verbosity=verbosity,
+        )
+        adata.obs = (
+            adata.obs.merge(
+                self.clue,
+                left_on=adata.obs[query_id].str.lower(),
+                right_on=self.clue["pert_iname"].str.lower(),
+                how="left",
+                suffixes=("", "_fromMeta"),
+            )
+            .set_index(adata.obs.index)
+            .drop("key_0", axis=1)
+        )
+        # If target column is given, check whether it is one of the targets listed in the metadata
+        # If inconsistent, treat this perturbagen as unmatched and overwrite the annotated metadata with NaN
+        if target is not None:
+            target_meta = "target" if target != "target" else "target_fromMeta"
+            adata.obs[target_meta] = adata.obs[target_meta].mask(
+                ~adata.obs.apply(lambda row: str(row[target]) in str(row[target_meta]), axis=1)
+            )
+            pertname_meta = "pert_iname" if query_id != "pert_iname" else "pert_iname_fromMeta"
+            adata.obs.loc[adata.obs[target_meta].isna(), [pertname_meta, "moa"]] = np.nan
+        # If query_id and reference_id have different names, there will be a column for each of them after merging
+        # which is redundant as they refer to the same information.
+        if query_id != "pert_iname":
+            del adata.obs["pert_iname"]
+        return adata
+    def lookup(self) -> LookUp:
+        """Generate LookUp object for Moa metadata.
+        The LookUp object provides an overview of the metadata to annotate.
+        annotate_moa function has a corresponding lookup function in the LookUp object,
+        where users can search the query_ids and targets in the metadata.
+        Returns:
+            Returns a LookUp object specific for MoA annotation.
+        """
+        if self.clue is None:
+            self._download_clue()
+        return LookUp(
+            type="moa",
+            transfer_metadata=[self.clue],
+        )

pertpy/plot/__init__.py CHANGED Viewed

@@ -1,13 +1,5 @@
 from pertpy.plot._augur import AugurpyPlot as ag
-from pertpy.plot._dialogue import DialoguePlot as dl
-try:
-    from pertpy.plot._coda import CodaPlot as coda
-except ImportError:
-    pass
-from pertpy.plot._cinemaot import CinemaotPlot as cot
+from pertpy.plot._coda import CodaPlot as coda
 from pertpy.plot._guide_rna import GuideRnaPlot as guide
 from pertpy.plot._milopy import MilopyPlot as milo
 from pertpy.plot._mixscape import MixscapePlot as ms
-from pertpy.plot._scgen import JaxscgenPlot as scg

pertpy/plot/_augur.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
+import warnings
 from typing import TYPE_CHECKING, Any
 from anndata import AnnData
@@ -15,7 +16,7 @@ class AugurpyPlot:
     """Plotting functions for Augurpy."""
     @staticmethod
-    def dp_scatter(results: pd.DataFrame, top_n=None, ax: Axes = None, return_figure: bool = False) -> Figure | Axes:
+    def dp_scatter(results: pd.DataFrame, top_n=None, ax: Axes = None) -> Figure | Axes:
         """Plot result of differential prioritization.
         Args:
@@ -42,38 +43,24 @@ class AugurpyPlot:
             >>> pvals = ag_rfc.predict_differential_prioritization(augur_results1=results_15, augur_results2=results_48, \
                 permuted_results1=results_15_permute, permuted_results2=results_48_permute)
-            >>> pt.pl.ag.dp_scatter(pvals)
+            >>> ag_rfc.plot_dp_scatter(pvals)
         """
-        x = results["mean_augur_score1"]
-        y = results["mean_augur_score2"]
-        if ax is None:
-            fig, ax = plt.subplots()
-        scatter = ax.scatter(x, y, c=results.z, cmap="Greens")
-        # adding optional labels
-        top_n_index = results.sort_values(by="pval").index[:top_n]
-        for idx in top_n_index:
-            ax.annotate(
-                results.loc[idx, "cell_type"],
-                (results.loc[idx, "mean_augur_score1"], results.loc[idx, "mean_augur_score2"]),
-            )
+        warnings.warn(
+            "This function is deprecated and will be removed in pertpy 0.8.0!"
+            " Please use the corresponding 'pt.tl' object",
+            FutureWarning,
+            stacklevel=2,
+        )
-        # add diagonal
-        limits = max(ax.get_xlim(), ax.get_ylim())
-        (diag_line,) = ax.plot(limits, limits, ls="--", c=".3")
+        from pertpy.tools import Augur
-        # formatting and details
-        plt.xlabel("Augur scores 1")
-        plt.ylabel("Augur scores 2")
-        legend1 = ax.legend(*scatter.legend_elements(), loc="center left", title="z-scores", bbox_to_anchor=(1, 0.5))
-        ax.add_artist(legend1)
+        ag = Augur("random_forest_classifier")
-        return fig if return_figure else ax
+        return ag.plot_dp_scatter(results=results, top_n=top_n, ax=ax)
     @staticmethod
     def important_features(
-        data: dict[str, Any], key: str = "augurpy_results", top_n=10, ax: Axes = None, return_figure: bool = False
+        data: dict[str, Any], key: str = "augurpy_results", top_n=10, ax: Axes = None
     ) -> Figure | Axes:
         """Plot a lollipop plot of the n features with largest feature importances.
@@ -92,44 +79,26 @@ class AugurpyPlot:
             >>> adata = pt.dt.sc_sim_augur()
             >>> ag_rfc = pt.tl.Augur("random_forest_classifier")
             >>> loaded_data = ag_rfc.load(adata)
-            >>> v_adata, v_results = ag_rfc.predict(loaded_data, subsample_size=20, select_variance_features=True, n_threads=4)
-            >>> pt.pl.ag.important_features(v_results)
+            >>> v_adata, v_results = ag_rfc.predict(
+            ...     loaded_data, subsample_size=20, select_variance_features=True, n_threads=4
+            ... )
+            >>> ag_rfc.plot_important_features(v_results)
         """
-        if isinstance(data, AnnData):
-            results = data.uns[key]
-        else:
-            results = data
-        # top_n features to plot
-        n_features = (
-            results["feature_importances"]
-            .groupby("genes", as_index=False)
-            .feature_importances.mean()
-            .sort_values(by="feature_importances")[-top_n:]
+        warnings.warn(
+            "This function is deprecated and will be removed in pertpy 0.8.0!"
+            " Please use the corresponding 'pt.tl' object",
+            FutureWarning,
+            stacklevel=2,
         )
-        if ax is None:
-            fig, ax = plt.subplots()
-        y_axes_range = range(1, top_n + 1)
-        ax.hlines(
-            y_axes_range,
-            xmin=0,
-            xmax=n_features["feature_importances"],
-        )
-        # drawing the markers (circle)
-        ax.plot(n_features["feature_importances"], y_axes_range, "o")
+        from pertpy.tools import Augur
-        # formatting and details
-        plt.xlabel("Mean Feature Importance")
-        plt.ylabel("Gene")
-        plt.yticks(y_axes_range, n_features["genes"])
+        ag = Augur("random_forest_classifier")
-        return fig if return_figure else ax
+        return ag.plot_important_features(data=data, key=key, top_n=top_n, ax=ax)
     @staticmethod
-    def lollipop(
-        data: dict[str, Any], key: str = "augurpy_results", ax: Axes = None, return_figure: bool = False
-    ) -> Figure | Axes:
+    def lollipop(data: dict[str, Any], key: str = "augurpy_results", ax: Axes = None) -> Figure | Axes | None:
         """Plot a lollipop plot of the mean augur values.
         Args:
@@ -146,40 +115,26 @@ class AugurpyPlot:
             >>> adata = pt.dt.sc_sim_augur()
             >>> ag_rfc = pt.tl.Augur("random_forest_classifier")
             >>> loaded_data = ag_rfc.load(adata)
-            >>> v_adata, v_results = ag_rfc.predict(loaded_data, subsample_size=20, select_variance_features=True, n_threads=4)
-            >>> pt.pl.ag.lollipop(v_results)
+            >>> v_adata, v_results = ag_rfc.predict(
+            ...     loaded_data, subsample_size=20, select_variance_features=True, n_threads=4
+            ... )
+            >>> ag_rfc.plot_lollipop(v_results)
         """
-        if isinstance(data, AnnData):
-            results = data.uns[key]
-        else:
-            results = data
-        if ax is None:
-            fig, ax = plt.subplots()
-        y_axes_range = range(1, len(results["summary_metrics"].columns) + 1)
-        ax.hlines(
-            y_axes_range,
-            xmin=0,
-            xmax=results["summary_metrics"].sort_values("mean_augur_score", axis=1).loc["mean_augur_score"],
+        warnings.warn(
+            "This function is deprecated and will be removed in pertpy 0.8.0!"
+            " Please use the corresponding 'pt.tl' object",
+            FutureWarning,
+            stacklevel=2,
         )
-        # drawing the markers (circle)
-        ax.plot(
-            results["summary_metrics"].sort_values("mean_augur_score", axis=1).loc["mean_augur_score"],
-            y_axes_range,
-            "o",
-        )
+        from pertpy.tools import Augur
-        # formatting and details
-        plt.xlabel("Mean Augur Score")
-        plt.ylabel("Cell Type")
-        plt.yticks(y_axes_range, results["summary_metrics"].sort_values("mean_augur_score", axis=1).columns)
+        ag = Augur("random_forest_classifier")
-        return fig if return_figure else ax
+        return ag.plot_lollipop(data=data, key=key, ax=ax)
     @staticmethod
-    def scatterplot(
-        results1: dict[str, Any], results2: dict[str, Any], top_n=None, return_figure: bool = False
-    ) -> Figure | Axes:
+    def scatterplot(results1: dict[str, Any], results2: dict[str, Any], top_n=None) -> Figure | Axes:
         """Create scatterplot with two augur results.
         Args:
@@ -197,38 +152,20 @@ class AugurpyPlot:
             >>> ag_rfc = pt.tl.Augur("random_forest_classifier")
             >>> loaded_data = ag_rfc.load(adata)
             >>> h_adata, h_results = ag_rfc.predict(loaded_data, subsample_size=20, n_threads=4)
-            >>> v_adata, v_results = ag_rfc.predict(loaded_data, subsample_size=20, select_variance_features=True, n_threads=4)
-            >>> pt.pl.ag.scatterplot(v_results, h_results)
+            >>> v_adata, v_results = ag_rfc.predict(
+            ...     loaded_data, subsample_size=20, select_variance_features=True, n_threads=4
+            ... )
+            >>> ag_rfc.plot_scatterplot(v_results, h_results)
         """
-        cell_types = results1["summary_metrics"].columns
-        fig, ax = plt.subplots()
-        ax.scatter(
-            results1["summary_metrics"].loc["mean_augur_score", cell_types],
-            results2["summary_metrics"].loc["mean_augur_score", cell_types],
+        warnings.warn(
+            "This function is deprecated and will be removed in pertpy 0.8.0!"
+            " Please use the corresponding 'pt.tl' object",
+            FutureWarning,
+            stacklevel=2,
         )
-        # adding optional labels
-        top_n_cell_types = (
-            (results1["summary_metrics"].loc["mean_augur_score"] - results2["summary_metrics"].loc["mean_augur_score"])
-            .sort_values(ascending=False)
-            .index[:top_n]
-        )
-        for txt in top_n_cell_types:
-            ax.annotate(
-                txt,
-                (
-                    results1["summary_metrics"].loc["mean_augur_score", txt],
-                    results2["summary_metrics"].loc["mean_augur_score", txt],
-                ),
-            )
-        # adding diagonal
-        limits = max(ax.get_xlim(), ax.get_ylim())
-        (diag_line,) = ax.plot(limits, limits, ls="--", c=".3")
-        # formatting and details
-        plt.xlabel("Augur scores 1")
-        plt.ylabel("Augur scores 2")
-        return fig if return_figure else ax
+        from pertpy.tools import Augur
+        ag = Augur("random_forest_classifier")
+        return ag.plot_scatterplot(results1=results1, results2=results2, top_n=top_n)

pertpy 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

pertpy 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl