PyPI - pertpy - Versions diffs - 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

pertpy 0.6.0py3-none-any.whl → 0.8.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

pertpy/__init__.py +4 -2
pertpy/data/__init__.py +66 -1
pertpy/data/_dataloader.py +28 -26
pertpy/data/_datasets.py +261 -92
pertpy/metadata/__init__.py +6 -0
pertpy/metadata/_cell_line.py +795 -0
pertpy/metadata/_compound.py +128 -0
pertpy/metadata/_drug.py +238 -0
pertpy/metadata/_look_up.py +569 -0
pertpy/metadata/_metadata.py +70 -0
pertpy/metadata/_moa.py +125 -0
pertpy/plot/__init__.py +0 -13
pertpy/preprocessing/__init__.py +2 -0
pertpy/preprocessing/_guide_rna.py +89 -6
pertpy/tools/__init__.py +48 -15
pertpy/tools/_augur.py +329 -32
pertpy/tools/_cinemaot.py +145 -6
pertpy/tools/_coda/_base_coda.py +1237 -116
pertpy/tools/_coda/_sccoda.py +66 -36
pertpy/tools/_coda/_tasccoda.py +46 -39
pertpy/tools/_dialogue.py +180 -77
pertpy/tools/_differential_gene_expression/__init__.py +20 -0
pertpy/tools/_differential_gene_expression/_base.py +657 -0
pertpy/tools/_differential_gene_expression/_checks.py +41 -0
pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
pertpy/tools/_differential_gene_expression/_edger.py +125 -0
pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
pertpy/tools/_distances/_distance_tests.py +29 -24
pertpy/tools/_distances/_distances.py +584 -98
pertpy/tools/_enrichment.py +460 -0
pertpy/tools/_kernel_pca.py +1 -1
pertpy/tools/_milo.py +406 -49
pertpy/tools/_mixscape.py +677 -55
pertpy/tools/_perturbation_space/_clustering.py +10 -3
pertpy/tools/_perturbation_space/_comparison.py +112 -0
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +524 -0
pertpy/tools/_perturbation_space/_perturbation_space.py +146 -52
pertpy/tools/_perturbation_space/_simple.py +52 -11
pertpy/tools/_scgen/__init__.py +1 -1
pertpy/tools/_scgen/_base_components.py +2 -3
pertpy/tools/_scgen/_scgen.py +706 -0
pertpy/tools/_scgen/_utils.py +3 -5
pertpy/tools/decoupler_LICENSE +674 -0
{pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +48 -20
pertpy-0.8.0.dist-info/RECORD +57 -0
{pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
pertpy/plot/_augur.py +0 -234
pertpy/plot/_cinemaot.py +0 -81
pertpy/plot/_coda.py +0 -1001
pertpy/plot/_dialogue.py +0 -91
pertpy/plot/_guide_rna.py +0 -82
pertpy/plot/_milopy.py +0 -284
pertpy/plot/_mixscape.py +0 -594
pertpy/plot/_scgen.py +0 -337
pertpy/tools/_differential_gene_expression.py +0 -99
pertpy/tools/_metadata/__init__.py +0 -0
pertpy/tools/_metadata/_cell_line.py +0 -613
pertpy/tools/_metadata/_look_up.py +0 -342
pertpy/tools/_perturbation_space/_discriminator_classifier.py +0 -381
pertpy/tools/_scgen/_jax_scgen.py +0 -370
pertpy-0.6.0.dist-info/RECORD +0 -50
/pertpy/tools/_scgen/{_jax_scgenvae.py → _scgenvae.py} +0 -0
{pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0

pertpy/plot/_dialogue.py DELETED Viewed

@@ -1,91 +0,0 @@
-import matplotlib.pyplot as plt
-import pandas as pd
-import scanpy as sc
-import seaborn as sns
-from anndata import AnnData
-from seaborn import PairGrid
-class DialoguePlot:
-    @staticmethod
-    def split_violins(
-        adata: AnnData,
-        split_key: str,
-        celltype_key=str,
-        split_which: tuple[str, str] = None,
-        mcp: str = "mcp_0",
-    ) -> plt.Axes:
-        """Plots split violin plots for a given MCP and split variable.
-        Any cells with a value for split_key not in split_which are removed from the plot.
-        Args:
-            adata: Annotated data object.
-            split_key: Variable in adata.obs used to split the data.
-            celltype_key: Key for cell type annotations.
-            split_which: Which values of split_key to plot. Required if more than 2 values in split_key.
-            mcp: Key for MCP data. Defaults to "mcp_0".
-        Returns:
-            A :class:`~matplotlib.axes.Axes` object
-        Examples:
-            >>> import pertpy as pt
-            >>> import scanpy as sc
-            >>> adata = pt.dt.dialogue_example()
-            >>> sc.pp.pca(adata)
-            >>> dl = pt.tl.Dialogue(sample_id = "clinical.status", celltype_key = "cell.subtypes", \
-                n_counts_key = "nCount_RNA", n_mpcs = 3)
-            >>> adata, mcps, ws, ct_subs = dl.calculate_multifactor_PMD(adata, normalize=True)
-            >>> pt.pl.dl.split_violins(adata, split_key='gender', celltype_key='cell.subtypes')
-        """
-        df = sc.get.obs_df(adata, [celltype_key, mcp, split_key])
-        if split_which is None:
-            split_which = df[split_key].unique()
-        df = df[df[split_key].isin(split_which)]
-        df[split_key] = df[split_key].cat.remove_unused_categories()
-        ax = sns.violinplot(data=df, x=celltype_key, y=mcp, hue=split_key, split=True)
-        ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
-        return ax
-    @staticmethod
-    def pairplot(adata: AnnData, celltype_key: str, color: str, sample_id: str, mcp: str = "mcp_0") -> PairGrid:
-        """Generate a pairplot visualization for multi-cell perturbation (MCP) data.
-        Computes the mean of a specified MCP feature (mcp) for each combination of sample and cell type,
-        then creates a pairplot to visualize the relationships between these mean MCP values.
-        Args:
-            adata: Annotated data object.
-            celltype_key: Key in adata.obs containing cell type annotations.
-            color: Key in adata.obs for color annotations. This parameter is used as the hue
-            sample_id: Key in adata.obs for the sample annotations.
-            mcp: Key in adata.obs for MCP feature values. Defaults to "mcp_0".
-        Returns:
-            Seaborn Pairgrid object.
-        Examples:
-            >>> import pertpy as pt
-            >>> import scanpy as sc
-            >>> adata = pt.dt.dialogue_example()
-            >>> sc.pp.pca(adata)
-            >>> dl = pt.tl.Dialogue(sample_id = "clinical.status", celltype_key = "cell.subtypes", \
-                n_counts_key = "nCount_RNA", n_mpcs = 3)
-            >>> adata, mcps, ws, ct_subs = dl.calculate_multifactor_PMD(adata, normalize=True)
-            >>> pt.pl.dl.pairplot(adata, celltype_key="cell.subtypes", color="gender", sample_id="clinical.status")
-        """
-        mean_mcps = adata.obs.groupby([sample_id, celltype_key])[mcp].mean()
-        mean_mcps = mean_mcps.reset_index()
-        mcp_pivot = pd.pivot(mean_mcps[[sample_id, celltype_key, mcp]], index=sample_id, columns=celltype_key)[mcp]
-        aggstats = adata.obs.groupby([sample_id])[color].describe()
-        aggstats = aggstats.loc[list(mcp_pivot.index), :]
-        aggstats[color] = aggstats["top"]
-        mcp_pivot = pd.concat([mcp_pivot, aggstats[color]], axis=1)
-        ax = sns.pairplot(mcp_pivot, hue=color, corner=True)
-        return ax

pertpy/plot/_guide_rna.py DELETED Viewed

@@ -1,82 +0,0 @@
-from __future__ import annotations
-from typing import TYPE_CHECKING
-import numpy as np
-import scanpy as sc
-if TYPE_CHECKING:
-    from anndata import AnnData
-    from matplotlib.axes import Axes
-class GuideRnaPlot:
-    @staticmethod
-    def heatmap(
-        adata: AnnData,
-        layer: str | None = None,
-        order_by: np.ndarray | str | None = None,
-        key_to_save_order: str = None,
-        **kwds,
-    ) -> list[Axes]:
-        """Heatmap plotting of guide RNA expression matrix.
-        Assuming guides have sparse expression, this function reorders cells
-        and plots guide RNA expression so that a nice sparse representation is achieved.
-        The cell ordering can be stored and reused in future plots to obtain consistent
-        plots before and after analysis of the guide RNA expression.
-        Note: This function expects a log-normalized or binary data.
-         Args:
-             adata: Annotated data matrix containing gRNA values
-             layer: Key to the layer containing log normalized count values of the gRNAs.
-                    adata.X is used if layer is None.
-             order_by: The order of cells in y axis. Defaults to None.
-                       If None, cells will be reordered to have a nice sparse representation.
-                       If a string is provided, adata.obs[order_by] will be used as the order.
-                       If a numpy array is provided, the array will be used for ordering.
-             key_to_save_order: The obs key to save cell orders in the current plot. Only saves if not None.
-             kwds: Are passed to sc.pl.heatmap.
-         Returns:
-             List of Axes. Alternatively you can pass save or show parameters as they will be passed to sc.pl.heatmap.
-             Order of cells in the y axis will be saved on adata.obs[key_to_save_order] if provided.
-        Examples:
-            Each cell is assigned to gRNA that occurs at least 5 times in the respective cell, which is then
-            visualized using a heatmap.
-            >>> import pertpy as pt
-            >>> mdata = pt.data.papalexi_2021()
-            >>> gdo = mdata.mod['gdo']
-            >>> ga = pt.pp.GuideAssignment()
-            >>> ga.assign_by_threshold(gdo, assignment_threshold=5)
-            >>> pt.pl.guide.heatmap(gdo)
-        """
-        data = adata.X if layer is None else adata.layers[layer]
-        if order_by is None:
-            max_guide_index = np.where(
-                np.array(data.max(axis=1)).squeeze() != data.min(), np.array(data.argmax(axis=1)).squeeze(), -1
-            )
-            order = np.argsort(max_guide_index)
-        elif isinstance(order_by, str):
-            order = adata.obs[order_by]
-        else:
-            order = order_by
-        adata.obs["_tmp_pertpy_grna_plot_dummy_group"] = ""
-        if key_to_save_order is not None:
-            adata.obs[key_to_save_order] = order
-        axis_group = sc.pl.heatmap(
-            adata[order],
-            adata.var.index.tolist(),
-            groupby="_tmp_pertpy_grna_plot_dummy_group",
-            cmap="viridis",
-            use_raw=False,
-            dendrogram=False,
-            layer=layer,
-            **kwds,
-        )
-        del adata.obs["_tmp_pertpy_grna_plot_dummy_group"]
-        return axis_group

pertpy/plot/_milopy.py DELETED Viewed

@@ -1,284 +0,0 @@
-from __future__ import annotations
-from typing import TYPE_CHECKING
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-import scanpy as sc
-import seaborn as sns
-if TYPE_CHECKING:
-    from collections.abc import Sequence
-    from mudata import MuData
-class MilopyPlot:
-    """Plotting functions for Milopy."""
-    @staticmethod
-    def nhood_graph(
-        mdata: MuData,
-        alpha: float = 0.1,
-        min_logFC: float = 0,
-        min_size: int = 10,
-        plot_edges: bool = False,
-        title: str = "DA log-Fold Change",
-        show: bool | None = None,
-        save: bool | str | None = None,
-        **kwargs,
-    ) -> None:
-        """Visualize DA results on abstracted graph (wrapper around sc.pl.embedding)
-        Args:
-            mdata: MuData object
-            alpha: Significance threshold. (default: 0.1)
-            min_logFC: Minimum absolute log-Fold Change to show results. If is 0, show all significant neighbourhoods. (default: 0)
-            min_size: Minimum size of nodes in visualization. (default: 10)
-            plot_edges: If edges for neighbourhood overlaps whould be plotted. Defaults to False.
-            title: Plot title. Defaults to "DA log-Fold Change".
-            show: Show the plot, do not return axis.
-            save: If `True` or a `str`, save the figure. A string is appended to the default filename.
-                  Infer the filetype if ending on {`'.pdf'`, `'.png'`, `'.svg'`}.
-            **kwargs: Additional arguments to `scanpy.pl.embedding`.
-        Examples:
-            >>> import pertpy as pt
-            >>> adata = pt.dt.bhattacherjee()
-            >>> milo = pt.tl.Milo()
-            >>> mdata = milo.load(adata)
-            >>> sc.pp.neighbors(mdata["rna"])
-            >>> sc.tl.umap(mdata["rna"])
-            >>> milo.make_nhoods(mdata["rna"])
-            >>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
-            >>> milo.da_nhoods(mdata, design="~label")
-            >>> milo.build_nhood_graph(mdata)
-            >>> pt.pl.milo.nhood_graph(mdata)
-            # TODO: If necessary adjust after fixing StopIteration error, which is currently thrown
-        """
-        nhood_adata = mdata["milo"].T.copy()
-        if "Nhood_size" not in nhood_adata.obs.columns:
-            raise KeyError(
-                'Cannot find "Nhood_size" column in adata.uns["nhood_adata"].obs -- \
-                    please run milopy.utils.build_nhood_graph(adata)'
-            )
-        nhood_adata.obs["graph_color"] = nhood_adata.obs["logFC"]
-        nhood_adata.obs.loc[nhood_adata.obs["SpatialFDR"] > alpha, "graph_color"] = np.nan
-        nhood_adata.obs["abs_logFC"] = abs(nhood_adata.obs["logFC"])
-        nhood_adata.obs.loc[nhood_adata.obs["abs_logFC"] < min_logFC, "graph_color"] = np.nan
-        # Plotting order - extreme logFC on top
-        nhood_adata.obs.loc[nhood_adata.obs["graph_color"].isna(), "abs_logFC"] = np.nan
-        ordered = nhood_adata.obs.sort_values("abs_logFC", na_position="first").index
-        nhood_adata = nhood_adata[ordered]
-        vmax = np.max([nhood_adata.obs["graph_color"].max(), abs(nhood_adata.obs["graph_color"].min())])
-        vmin = -vmax
-        sc.pl.embedding(
-            nhood_adata,
-            "X_milo_graph",
-            color="graph_color",
-            cmap="RdBu_r",
-            size=nhood_adata.obs["Nhood_size"] * min_size,
-            edges=plot_edges,
-            neighbors_key="nhood",
-            sort_order=False,
-            frameon=False,
-            vmax=vmax,
-            vmin=vmin,
-            title=title,
-            show=show,
-            save=save,
-            **kwargs,
-        )
-    @staticmethod
-    def nhood(
-        mdata: MuData,
-        ix: int,
-        feature_key: str | None = "rna",
-        basis="X_umap",
-        show: bool | None = None,
-        save: bool | str | None = None,
-        **kwargs,
-    ) -> None:
-        """Visualize cells in a neighbourhood.
-        Args:
-            mdata: MuData object with feature_key slot, storing neighbourhood assignments in `mdata[feature_key].obsm['nhoods']`
-            ix: index of neighbourhood to visualize
-            basis: Embedding to use for visualization. Defaults to "X_umap".
-            show: Show the plot, do not return axis.
-            save: If True or a str, save the figure. A string is appended to the default filename. Infer the filetype if ending on {'.pdf', '.png', '.svg'}.
-            **kwargs: Additional arguments to `scanpy.pl.embedding`.
-        Examples:
-            >>> import pertpy as pt
-            >>> import scanpy as sc
-            >>> adata = pt.dt.bhattacherjee()
-            >>> milo = pt.tl.Milo()
-            >>> mdata = milo.load(adata)
-            >>> sc.pp.neighbors(mdata["rna"])
-            >>> sc.tl.umap(mdata["rna"])
-            >>> milo.make_nhoods(mdata["rna"])
-            >>> pt.pl.milo.nhood(mdata, ix=0)
-        """
-        mdata[feature_key].obs["Nhood"] = mdata[feature_key].obsm["nhoods"][:, ix].toarray().ravel()
-        sc.pl.embedding(
-            mdata[feature_key], basis, color="Nhood", size=30, title="Nhood" + str(ix), show=show, save=save, **kwargs
-        )
-    @staticmethod
-    def da_beeswarm(
-        mdata: MuData,
-        feature_key: str | None = "rna",
-        anno_col: str = "nhood_annotation",
-        alpha: float = 0.1,
-        subset_nhoods: list[str] = None,
-        palette: str | Sequence[str] | dict[str, str] | None = None,
-    ):
-        """Plot beeswarm plot of logFC against nhood labels
-        Args:
-            mdata: MuData object
-            anno_col: Column in adata.uns['nhood_adata'].obs to use as annotation. (default: 'nhood_annotation'.)
-            alpha: Significance threshold. (default: 0.1)
-            subset_nhoods: List of nhoods to plot. If None, plot all nhoods. (default: None)
-            palette: Name of Seaborn color palette for violinplots.
-                     Defaults to pre-defined category colors for violinplots.
-        Examples:
-            >>> import pertpy as pt
-            >>> import scanpy as sc
-            >>> adata = pt.dt.bhattacherjee()
-            >>> milo = pt.tl.Milo()
-            >>> mdata = milo.load(adata)
-            >>> sc.pp.neighbors(mdata["rna"])
-            >>> milo.make_nhoods(mdata["rna"])
-            >>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
-            >>> milo.da_nhoods(mdata, design="~label")
-            >>> milo.annotate_nhoods(mdata, anno_col='cell_type')
-            >>> pt.pl.milo.da_beeswarm(mdata)
-        """
-        try:
-            nhood_adata = mdata["milo"].T.copy()
-        except KeyError:
-            raise RuntimeError(
-                "mdata should be a MuData object with two slots: feature_key and 'milo'. Run 'milopy.count_nhoods(adata)' first."
-            ) from None
-        if subset_nhoods is not None:
-            nhood_adata = nhood_adata[subset_nhoods]
-        try:
-            nhood_adata.obs[anno_col]
-        except KeyError:
-            raise RuntimeError(
-                f"Unable to find {anno_col} in mdata.uns['nhood_adata']. Run 'milopy.utils.annotate_nhoods(adata, anno_col)' first"
-            ) from None
-        try:
-            nhood_adata.obs["logFC"]
-        except KeyError:
-            raise RuntimeError(
-                "Unable to find 'logFC' in mdata.uns['nhood_adata'].obs. Run 'core.da_nhoods(adata)' first."
-            ) from None
-        sorted_annos = (
-            nhood_adata.obs[[anno_col, "logFC"]].groupby(anno_col).median().sort_values("logFC", ascending=True).index
-        )
-        anno_df = nhood_adata.obs[[anno_col, "logFC", "SpatialFDR"]].copy()
-        anno_df["is_signif"] = anno_df["SpatialFDR"] < alpha
-        anno_df = anno_df[anno_df[anno_col] != "nan"]
-        try:
-            obs_col = nhood_adata.uns["annotation_obs"]
-            if palette is None:
-                palette = dict(
-                    zip(mdata[feature_key].obs[obs_col].cat.categories, mdata[feature_key].uns[f"{obs_col}_colors"])
-                )
-            sns.violinplot(
-                data=anno_df,
-                y=anno_col,
-                x="logFC",
-                order=sorted_annos,
-                size=190,
-                inner=None,
-                orient="h",
-                palette=palette,
-                linewidth=0,
-                scale="width",
-            )
-        except BaseException:  # noqa: BLE001
-            sns.violinplot(
-                data=anno_df,
-                y=anno_col,
-                x="logFC",
-                order=sorted_annos,
-                size=190,
-                inner=None,
-                orient="h",
-                linewidth=0,
-                scale="width",
-            )
-        sns.stripplot(
-            data=anno_df,
-            y=anno_col,
-            x="logFC",
-            order=sorted_annos,
-            size=2,
-            hue="is_signif",
-            palette=["grey", "black"],
-            orient="h",
-            alpha=0.5,
-        )
-        plt.legend(loc="upper left", title=f"< {int(alpha * 100)}% SpatialFDR", bbox_to_anchor=(1, 1), frameon=False)
-        plt.axvline(x=0, ymin=0, ymax=1, color="black", linestyle="--")
-    @staticmethod
-    def nhood_counts_by_cond(
-        mdata: MuData,
-        test_var: str,
-        subset_nhoods: list = None,
-        log_counts: bool = False,
-    ):
-        """Plot boxplot of cell numbers vs condition of interest
-        Args:
-            mdata: MuData object storing cell level and nhood level information
-            test_var: Name of column in adata.obs storing condition of interest (y-axis for boxplot)
-            subset_nhoods: List of obs_names for neighbourhoods to include in plot. If None, plot all nhoods. (default: None)
-            log_counts: Whether to plot log1p of cell counts. (default: False)
-        """
-        try:
-            nhood_adata = mdata["milo"].T.copy()
-        except KeyError:
-            raise RuntimeError(
-                "mdata should be a MuData object with two slots: feature_key and 'milo'. Run milopy.count_nhoods(mdata) first"
-            ) from None
-        if subset_nhoods is None:
-            subset_nhoods = nhood_adata.obs_names
-        pl_df = pd.DataFrame(nhood_adata[subset_nhoods].X.A, columns=nhood_adata.var_names).melt(
-            var_name=nhood_adata.uns["sample_col"], value_name="n_cells"
-        )
-        pl_df = pd.merge(pl_df, nhood_adata.var)
-        pl_df["log_n_cells"] = np.log1p(pl_df["n_cells"])
-        if not log_counts:
-            sns.boxplot(data=pl_df, x=test_var, y="n_cells", color="lightblue")
-            sns.stripplot(data=pl_df, x=test_var, y="n_cells", color="black", s=3)
-            plt.ylabel("# cells")
-        else:
-            sns.boxplot(data=pl_df, x=test_var, y="log_n_cells", color="lightblue")
-            sns.stripplot(data=pl_df, x=test_var, y="log_n_cells", color="black", s=3)
-            plt.ylabel("log(# cells + 1)")
-        plt.xticks(rotation=90)
-        plt.xlabel(test_var)

pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

pertpy 0.6.0py3-none-any.whl → 0.8.0py3-none-any.whl