PyPI - pertpy - Versions diffs - 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

pertpy 0.6.0-py3-none-any.whl → 0.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

pertpy/__init__.py +4 -2
pertpy/data/__init__.py +66 -1
pertpy/data/_dataloader.py +28 -26
pertpy/data/_datasets.py +261 -92
pertpy/metadata/__init__.py +6 -0
pertpy/metadata/_cell_line.py +795 -0
pertpy/metadata/_compound.py +128 -0
pertpy/metadata/_drug.py +238 -0
pertpy/metadata/_look_up.py +569 -0
pertpy/metadata/_metadata.py +70 -0
pertpy/metadata/_moa.py +125 -0
pertpy/plot/__init__.py +0 -13
pertpy/preprocessing/__init__.py +2 -0
pertpy/preprocessing/_guide_rna.py +89 -6
pertpy/tools/__init__.py +48 -15
pertpy/tools/_augur.py +329 -32
pertpy/tools/_cinemaot.py +145 -6
pertpy/tools/_coda/_base_coda.py +1237 -116
pertpy/tools/_coda/_sccoda.py +66 -36
pertpy/tools/_coda/_tasccoda.py +46 -39
pertpy/tools/_dialogue.py +180 -77
pertpy/tools/_differential_gene_expression/__init__.py +20 -0
pertpy/tools/_differential_gene_expression/_base.py +657 -0
pertpy/tools/_differential_gene_expression/_checks.py +41 -0
pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
pertpy/tools/_differential_gene_expression/_edger.py +125 -0
pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
pertpy/tools/_distances/_distance_tests.py +29 -24
pertpy/tools/_distances/_distances.py +584 -98
pertpy/tools/_enrichment.py +460 -0
pertpy/tools/_kernel_pca.py +1 -1
pertpy/tools/_milo.py +406 -49
pertpy/tools/_mixscape.py +677 -55
pertpy/tools/_perturbation_space/_clustering.py +10 -3
pertpy/tools/_perturbation_space/_comparison.py +112 -0
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +524 -0
pertpy/tools/_perturbation_space/_perturbation_space.py +146 -52
pertpy/tools/_perturbation_space/_simple.py +52 -11
pertpy/tools/_scgen/__init__.py +1 -1
pertpy/tools/_scgen/_base_components.py +2 -3
pertpy/tools/_scgen/_scgen.py +706 -0
pertpy/tools/_scgen/_utils.py +3 -5
pertpy/tools/decoupler_LICENSE +674 -0
{pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +48 -20
pertpy-0.8.0.dist-info/RECORD +57 -0
{pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
pertpy/plot/_augur.py +0 -234
pertpy/plot/_cinemaot.py +0 -81
pertpy/plot/_coda.py +0 -1001
pertpy/plot/_dialogue.py +0 -91
pertpy/plot/_guide_rna.py +0 -82
pertpy/plot/_milopy.py +0 -284
pertpy/plot/_mixscape.py +0 -594
pertpy/plot/_scgen.py +0 -337
pertpy/tools/_differential_gene_expression.py +0 -99
pertpy/tools/_metadata/__init__.py +0 -0
pertpy/tools/_metadata/_cell_line.py +0 -613
pertpy/tools/_metadata/_look_up.py +0 -342
pertpy/tools/_perturbation_space/_discriminator_classifier.py +0 -381
pertpy/tools/_scgen/_jax_scgen.py +0 -370
pertpy-0.6.0.dist-info/RECORD +0 -50
/pertpy/tools/_scgen/{_jax_scgenvae.py → _scgenvae.py} +0 -0
{pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0

pertpy/tools/_milo.py CHANGED Viewed

@@ -3,21 +3,24 @@ from __future__ import annotations
 import logging
 import random
 import re
-from typing import Literal
+from typing import TYPE_CHECKING, Literal
+import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+import scanpy as sc
+import seaborn as sns
 from anndata import AnnData
+from lamin_utils import logger
 from mudata import MuData
-from rich import print
-try:
-    from rpy2.robjects import conversion, numpy2ri, pandas2ri
-    from rpy2.robjects.packages import STAP, PackageNotInstalledError, importr
-except ModuleNotFoundError:
-    print(
-        "[bold yellow]ryp2 is not installed. Install with [green]pip install rpy2 [yellow]to run tools with R support."
-    )
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+    from matplotlib.axes import Axes
+    from matplotlib.colors import Colormap
+    from matplotlib.figure import Figure
 from scipy.sparse import csr_matrix
 from sklearn.metrics.pairwise import euclidean_distances
@@ -26,7 +29,16 @@ class Milo:
     """Python implementation of Milo."""
     def __init__(self):
-        pass
+        try:
+            from rpy2.robjects import conversion, numpy2ri, pandas2ri
+            from rpy2.robjects.packages import STAP, PackageNotInstalledError, importr
+        except ModuleNotFoundError:
+            raise ImportError("milo requires rpy2 to be installed.") from None
+        try:
+            importr("edgeR")
+        except ImportError as e:
+            raise ImportError("milo requires a valid R installation with edger installed:\n") from e
     def load(
         self,
@@ -39,7 +51,7 @@ class Milo:
             input: AnnData
             feature_key: Key to store the cell-level AnnData object in the MuData object
         Returns:
-            MuData: MuData object with original AnnData (default is `mudata[feature_key]`).
+            MuData: MuData object with original AnnData.
         Examples:
             >>> import pertpy as pt
@@ -71,11 +83,10 @@ class Milo:
             neighbors_key: The key in `adata.obsp` or `mdata[feature_key].obsp` to use as KNN graph.
                            If not specified, `make_nhoods` looks .obsp[‘connectivities’] for connectivities (default storage places for `scanpy.pp.neighbors`).
                            If specified, it looks at .obsp[.uns[neighbors_key][‘connectivities_key’]] for connectivities.
-                           (default: None)
-            feature_key: If input data is MuData, specify key to cell-level AnnData object. (default: 'rna')
-            prop: Fraction of cells to sample for neighbourhood index search. (default: 0.1)
-            seed: Random seed for cell sampling. (default: 0)
-            copy: Determines whether a copy of the `adata` is returned. (default: False)
+            feature_key: If input data is MuData, specify key to cell-level AnnData object.
+            prop: Fraction of cells to sample for neighbourhood index search.
+            seed: Random seed for cell sampling.
+            copy: Determines whether a copy of the `adata` is returned.
         Returns:
             If `copy=True`, returns the copy of `adata` with the result in `.obs`, `.obsm`, and `.uns`.
@@ -119,7 +130,7 @@ class Milo:
             try:
                 knn_graph = adata.obsp["connectivities"].copy()
             except KeyError:
-                print('No "connectivities" slot in adata.obsp -- please run scanpy.pp.neighbors(adata) first')
+                logger.error('No "connectivities" slot in adata.obsp -- please run scanpy.pp.neighbors(adata) first')
                 raise
         else:
             try:
@@ -174,6 +185,7 @@ class Milo:
         dist_mat = knn_dists[nhood_ixs, :]
         k_distances = dist_mat.max(1).toarray().ravel()
         adata.obs["nhood_kth_distance"] = 0
+        adata.obs["nhood_kth_distance"] = adata.obs["nhood_kth_distance"].astype(float)
         adata.obs.loc[adata.obs["nhood_ixs_refined"] == 1, "nhood_kth_distance"] = k_distances
         if copy:
@@ -190,7 +202,7 @@ class Milo:
         Args:
             data: AnnData object with neighbourhoods defined in `obsm['nhoods']` or MuData object with a modality with neighbourhoods defined in `obsm['nhoods']`
             sample_col: Column in adata.obs that contains sample information
-            feature_key: If input data is MuData, specify key to cell-level AnnData object. (default: 'rna')
+            feature_key: If input data is MuData, specify key to cell-level AnnData object.
         Returns:
             MuData object storing the original (i.e. rna) AnnData in `mudata[feature_key]`
@@ -221,7 +233,7 @@ class Milo:
             try:
                 nhoods = adata.obsm["nhoods"]
             except KeyError:
-                print('Cannot find "nhoods" slot in adata.obsm -- please run milopy.make_nhoods(adata)')
+                logger.error('Cannot find "nhoods" slot in adata.obsm -- please run milopy.make_nhoods(adata)')
                 raise
         # Make nhood abundance matrix
         sample_dummies = pd.get_dummies(adata.obs[sample_col])
@@ -229,7 +241,7 @@ class Milo:
         sample_dummies = csr_matrix(sample_dummies.values)
         nhood_count_mat = nhoods.T.dot(sample_dummies)
         sample_obs = pd.DataFrame(index=all_samples)
-        sample_adata = AnnData(X=nhood_count_mat.T, obs=sample_obs, dtype=np.float32)
+        sample_adata = AnnData(X=nhood_count_mat.T, obs=sample_obs)
         sample_adata.uns["sample_col"] = sample_col
         # Save nhood index info
         sample_adata.var["index_cell"] = adata.obs_names[adata.obs["nhood_ixs_refined"] == 1]
@@ -261,10 +273,10 @@ class Milo:
             design: Formula for the test, following glm syntax from R (e.g. '~ condition').
                     Terms should be columns in `milo_mdata[feature_key].obs`.
             model_contrasts: A string vector that defines the contrasts used to perform DA testing, following glm syntax from R (e.g. "conditionDisease - conditionControl").
-                             If no contrast is specified (default), then the last categorical level in condition of interest is used as the test group. Defaults to None.
-            subset_samples: subset of samples (obs in `milo_mdata['milo']`) to use for the test. Defaults to None.
-            add_intercept: whether to include an intercept in the model. If False, this is equivalent to adding + 0 in the design formula. When model_contrasts is specified, this is set to False by default. Defaults to True.
-            feature_key: If input data is MuData, specify key to cell-level AnnData object. Defaults to 'rna'.
+                             If no contrast is specified (default), then the last categorical level in condition of interest is used as the test group.
+            subset_samples: subset of samples (obs in `milo_mdata['milo']`) to use for the test.
+            add_intercept: whether to include an intercept in the model. If False, this is equivalent to adding + 0 in the design formula. When model_contrasts is specified, this is set to False by default.
+            feature_key: If input data is MuData, specify key to cell-level AnnData object.
             solver: The solver to fit the model to. One of "edger" (requires R, rpy2 and edgeR to be installed) or "batchglm"
         Returns:
@@ -288,8 +300,8 @@ class Milo:
         try:
             sample_adata = mdata["milo"]
         except KeyError:
-            print(
-                "[bold red]milo_mdata should be a MuData object with two slots:"
+            logger.error(
+                "milo_mdata should be a MuData object with two slots:"
                 " feature_key and 'milo' - please run milopy.count_nhoods() first"
             )
             raise
@@ -303,7 +315,7 @@ class Milo:
             sample_obs = adata.obs[covariates + [sample_col]].drop_duplicates()
         except KeyError:
             missing_cov = [x for x in covariates if x not in sample_adata.obs.columns]
-            print("Covariates {c} are not columns in adata.obs".format(c=" ".join(missing_cov)))
+            logger.warning("Covariates {c} are not columns in adata.obs".format(c=" ".join(missing_cov)))
             raise
         sample_obs = sample_obs[covariates + [sample_col]]
         sample_obs.index = sample_obs[sample_col].astype("str")
@@ -311,7 +323,7 @@ class Milo:
         try:
             assert sample_obs.loc[sample_adata.obs_names].shape[0] == len(sample_adata.obs_names)
         except AssertionError:
-            print(
+            logger.warning(
                 f"Values in mdata[{feature_key}].obs[{covariates}] cannot be unambiguously assigned to each sample"
                 f" -- each sample value should match a single covariate value"
             )
@@ -323,7 +335,9 @@ class Milo:
             design_df = sample_adata.obs[covariates]
         except KeyError:
             missing_cov = [x for x in covariates if x not in sample_adata.obs.columns]
-            print('Covariates {c} are not columns in adata.uns["sample_adata"].obs'.format(c=" ".join(missing_cov)))
+            logger.error(
+                'Covariates {c} are not columns in adata.uns["sample_adata"].obs'.format(c=" ".join(missing_cov))
+            )
             raise
         # Get count matrix
         count_mat = sample_adata.X.T.toarray()
@@ -367,6 +381,8 @@ class Milo:
                     return(colnames(m))
                 }
                 """
+                from rpy2.robjects.packages import STAP
                 get_model_cols = STAP(r_str, "get_model_cols")
                 model_mat_cols = get_model_cols.get_model_cols(design_df, design)
                 model_df = pd.DataFrame(model)
@@ -374,13 +390,16 @@ class Milo:
                 try:
                     mod_contrast = limma.makeContrasts(contrasts=model_contrasts, levels=model_df)
                 except ValueError:
-                    print("Model contrasts must be in the form 'A-B' or 'A+B'")
+                    logger.error("Model contrasts must be in the form 'A-B' or 'A+B'")
                     raise
                 res = base.as_data_frame(
                     edgeR.topTags(edgeR.glmQLFTest(fit, contrast=mod_contrast), sort_by="none", n=np.inf)
                 )
             else:
                 res = base.as_data_frame(edgeR.topTags(edgeR.glmQLFTest(fit, coef=n_coef), sort_by="none", n=np.inf))
+            from rpy2.robjects import conversion
             res = conversion.rpy2py(res)
             if not isinstance(res, pd.DataFrame):
                 res = pd.DataFrame(res)
@@ -405,7 +424,7 @@ class Milo:
         Args:
             mdata: MuData object
             anno_col: Column in adata.obs containing the cell annotations to use for nhood labelling
-            feature_key: If input data is MuData, specify key to cell-level AnnData object. Defaults to 'rna'.
+            feature_key: If input data is MuData, specify key to cell-level AnnData object.
         Returns:
             None. Adds in place:
@@ -423,12 +442,12 @@ class Milo:
             >>> sc.pp.neighbors(mdata["rna"])
             >>> milo.make_nhoods(mdata["rna"])
             >>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
-            >>> milo.annotate_nhoods(mdata, anno_col='cell_type')
+            >>> milo.annotate_nhoods(mdata, anno_col="cell_type")
         """
         try:
             sample_adata = mdata["milo"]
         except KeyError:
-            print(
+            logger.error(
                 "milo_mdata should be a MuData object with two slots: feature_key and 'milo' - please run milopy.count_nhoods(adata) first"
             )
             raise
@@ -459,7 +478,7 @@ class Milo:
         Args:
             mdata: MuData object
             anno_col: Column in adata.obs containing the cell annotations to use for nhood labelling
-            feature_key: If input data is MuData, specify key to cell-level AnnData object. Defaults to 'rna'.
+            feature_key: If input data is MuData, specify key to cell-level AnnData object.
         Returns:
             None. Adds in place:
@@ -474,7 +493,7 @@ class Milo:
             >>> sc.pp.neighbors(mdata["rna"])
             >>> milo.make_nhoods(mdata["rna"])
             >>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
-            >>> milo.annotate_nhoods_continuous(mdata, anno_col='nUMI')
+            >>> milo.annotate_nhoods_continuous(mdata, anno_col="nUMI")
         """
         if "milo" not in mdata.mod:
             raise ValueError(
@@ -500,7 +519,7 @@ class Milo:
         Args:
             mdata: MuData object
             new_covariates: columns in `milo_mdata[feature_key].obs` to add to `milo_mdata['milo'].obs`.
-            feature_key: If input data is MuData, specify key to cell-level AnnData object. Defaults to 'rna'.
+            feature_key: If input data is MuData, specify key to cell-level AnnData object.
         Returns:
             None, adds columns to `milo_mdata['milo']` in place
@@ -519,7 +538,7 @@ class Milo:
         try:
             sample_adata = mdata["milo"]
         except KeyError:
-            print(
+            logger.error(
                 "milo_mdata should be a MuData object with two slots: feature_key and 'milo' - please run milopy.count_nhoods(adata) first"
             )
             raise
@@ -533,14 +552,14 @@ class Milo:
             sample_obs = adata.obs[covariates + [sample_col]].drop_duplicates()
         except KeyError:
             missing_cov = [covar for covar in covariates if covar not in sample_adata.obs.columns]
-            print("Covariates {c} are not columns in adata.obs".format(c=" ".join(missing_cov)))
+            logger.error("Covariates {c} are not columns in adata.obs".format(c=" ".join(missing_cov)))
             raise
         sample_obs = sample_obs[covariates + [sample_col]].astype("str")
         sample_obs.index = sample_obs[sample_col]
         try:
             assert sample_obs.loc[sample_adata.obs_names].shape[0] == len(sample_adata.obs_names)
         except ValueError:
-            print(
+            logger.error(
                 "Covariates cannot be unambiguously assigned to each sample -- each sample value should match a single covariate value"
             )
             raise
@@ -551,8 +570,8 @@ class Milo:
         Args:
             mdata: MuData object
-            basis: Name of the obsm basis to use for layout of neighbourhoods (key in `adata.obsm`). Defaults to "X_umap".
-            feature_key: If input data is MuData, specify key to cell-level AnnData object. Defaults to 'rna'.
+            basis: Name of the obsm basis to use for layout of neighbourhoods (key in `adata.obsm`).
+            feature_key: If input data is MuData, specify key to cell-level AnnData object.
         Returns:
             - `milo_mdata['milo'].varp['nhood_connectivities']`: graph of overlap between neighbourhoods (i.e. no of shared cells)
@@ -584,13 +603,13 @@ class Milo:
             "distances_key": "",
         }
-    def add_nhood_expression(self, mdata: MuData, layer: str | None = None, feature_key: str | None = "rna"):
+    def add_nhood_expression(self, mdata: MuData, layer: str | None = None, feature_key: str | None = "rna") -> None:
         """Calculates the mean expression in neighbourhoods of each feature.
         Args:
             mdata: MuData object
-            layer: If provided, use `milo_mdata[feature_key][layer]` as expression matrix instead of `milo_mdata[feature_key].X`. Defaults to None.
-            feature_key: If input data is MuData, specify key to cell-level AnnData object. Defaults to 'rna'.
+            layer: If provided, use `milo_mdata[feature_key][layer]` as expression matrix instead of `milo_mdata[feature_key].X`.
+            feature_key: If input data is MuData, specify key to cell-level AnnData object.
         Returns:
             Updates adata in place to store the matrix of average expression in each neighbourhood in `milo_mdata['milo'].varm['expr']`
@@ -609,7 +628,7 @@ class Milo:
         try:
             sample_adata = mdata["milo"]
         except KeyError:
-            print(
+            logger.error(
                 "milo_mdata should be a MuData object with two slots:"
                 " feature_key and 'milo' - please run milopy.count_nhoods(adata) first"
             )
@@ -633,6 +652,9 @@ class Milo:
         self,
     ):
         """Set up rpy2 to run edgeR"""
+        from rpy2.robjects import numpy2ri, pandas2ri
+        from rpy2.robjects.packages import importr
         numpy2ri.activate()
         pandas2ri.activate()
         edgeR = self._try_import_bioc_library("edgeR")
@@ -651,11 +673,13 @@ class Milo:
         Args:
             name (str): R packages name
         """
+        from rpy2.robjects.packages import PackageNotInstalledError, importr
         try:
             _r_lib = importr(name)
             return _r_lib
         except PackageNotInstalledError:
-            print(f"Install Bioconductor library `{name!r}` first as `BiocManager::install({name!r}).`")
+            logger.error(f"Install Bioconductor library `{name!r}` first as `BiocManager::install({name!r}).`")
             raise
     def _graph_spatial_fdr(
@@ -663,11 +687,13 @@ class Milo:
         sample_adata: AnnData,
         neighbors_key: str | None = None,
     ):
-        """FDR correction weighted on inverse of connectivity of neighbourhoods. The distance to the k-th nearest neighbor is used as a measure of connectivity.
+        """FDR correction weighted on inverse of connectivity of neighbourhoods.
+        The distance to the k-th nearest neighbor is used as a measure of connectivity.
         Args:
             sample_adata: Sample-level AnnData.
-            neighbors_key: The key in `adata.obsp` to use as KNN graph. Defaults to None.
+            neighbors_key: The key in `adata.obsp` to use as KNN graph.
         """
         # use 1/connectivity as the weighting for the weighted BH adjustment from Cydar
         w = 1 / sample_adata.var["kth_distance"]
@@ -686,3 +712,334 @@ class Milo:
         sample_adata.var["SpatialFDR"] = np.nan
         sample_adata.var.loc[keep_nhoods, "SpatialFDR"] = adjp
+    def plot_nhood_graph(
+        self,
+        mdata: MuData,
+        alpha: float = 0.1,
+        min_logFC: float = 0,
+        min_size: int = 10,
+        plot_edges: bool = False,
+        title: str = "DA log-Fold Change",
+        color_map: Colormap | str | None = None,
+        palette: str | Sequence[str] | None = None,
+        ax: Axes | None = None,
+        show: bool | None = None,
+        save: bool | str | None = None,
+        **kwargs,
+    ) -> None:
+        """Visualize DA results on abstracted graph (wrapper around sc.pl.embedding).
+        Neighbourhoods whose `SpatialFDR` exceeds `alpha`, or whose absolute logFC is below `min_logFC`,
+        get a NaN colour value. The colour scale is symmetric around zero.
+        Args:
+            mdata: MuData object with a 'milo' modality.
+            alpha: Significance threshold applied to `SpatialFDR`.
+            min_logFC: Minimum absolute log-Fold Change to show results. If 0, show all significant neighbourhoods.
+            min_size: Factor multiplied with `Nhood_size` to obtain the node sizes.
+            plot_edges: Whether edges for neighbourhood overlaps should be plotted.
+            title: Plot title.
+            color_map: Colormap used for the logFC values. If None, "RdBu_r" is used.
+            palette: Passed through to `scanpy.pl.embedding`.
+            ax: Passed through to `scanpy.pl.embedding`.
+            show: Show the plot, do not return axis.
+            save: If `True` or a `str`, save the figure. A string is appended to the default filename.
+                  Infer the filetype if ending on {`'.pdf'`, `'.png'`, `'.svg'`}.
+            **kwargs: Additional arguments to `scanpy.pl.embedding`.
+        Raises:
+            KeyError: If the transposed 'milo' modality has no `Nhood_size` column in `.obs`.
+        Examples:
+            >>> import pertpy as pt
+            >>> import scanpy as sc
+            >>> adata = pt.dt.bhattacherjee()
+            >>> milo = pt.tl.Milo()
+            >>> mdata = milo.load(adata)
+            >>> sc.pp.neighbors(mdata["rna"])
+            >>> sc.tl.umap(mdata["rna"])
+            >>> milo.make_nhoods(mdata["rna"])
+            >>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
+            >>> milo.da_nhoods(mdata,
+            >>>            design='~label',
+            >>>            model_contrasts='labelwithdraw_15d_Cocaine-labelwithdraw_48h_Cocaine')
+            >>> milo.build_nhood_graph(mdata)
+            >>> milo.plot_nhood_graph(mdata)
+        Preview:
+            .. image:: /_static/docstring_previews/milo_nhood_graph.png
+        """
+        # Work on a transposed copy so that neighbourhoods become observations
+        nhood_adata = mdata["milo"].T.copy()
+        if "Nhood_size" not in nhood_adata.obs.columns:
+            # Adjacent literals instead of a backslash continuation: the continuation
+            # embedded the next line's indentation in the message
+            raise KeyError(
+                'Cannot find "Nhood_size" column in adata.uns["nhood_adata"].obs -- '
+                "please run milopy.utils.build_nhood_graph(adata)"
+            )
+        # Mask neighbourhoods that are not significant or whose effect is too small
+        nhood_adata.obs["graph_color"] = nhood_adata.obs["logFC"]
+        nhood_adata.obs.loc[nhood_adata.obs["SpatialFDR"] > alpha, "graph_color"] = np.nan
+        nhood_adata.obs["abs_logFC"] = abs(nhood_adata.obs["logFC"])
+        nhood_adata.obs.loc[nhood_adata.obs["abs_logFC"] < min_logFC, "graph_color"] = np.nan
+        # Plotting order - extreme logFC on top
+        nhood_adata.obs.loc[nhood_adata.obs["graph_color"].isna(), "abs_logFC"] = np.nan
+        ordered = nhood_adata.obs.sort_values("abs_logFC", na_position="first").index
+        nhood_adata = nhood_adata[ordered]
+        # Symmetric colour limits so that zero sits in the middle of the diverging colormap
+        vmax = np.max([nhood_adata.obs["graph_color"].max(), abs(nhood_adata.obs["graph_color"].min())])
+        vmin = -vmax
+        # scanpy refuses calls that set both `cmap` and `color_map`, so resolve them
+        # to a single value here; otherwise any user-supplied `color_map` would raise
+        cmap = "RdBu_r" if color_map is None else color_map
+        sc.pl.embedding(
+            nhood_adata,
+            "X_milo_graph",
+            color="graph_color",
+            cmap=cmap,
+            size=nhood_adata.obs["Nhood_size"] * min_size,
+            edges=plot_edges,
+            neighbors_key="nhood",
+            sort_order=False,
+            frameon=False,
+            vmax=vmax,
+            vmin=vmin,
+            title=title,
+            palette=palette,
+            ax=ax,
+            show=show,
+            save=save,
+            **kwargs,
+        )
+    def plot_nhood(
+        self,
+        mdata: MuData,
+        ix: int,
+        feature_key: str | None = "rna",
+        basis: str = "X_umap",
+        color_map: Colormap | str | None = None,
+        palette: str | Sequence[str] | None = None,
+        return_fig: bool | None = None,
+        ax: Axes | None = None,
+        show: bool | None = None,
+        save: bool | str | None = None,
+        **kwargs,
+    ) -> Figure | Axes | None:
+        """Visualize cells in a neighbourhood.
+        Writes the membership of neighbourhood `ix` to `mdata[feature_key].obs["Nhood"]` (in place)
+        and colours the embedding by that column.
+        Args:
+            mdata: MuData object with feature_key slot, storing neighbourhood assignments in `mdata[feature_key].obsm['nhoods']`
+            ix: index of neighbourhood to visualize
+            feature_key: Key of the cell-level AnnData object in `mdata`.
+            basis: Embedding to use for visualization.
+            color_map: Passed through to `scanpy.pl.embedding`.
+            palette: Passed through to `scanpy.pl.embedding`.
+            return_fig: Passed through to `scanpy.pl.embedding`.
+            ax: Passed through to `scanpy.pl.embedding`.
+            show: Show the plot, do not return axis.
+            save: If True or a str, save the figure. A string is appended to the default filename. Infer the filetype if ending on {'.pdf', '.png', '.svg'}.
+            **kwargs: Additional arguments to `scanpy.pl.embedding`.
+        Returns:
+            Whatever `scanpy.pl.embedding` returns for the given `show` / `return_fig` settings.
+        Examples:
+            >>> import pertpy as pt
+            >>> import scanpy as sc
+            >>> adata = pt.dt.bhattacherjee()
+            >>> milo = pt.tl.Milo()
+            >>> mdata = milo.load(adata)
+            >>> sc.pp.neighbors(mdata["rna"])
+            >>> sc.tl.umap(mdata["rna"])
+            >>> milo.make_nhoods(mdata["rna"])
+            >>> milo.plot_nhood(mdata, ix=0)
+        Preview:
+            .. image:: /_static/docstring_previews/milo_nhood.png
+        """
+        # Column `ix` of the nhoods matrix flags the cells belonging to that neighbourhood
+        mdata[feature_key].obs["Nhood"] = mdata[feature_key].obsm["nhoods"][:, ix].toarray().ravel()
+        # Hand scanpy's result back to the caller; it used to be dropped, which made
+        # `return_fig` (and `show=False`) useless
+        return sc.pl.embedding(
+            mdata[feature_key],
+            basis,
+            color="Nhood",
+            size=30,
+            title="Nhood" + str(ix),
+            color_map=color_map,
+            palette=palette,
+            return_fig=return_fig,
+            ax=ax,
+            show=show,
+            save=save,
+            **kwargs,
+        )
+    def plot_da_beeswarm(
+        self,
+        mdata: MuData,
+        feature_key: str | None = "rna",
+        anno_col: str = "nhood_annotation",
+        alpha: float = 0.1,
+        subset_nhoods: list[str] | None = None,
+        palette: str | Sequence[str] | dict[str, str] | None = None,
+        return_fig: bool | None = None,
+        save: bool | str | None = None,
+        show: bool | None = None,
+    ) -> Figure | Axes | None:
+        """Plot beeswarm plot of logFC against nhood labels.
+        Args:
+            mdata: MuData object
+            feature_key: Key of the cell-level AnnData object in `mdata`; only used to look up the default palette.
+            anno_col: Column in `mdata['milo'].var` to use as annotation.
+            alpha: Significance threshold applied to `SpatialFDR`.
+            subset_nhoods: Annotation labels (values of `anno_col`) to keep. If None, plot all nhoods.
+            palette: Name of Seaborn color palette for violinplots.
+                     Defaults to pre-defined category colors for violinplots.
+            return_fig: If True (and neither `save` nor `show` is set), return the current figure.
+            save: Path passed to `plt.savefig`. If set, the figure is saved and None is returned.
+            show: If True, call `plt.show()` and return None.
+        Returns:
+            None if the plot was saved or shown, the current figure if `return_fig` is set, otherwise the current axes.
+        Raises:
+            RuntimeError: If the 'milo' modality, `anno_col` or the 'logFC' column cannot be found.
+        Examples:
+            >>> import pertpy as pt
+            >>> import scanpy as sc
+            >>> adata = pt.dt.bhattacherjee()
+            >>> milo = pt.tl.Milo()
+            >>> mdata = milo.load(adata)
+            >>> sc.pp.neighbors(mdata["rna"])
+            >>> milo.make_nhoods(mdata["rna"])
+            >>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
+            >>> milo.da_nhoods(mdata, design="~label")
+            >>> milo.annotate_nhoods(mdata, anno_col="cell_type")
+            >>> milo.plot_da_beeswarm(mdata)
+        Preview:
+            .. image:: /_static/docstring_previews/milo_da_beeswarm.png
+        """
+        try:
+            nhood_adata = mdata["milo"].T.copy()
+        except KeyError:
+            raise RuntimeError(
+                "mdata should be a MuData object with two slots: feature_key and 'milo'. Run 'milopy.count_nhoods(adata)' first."
+            ) from None
+        try:
+            nhood_adata.obs[anno_col]
+        except KeyError:
+            raise RuntimeError(
+                f"Unable to find {anno_col} in mdata['milo'].var. Run 'milopy.utils.annotate_nhoods(adata, anno_col)' first"
+            ) from None
+        if subset_nhoods is not None:
+            nhood_adata = nhood_adata[nhood_adata.obs[anno_col].isin(subset_nhoods)]
+        try:
+            nhood_adata.obs["logFC"]
+        except KeyError:
+            raise RuntimeError(
+                "Unable to find 'logFC' in mdata.uns['nhood_adata'].obs. Run 'core.da_nhoods(adata)' first."
+            ) from None
+        # Order the annotation groups by their median logFC
+        sorted_annos = (
+            nhood_adata.obs[[anno_col, "logFC"]].groupby(anno_col).median().sort_values("logFC", ascending=True).index
+        )
+        anno_df = nhood_adata.obs[[anno_col, "logFC", "SpatialFDR"]].copy()
+        anno_df["is_signif"] = anno_df["SpatialFDR"] < alpha
+        anno_df = anno_df[anno_df[anno_col] != "nan"]
+        violin_kwargs = {
+            "data": anno_df,
+            "y": anno_col,
+            "x": "logFC",
+            "order": sorted_annos,
+            "inner": None,
+            "orient": "h",
+            "linewidth": 0,
+            "scale": "width",
+        }
+        try:
+            # Only derive the category colours when the caller gave no palette, so that a
+            # missing `annotation_obs` entry can no longer discard an explicit palette
+            if palette is None:
+                obs_col = nhood_adata.uns["annotation_obs"]
+                palette = dict(
+                    zip(
+                        mdata[feature_key].obs[obs_col].cat.categories,
+                        mdata[feature_key].uns[f"{obs_col}_colors"],
+                        strict=False,
+                    )
+                )
+            sns.violinplot(palette=palette, **violin_kwargs)
+        except Exception:  # noqa: BLE001
+            # Best-effort fallback to seaborn's default colours. `Exception` rather than
+            # `BaseException`, so KeyboardInterrupt / SystemExit still propagate
+            sns.violinplot(**violin_kwargs)
+        sns.stripplot(
+            data=anno_df,
+            y=anno_col,
+            x="logFC",
+            order=sorted_annos,
+            size=2,
+            hue="is_signif",
+            palette=["grey", "black"],
+            orient="h",
+            alpha=0.5,
+        )
+        # round() instead of int(): int() truncates float artefacts (0.29 * 100 -> 28)
+        plt.legend(loc="upper left", title=f"< {round(alpha * 100)}% SpatialFDR", bbox_to_anchor=(1, 1), frameon=False)
+        plt.axvline(x=0, ymin=0, ymax=1, color="black", linestyle="--")
+        if save:
+            plt.savefig(save, bbox_inches="tight")
+            return None
+        if show:
+            plt.show()
+            return None
+        if return_fig:
+            return plt.gcf()
+        # Neither saved nor shown: hand the axes back to the caller
+        return plt.gca()
+    def plot_nhood_counts_by_cond(
+        self,
+        mdata: MuData,
+        test_var: str,
+        subset_nhoods: list[str] | None = None,
+        log_counts: bool = False,
+        return_fig: bool | None = None,
+        save: bool | str | None = None,
+        show: bool | None = None,
+    ) -> Figure | Axes | None:
+        """Plot boxplot of cell numbers vs condition of interest.
+        Args:
+            mdata: MuData object storing cell level and nhood level information
+            test_var: Name of column in adata.obs storing condition of interest (x-axis of the boxplot)
+            subset_nhoods: List of obs_names for neighbourhoods to include in plot. If None, plot all nhoods.
+            log_counts: Whether to plot log1p of cell counts.
+            return_fig: If True (and neither `save` nor `show` is set), return the current figure.
+            save: Path passed to `plt.savefig`. If set, the figure is saved and None is returned.
+            show: If True, call `plt.show()` and return None.
+        Returns:
+            None if the plot was saved or shown, the current figure if `return_fig` is set, otherwise the current axes.
+        Raises:
+            RuntimeError: If `mdata` has no 'milo' modality.
+        """
+        try:
+            nhood_adata = mdata["milo"].T.copy()
+        except KeyError:
+            raise RuntimeError(
+                "mdata should be a MuData object with two slots: feature_key and 'milo'. Run milopy.count_nhoods(mdata) first"
+            ) from None
+        if subset_nhoods is None:
+            subset_nhoods = nhood_adata.obs_names
+        # `.toarray()` instead of the `.A` shorthand: `.A` is no longer available on
+        # sparse matrices in recent SciPy, and the rest of this class uses `.toarray()`
+        counts = nhood_adata[subset_nhoods].X.toarray()
+        # Long format: one row per (neighbourhood, sample) pair
+        pl_df = pd.DataFrame(counts, columns=nhood_adata.var_names).melt(
+            var_name=nhood_adata.uns["sample_col"], value_name="n_cells"
+        )
+        # Merge on the columns shared with the sample table -- presumably the sample
+        # column -- to attach `test_var`; TODO confirm against count_nhoods
+        pl_df = pd.merge(pl_df, nhood_adata.var)
+        pl_df["log_n_cells"] = np.log1p(pl_df["n_cells"])
+        if log_counts:
+            y_col, y_label = "log_n_cells", "log(# cells + 1)"
+        else:
+            y_col, y_label = "n_cells", "# cells"
+        sns.boxplot(data=pl_df, x=test_var, y=y_col, color="lightblue")
+        sns.stripplot(data=pl_df, x=test_var, y=y_col, color="black", s=3)
+        plt.ylabel(y_label)
+        plt.xticks(rotation=90)
+        plt.xlabel(test_var)
+        if save:
+            plt.savefig(save, bbox_inches="tight")
+            return None
+        if show:
+            plt.show()
+            return None
+        if return_fig:
+            return plt.gcf()
+        # Neither saved nor shown: hand the axes back to the caller
+        return plt.gca()

pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

pertpy 0.6.0-py3-none-any.whl → 0.8.0-py3-none-any.whl