PyPI - pertpy - Versions diffs - 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl - Mend

pertpy 0.6.0py3-none-any.whl → 0.7.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

pertpy/__init__.py +3 -2
pertpy/data/__init__.py +5 -1
pertpy/data/_dataloader.py +2 -4
pertpy/data/_datasets.py +203 -92
pertpy/metadata/__init__.py +4 -0
pertpy/metadata/_cell_line.py +826 -0
pertpy/metadata/_compound.py +129 -0
pertpy/metadata/_drug.py +242 -0
pertpy/metadata/_look_up.py +582 -0
pertpy/metadata/_metadata.py +73 -0
pertpy/metadata/_moa.py +129 -0
pertpy/plot/__init__.py +1 -9
pertpy/plot/_augur.py +53 -116
pertpy/plot/_coda.py +277 -677
pertpy/plot/_guide_rna.py +17 -35
pertpy/plot/_milopy.py +59 -134
pertpy/plot/_mixscape.py +152 -391
pertpy/preprocessing/_guide_rna.py +88 -4
pertpy/tools/__init__.py +8 -13
pertpy/tools/_augur.py +315 -17
pertpy/tools/_cinemaot.py +143 -4
pertpy/tools/_coda/_base_coda.py +1210 -65
pertpy/tools/_coda/_sccoda.py +50 -21
pertpy/tools/_coda/_tasccoda.py +27 -19
pertpy/tools/_dialogue.py +164 -56
pertpy/tools/_differential_gene_expression.py +240 -14
pertpy/tools/_distances/_distance_tests.py +8 -8
pertpy/tools/_distances/_distances.py +184 -34
pertpy/tools/_enrichment.py +465 -0
pertpy/tools/_milo.py +345 -11
pertpy/tools/_mixscape.py +668 -50
pertpy/tools/_perturbation_space/_clustering.py +5 -1
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +526 -0
pertpy/tools/_perturbation_space/_perturbation_space.py +135 -43
pertpy/tools/_perturbation_space/_simple.py +51 -10
pertpy/tools/_scgen/__init__.py +1 -1
pertpy/tools/_scgen/_scgen.py +701 -0
pertpy/tools/_scgen/_utils.py +1 -3
pertpy/tools/decoupler_LICENSE +674 -0
{pertpy-0.6.0.dist-info → pertpy-0.7.0.dist-info}/METADATA +31 -12
pertpy-0.7.0.dist-info/RECORD +53 -0
{pertpy-0.6.0.dist-info → pertpy-0.7.0.dist-info}/WHEEL +1 -1
pertpy/plot/_cinemaot.py +0 -81
pertpy/plot/_dialogue.py +0 -91
pertpy/plot/_scgen.py +0 -337
pertpy/tools/_metadata/__init__.py +0 -0
pertpy/tools/_metadata/_cell_line.py +0 -613
pertpy/tools/_metadata/_look_up.py +0 -342
pertpy/tools/_perturbation_space/_discriminator_classifier.py +0 -381
pertpy/tools/_scgen/_jax_scgen.py +0 -370
pertpy-0.6.0.dist-info/RECORD +0 -50
/pertpy/tools/_scgen/{_jax_scgenvae.py → _scgenvae.py} +0 -0
{pertpy-0.6.0.dist-info → pertpy-0.7.0.dist-info}/licenses/LICENSE +0 -0

pertpy/tools/_coda/_base_coda.py CHANGED Viewed

@@ -1,17 +1,23 @@
 from __future__ import annotations
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING
+from pathlib import Path
+from typing import TYPE_CHECKING, Literal, Optional, Union
 import arviz as az
-import ete3 as ete
 import jax.numpy as jnp
+import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import patsy as pt
+import scanpy as sc
+import seaborn as sns
+from adjustText import adjust_text
 from anndata import AnnData
-from jax import random
-from jax.config import config
+from jax import config, random
+from matplotlib import cm, rcParams
+from matplotlib import image as mpimg
+from matplotlib.colors import ListedColormap
 from mudata import MuData
 from numpyro.infer import HMC, MCMC, NUTS, initialization
 from rich import box, print
@@ -20,10 +26,15 @@ from rich.table import Table
 from scipy.cluster import hierarchy as sp_hierarchy
 if TYPE_CHECKING:
+    from collections.abc import Sequence
     import numpyro as npy
     import toytree as tt
-    from jax._src.prng import PRNGKeyArray
+    from ete3 import Tree
     from jax._src.typing import Array
+    from matplotlib.axes import Axes
+    from matplotlib.colors import Colormap
+    from matplotlib.figure import Figure
 config.update("jax_enable_x64", True)
@@ -179,7 +190,7 @@ class CompositionalModel2(ABC):
         self,
         sample_adata: AnnData,
         kernel: npy.infer.mcmc.MCMCKernel,
-        rng_key: Array | PRNGKeyArray,
+        rng_key: Array,
         copy: bool = False,
         *args,
         **kwargs,
@@ -295,7 +306,7 @@ class CompositionalModel2(ABC):
         if copy:
             sample_adata = sample_adata.copy()
-        rng_key_array = random.PRNGKey(rng_key)
+        rng_key_array = random.key(rng_key)
         sample_adata.uns["scCODA_params"]["mcmc"]["rng_key"] = np.array(rng_key_array)
         # Set up NUTS kernel
@@ -335,7 +346,6 @@ class CompositionalModel2(ABC):
             copy: Return a copy instead of writing to adata. Defaults to False.
         Examples:
-            Example with scCODA:
             >>> import pertpy as pt
             >>> haber_cells = pt.dt.haber_2017_regions()
             >>> sccoda = pt.tl.Sccoda()
@@ -358,10 +368,10 @@ class CompositionalModel2(ABC):
         # Set rng key if needed
         if rng_key is None:
             rng = np.random.default_rng()
-            rng_key = random.PRNGKey(rng.integers(0, 10000))
+            rng_key = random.key(rng.integers(0, 10000))
             sample_adata.uns["scCODA_params"]["mcmc"]["rng_key"] = rng_key
         else:
-            rng_key = random.PRNGKey(rng_key)
+            rng_key = random.key(rng_key)
         # Set up HMC kernel
         sample_adata = self.set_init_mcmc_states(
@@ -423,7 +433,6 @@ class CompositionalModel2(ABC):
                 - Is credible: Boolean indicator whether effect is credible
          Examples:
-            Example with scCODA:
             >>> import pertpy as pt
             >>> haber_cells = pt.dt.haber_2017_regions()
             >>> sccoda = pt.tl.Sccoda()
@@ -433,7 +442,6 @@ class CompositionalModel2(ABC):
             >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
             >>> intercept_df, effect_df = sccoda.summary_prepare(mdata["coda"])
         """
-        # Get model and effect selection types
         select_type = sample_adata.uns["scCODA_params"]["select_type"]
         model_type = sample_adata.uns["scCODA_params"]["model_type"]
@@ -548,7 +556,11 @@ class CompositionalModel2(ABC):
         intercept_df = intercept_df.loc[:, ["final_parameter", hdis[0], hdis[1], "sd", "expected_sample"]].copy()
         intercept_df = intercept_df.rename(
             columns=dict(
-                zip(intercept_df.columns, ["Final Parameter", hdis_new[0], hdis_new[1], "SD", "Expected Sample"])
+                zip(
+                    intercept_df.columns,
+                    ["Final Parameter", hdis_new[0], hdis_new[1], "SD", "Expected Sample"],
+                    strict=False,
+                )
             )
         )
@@ -561,6 +573,7 @@ class CompositionalModel2(ABC):
                     zip(
                         effect_df.columns,
                         ["Effect", "Median", hdis_new[0], hdis_new[1], "SD", "Expected Sample", "log2-fold change"],
+                        strict=False,
                     )
                 )
             )
@@ -581,6 +594,7 @@ class CompositionalModel2(ABC):
                             "Expected Sample",
                             "log2-fold change",
                         ],
+                        strict=False,
                     )
                 )
             )
@@ -594,6 +608,7 @@ class CompositionalModel2(ABC):
                     zip(
                         node_df.columns,
                         ["Final Parameter", "Median", hdis_new[0], hdis_new[1], "SD", "Delta", "Is credible"],
+                        strict=False,
                     )
                 )  # type: ignore
             )  # type: ignore
@@ -781,7 +796,6 @@ class CompositionalModel2(ABC):
             kwargs: Passed to az.summary
         Examples:
-            Example with scCODA:
             >>> import pertpy as pt
             >>> haber_cells = pt.dt.haber_2017_regions()
             >>> sccoda = pt.tl.Sccoda()
@@ -799,7 +813,7 @@ class CompositionalModel2(ABC):
                 raise
         if isinstance(data, AnnData):
             sample_adata = data
-        # Get model and effect selection types
         select_type = sample_adata.uns["scCODA_params"]["select_type"]
         model_type = sample_adata.uns["scCODA_params"]["model_type"]
@@ -926,7 +940,6 @@ class CompositionalModel2(ABC):
             pd.DataFrame: Intercept data frame.
         Examples:
-            Example with scCODA:
             >>> import pertpy as pt
             >>> haber_cells = pt.dt.haber_2017_regions()
             >>> sccoda = pt.tl.Sccoda()
@@ -936,7 +949,6 @@ class CompositionalModel2(ABC):
             >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
             >>> intercepts = sccoda.get_intercept_df(mdata)
         """
         if isinstance(data, MuData):
             try:
                 sample_adata = data[modality_key]
@@ -959,7 +971,6 @@ class CompositionalModel2(ABC):
             pd.DataFrame: Effect data frame.
         Examples:
-            Example with scCODA:
             >>> import pertpy as pt
             >>> haber_cells = pt.dt.haber_2017_regions()
             >>> sccoda = pt.tl.Sccoda()
@@ -969,7 +980,6 @@ class CompositionalModel2(ABC):
             >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
             >>> effects = sccoda.get_effect_df(mdata)
         """
         if isinstance(data, MuData):
             try:
                 sample_adata = data[modality_key]
@@ -1003,9 +1013,8 @@ class CompositionalModel2(ABC):
             pd.DataFrame: Node effect data frame.
         Examples:
-            Example with tascCODA (works only for model of type tree_agg, i.e. a tascCODA model):
             >>> import pertpy as pt
-            >>> adata = pt.dt.smillie()
+            >>> adata = pt.dt.tasccoda_example()
             >>> tasccoda = pt.tl.Tasccoda()
             >>> mdata = tasccoda.load(
             >>>     adata, type="sample_level",
@@ -1113,6 +1122,1136 @@ class CompositionalModel2(ABC):
         return out
+    def _stackbar(  # pragma: no cover
+        self,
+        y: np.ndarray,
+        type_names: list[str],
+        title: str,
+        level_names: list[str],
+        figsize: tuple[float, float] | None = None,
+        dpi: int | None = 100,
+        palette: ListedColormap | None = cm.tab20,
+        show_legend: bool | None = True,
+    ) -> plt.Axes:
+        """Draws one stacked bar per row of `y`, with every bar rescaled to 100 percent.
+        Internal helper, called by `plot_stacked_barplot`.
+        Args:
+            y: Count matrix with one row per bar (covariate level or sample) and one column per cell type.
+            type_names: Names of the cell types, used as legend labels (one per column of `y`).
+            title: Plot title, usually the covariate's name.
+            level_names: Tick labels of the bars (one per row of `y`).
+            figsize: Figure size. If None, matplotlib's `rcParams["figure.figsize"]` is used. Defaults to None.
+            dpi: Dpi setting. Defaults to 100.
+            palette: The color map for the bar segments; colors are reused cyclically when there are
+                     more cell types than colors. Defaults to cm.tab20.
+            show_legend: If True, adds a legend to the right of the plot. Defaults to True.
+        Returns:
+            A :class:`~matplotlib.axes.Axes` object
+        """
+        n_bars, n_types = y.shape
+        if figsize is None:
+            figsize = rcParams["figure.figsize"]
+        _, ax = plt.subplots(figsize=figsize, dpi=dpi)
+        positions = np.array(range(n_bars))
+        row_totals = np.sum(y, axis=1)
+        bar_width = 0.85
+        # Running top edge of every bar: each new segment is stacked onto the previous ones.
+        stack_top = np.zeros(n_bars)
+        for type_idx in range(n_types):
+            # Share of this cell type within each bar, in percent of the bar's total count.
+            segment = [y[bar_idx][type_idx] / row_totals[bar_idx] * 100 for bar_idx in range(n_bars)]
+            plt.bar(
+                positions,
+                segment,
+                bottom=stack_top,
+                color=palette(type_idx % palette.N),
+                width=bar_width,
+                label=type_names[type_idx],
+                linewidth=0,
+            )
+            stack_top += segment
+        ax.set_title(title)
+        if show_legend:
+            ax.legend(loc="upper left", bbox_to_anchor=(1, 1), ncol=1)
+        ax.set_xticks(positions)
+        ax.set_xticklabels(level_names, rotation=45, ha="right")
+        ax.set_ylabel("Proportion")
+        return ax
+    def plot_stacked_barplot(  # pragma: no cover
+        self,
+        data: AnnData | MuData,
+        feature_name: str,
+        modality_key: str = "coda",
+        palette: ListedColormap | None = cm.tab20,
+        show_legend: bool | None = True,
+        level_order: list[str] = None,
+        figsize: tuple[float, float] | None = None,
+        dpi: int | None = 100,
+        return_fig: bool | None = None,
+        ax: plt.Axes | None = None,
+        show: bool | None = None,
+        save: str | bool | None = None,
+        **kwargs,
+    ) -> plt.Axes | plt.Figure | None:
+        """Plots a stacked barplot for all levels of a covariate or all samples (if feature_name=="samples").
+        Args:
+            data: AnnData object or MuData object.
+            feature_name: The name of the covariate in data.obs to plot. If feature_name=="samples",
+                          one bar for every sample (row of data) will be plotted.
+            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
+            palette: The matplotlib color map for the barplot. Defaults to cm.tab20.
+            show_legend: If True, adds a legend. Defaults to True.
+            level_order: Custom ordering of bars on the x-axis. Must contain exactly the levels of the
+                         covariate (or the sample names for feature_name=="samples"). Defaults to None.
+            figsize: Figure size. Defaults to None.
+            dpi: Dpi setting. Defaults to 100.
+            return_fig: If True, the current matplotlib figure is returned. Defaults to None.
+            ax: Currently not used; the plot is always drawn on a newly created figure. Defaults to None.
+            show: If True, `plt.show()` is called. Defaults to None.
+            save: If set, the figure is written via `plt.savefig(save, bbox_inches="tight")`. Defaults to None.
+            kwargs: Currently not used.
+        Returns:
+            The current :class:`~matplotlib.figure.Figure` if `return_fig` is set, a
+            :class:`~matplotlib.axes.Axes` object if neither `show` nor `save` is set, otherwise None.
+        Examples:
+            >>> import pertpy as pt
+            >>> haber_cells = pt.dt.haber_2017_regions()
+            >>> sccoda = pt.tl.Sccoda()
+            >>> mdata = sccoda.load(haber_cells, type="cell_level", generate_sample_level=True, cell_type_identifier="cell_label", \
+                sample_identifier="batch", covariate_obs=["condition"])
+            >>> sccoda.plot_stacked_barplot(mdata, feature_name="samples")
+        Preview:
+            .. image:: /_static/docstring_previews/sccoda_stacked_barplot.png
+        """
+        if isinstance(data, MuData):
+            data = data[modality_key]
+        cell_type_names = data.var.index
+        if feature_name == "samples":
+            # One bar per sample: the count matrix is plotted row by row.
+            if level_order:
+                assert set(level_order) == set(data.obs.index), "level order is inconsistent with levels"
+                data = data[level_order]
+            counts = data.X
+            cell_type_names = data.var.index
+            bar_title = "samples"
+            bar_labels = data.obs.index
+        else:
+            feature_values = data.obs[feature_name]
+            # Bar order: explicit order first, then categorical order, then order of appearance.
+            if level_order:
+                assert set(level_order) == set(data.obs[feature_name]), "level order is inconsistent with levels"
+                levels = level_order
+            elif hasattr(feature_values, "cat"):
+                levels = feature_values.cat.categories.to_list()
+            else:
+                levels = pd.unique(feature_values)
+            # One bar per level: sum the counts of all samples that belong to the level.
+            counts = np.zeros([len(levels), data.X.shape[1]])
+            for row, level_value in enumerate(levels):
+                member_rows = np.where(feature_values == level_value)
+                counts[row] = np.sum(data.X[member_rows], axis=0)
+            bar_title = feature_name
+            bar_labels = levels
+        ax = self._stackbar(
+            counts,
+            type_names=cell_type_names,
+            title=bar_title,
+            level_names=bar_labels,
+            figsize=figsize,
+            dpi=dpi,
+            palette=palette,
+            show_legend=show_legend,
+        )
+        if save:
+            plt.savefig(save, bbox_inches="tight")
+        if show:
+            plt.show()
+        if return_fig:
+            return plt.gcf()
+        if show or save:
+            return None
+        return ax
+    def plot_effects_barplot(  # pragma: no cover
+        self,
+        data: AnnData | MuData,
+        modality_key: str = "coda",
+        covariates: str | list | None = None,
+        parameter: Literal["log2-fold change", "Final Parameter", "Expected Sample"] = "log2-fold change",
+        plot_facets: bool = True,
+        plot_zero_covariate: bool = True,
+        plot_zero_cell_type: bool = False,
+        palette: str | ListedColormap | None = cm.tab20,
+        level_order: list[str] = None,
+        args_barplot: dict | None = None,
+        figsize: tuple[float, float] | None = None,
+        dpi: int | None = 100,
+        return_fig: bool | None = None,
+        ax: plt.Axes | None = None,
+        show: bool | None = None,
+        save: str | bool | None = None,
+    ) -> plt.Axes | plt.Figure | sns.axisgrid.FacetGrid | None:
+        """Barplot visualization for effects.
+        The effect results for each covariate are shown as a group of barplots, with intra-group separation by cell types.
+        The covariates groups can either be ordered along the x-axis of a single plot (plot_facets=False) or as plot facets (plot_facets=True).
+        Args:
+            data: AnnData object or MuData object.
+            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
+            covariates: The name of the covariates in data.obs to plot. A model covariate is selected if any
+                        of the given names is a substring of its name. Defaults to None (all covariates).
+            parameter: The parameter in effect summary to plot. Defaults to "log2-fold change".
+            plot_facets: If False, plot cell types on the x-axis. If True, plot as facets.
+                         Defaults to True.
+            plot_zero_covariate: If True, plot covariate that have all zero effects. If False, do not plot.
+                                 Defaults to True.
+            plot_zero_cell_type: If True, plot cell type that have zero effect. If False, do not plot.
+                                 Defaults to False.
+            palette: The seaborn color map for the barplot. Defaults to cm.tab20.
+            level_order: Custom ordering of bars on the x-axis. Only used if plot_facets=True. Defaults to None.
+            args_barplot: Arguments passed to sns.barplot. Only used if plot_facets=True. Defaults to None.
+            figsize: Figure size. If plot_facets=True, `figsize[0]` is used as the facet height and
+                     `figsize[1] / figsize[0]` as the facet aspect. Defaults to None.
+            dpi: Dpi setting. Only used if plot_facets=False. Defaults to 100.
+            return_fig: If True, the current matplotlib figure is returned. Defaults to None.
+            ax: Currently not used; the plot is always drawn on a newly created figure. Defaults to None.
+            show: If True, `plt.show()` is called. Defaults to None.
+            save: If set, the figure is written via `plt.savefig(save, bbox_inches="tight")`. Defaults to None.
+        Returns:
+            The current figure if `return_fig` is set. Otherwise, if neither `show` nor `save` is set,
+            depending on `plot_facets`, returns a :class:`~matplotlib.axes.Axes` (`plot_facets = False`)
+            or :class:`~sns.axisgrid.FacetGrid` (`plot_facets = True`) object. Otherwise None.
+        Examples:
+            >>> import pertpy as pt
+            >>> haber_cells = pt.dt.haber_2017_regions()
+            >>> sccoda = pt.tl.Sccoda()
+            >>> mdata = sccoda.load(haber_cells, type="cell_level", generate_sample_level=True, cell_type_identifier="cell_label", \
+                sample_identifier="batch", covariate_obs=["condition"])
+            >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
+            >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
+            >>> sccoda.plot_effects_barplot(mdata)
+        Preview:
+            .. image:: /_static/docstring_previews/sccoda_effects_barplot.png
+        """
+        if args_barplot is None:
+            args_barplot = {}
+        if isinstance(data, MuData):
+            data = data[modality_key]
+        # Get covariate names from adata, partition into those with nonzero effects for min. one cell type/no cell types
+        covariate_names = data.uns["scCODA_params"]["covariate_names"]
+        if covariates is not None:
+            if isinstance(covariates, str):
+                covariates = [covariates]
+            covariate_names = [
+                covariate_name
+                for covariate_name in covariate_names
+                if any(covariate in covariate_name for covariate in covariates)
+            ]
+        covariate_names_non_zero = [
+            covariate_name
+            for covariate_name in covariate_names
+            if data.varm[f"effect_df_{covariate_name}"][parameter].any()
+        ]
+        covariate_names_zero = list(set(covariate_names) - set(covariate_names_non_zero))
+        if not plot_zero_covariate:
+            covariate_names = covariate_names_non_zero
+        # set up df for plotting: long format with one row per (cell type, covariate) pair
+        plot_df = pd.concat(
+            [data.varm[f"effect_df_{covariate_name}"][parameter] for covariate_name in covariate_names],
+            axis=1,
+        )
+        plot_df.columns = covariate_names
+        plot_df = pd.melt(plot_df, ignore_index=False, var_name="Covariate")
+        plot_df = plot_df.reset_index()
+        if covariate_names_zero and plot_facets and plot_zero_covariate and not plot_zero_cell_type:
+            # Covariates without any effect would lose all their rows below; keep one placeholder
+            # row ("zero") for each of them so that their facet is still drawn.
+            plot_df = plot_df[plot_df["value"] != 0]
+            placeholder_rows = pd.DataFrame(
+                [
+                    {"Covariate": covariate_name_zero, "Cell Type": "zero", "value": 0}
+                    for covariate_name_zero in covariate_names_zero
+                ]
+            )
+            plot_df = pd.concat([plot_df, placeholder_rows], ignore_index=True)
+            plot_df["covariate_"] = pd.Categorical(plot_df["Covariate"], covariate_names)
+            plot_df = plot_df.sort_values(["covariate_"])
+        if not plot_zero_cell_type:
+            cell_type_names_zero = [
+                name
+                for name in plot_df["Cell Type"].unique()
+                if (plot_df[plot_df["Cell Type"] == name]["value"] == 0).all()
+            ]
+            plot_df = plot_df[~plot_df["Cell Type"].isin(cell_type_names_zero)]
+        # If plot as facets, create a FacetGrid and map barplot to it.
+        if plot_facets:
+            if isinstance(palette, ListedColormap):
+                palette = np.array([palette(i % palette.N) for i in range(len(plot_df["Cell Type"].unique()))]).tolist()
+            if figsize is not None:
+                height = figsize[0]
+                aspect = np.round(figsize[1] / figsize[0], 2)
+            else:
+                height = 3
+                aspect = 2
+            g = sns.FacetGrid(
+                plot_df,
+                col="Covariate",
+                sharey=True,
+                sharex=False,
+                height=height,
+                aspect=aspect,
+            )
+            g.map(
+                sns.barplot,
+                "Cell Type",
+                "value",
+                palette=palette,
+                order=level_order,
+                **args_barplot,
+            )
+            g.set_xticklabels(rotation=90)
+            g.set(ylabel=parameter)
+            for i, facet_ax in enumerate(g.axes.flatten()):
+                facet_ax.set_title(covariate_names[i])
+                tick_labels = facet_ax.get_xticklabels()
+                # Narrow facets get a fixed aspect; the lower bound avoids a division by zero
+                # for a facet without any tick label.
+                if 0 < len(tick_labels) < 5:
+                    facet_ax.set_aspect(10 / len(tick_labels))
+                    # Hide the tick of the "zero" placeholder row of an all-zero covariate.
+                    if len(tick_labels) == 1 and tick_labels[0].get_text() == "zero":
+                        facet_ax.set_xticks([])
+            result = g
+        # If not plot as facets, call barplot to plot cell types on the x-axis.
+        else:
+            _, ax = plt.subplots(figsize=figsize, dpi=dpi)
+            if len(covariate_names) == 1:
+                if isinstance(palette, ListedColormap):
+                    palette = np.array(
+                        [palette(i % palette.N) for i in range(len(plot_df["Cell Type"].unique()))]
+                    ).tolist()
+                # Color the bars by cell type. `hue` must name a column of plot_df; the x variable
+                # is reused so that `palette` is applied per bar, without dodging the bars.
+                sns.barplot(
+                    data=plot_df,
+                    x="Cell Type",
+                    y="value",
+                    hue="Cell Type",
+                    dodge=False,
+                    palette=palette,
+                    ax=ax,
+                )
+                # The hue legend would only repeat the x tick labels.
+                legend = ax.get_legend()
+                if legend is not None:
+                    legend.remove()
+                ax.set_title(covariate_names[0])
+            else:
+                if isinstance(palette, ListedColormap):
+                    palette = np.array([palette(i % palette.N) for i in range(len(covariate_names))]).tolist()
+                sns.barplot(
+                    data=plot_df,
+                    x="Cell Type",
+                    y="value",
+                    hue="Covariate",
+                    palette=palette,
+                    ax=ax,
+                )
+            cell_types = pd.unique(plot_df["Cell Type"])
+            ax.set_xticklabels(cell_types, rotation=90)
+            result = ax
+        if save:
+            plt.savefig(save, bbox_inches="tight")
+        if show:
+            plt.show()
+        if return_fig:
+            return plt.gcf()
+        if not (show or save):
+            return result
+        return None
+    def plot_boxplots(  # pragma: no cover
+        self,
+        data: AnnData | MuData,
+        feature_name: str,
+        modality_key: str = "coda",
+        y_scale: Literal["relative", "log", "log10", "count"] = "relative",
+        plot_facets: bool = False,
+        add_dots: bool = False,
+        cell_types: list | None = None,
+        args_boxplot: dict | None = None,
+        args_swarmplot: dict | None = None,
+        palette: str | None = "Blues",
+        show_legend: bool | None = True,
+        level_order: list[str] = None,
+        figsize: tuple[float, float] | None = None,
+        dpi: int | None = 100,
+        return_fig: bool | None = None,
+        ax: plt.Axes | None = None,
+        show: bool | None = None,
+        save: str | bool | None = None,
+    ) -> plt.Axes | plt.Figure | sns.axisgrid.FacetGrid | None:
+        """Grouped boxplot visualization.
+         The cell counts for each cell type are shown as a group of boxplots
+         with intra--group separation by a covariate from data.obs.
+        Args:
+            data: AnnData object or MuData object
+            feature_name: The name of the feature in data.obs to plot
+            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
+            y_scale: Transformation to of cell counts. Options: "relative" - Relative abundance, "log" - log(count),
+                     "log10" - log10(count), "count" - absolute abundance (cell counts).
+                     Defaults to "relative".
+            plot_facets: If False, plot cell types on the x-axis. If True, plot as facets. Defaults to False.
+            add_dots: If True, overlay a scatterplot with one dot for each data point. Defaults to False.
+            cell_types: Subset of cell types that should be plotted. Defaults to None.
+            args_boxplot: Arguments passed to sns.boxplot. Defaults to {}.
+            args_swarmplot: Arguments passed to sns.swarmplot. Defaults to {}.
+            figsize: Figure size. Defaults to None.
+            dpi: Dpi setting. Defaults to 100.
+            palette: The seaborn color map for the barplot. Defaults to "Blues".
+            show_legend: If True, adds a legend. Defaults to True.
+            level_order: Custom ordering of bars on the x-axis. Defaults to None.
+        Returns:
+            Depending on `plot_facets`, returns a :class:`~matplotlib.axes.Axes` (`plot_facets = False`)
+            or :class:`~sns.axisgrid.FacetGrid` (`plot_facets = True`) object
+        Examples:
+            >>> import pertpy as pt
+            >>> haber_cells = pt.dt.haber_2017_regions()
+            >>> sccoda = pt.tl.Sccoda()
+            >>> mdata = sccoda.load(haber_cells, type="cell_level", generate_sample_level=True, cell_type_identifier="cell_label", \
+                sample_identifier="batch", covariate_obs=["condition"])
+            >>> sccoda.plot_boxplots(mdata, feature_name="condition", add_dots=True)
+        Preview:
+            .. image:: /_static/docstring_previews/sccoda_boxplots.png
+        """
+        if args_boxplot is None:
+            args_boxplot = {}
+        if args_swarmplot is None:
+            args_swarmplot = {}
+        if isinstance(data, MuData):
+            data = data[modality_key]
+        if isinstance(data, AnnData):
+            data = data
+        # y scale transformations
+        if y_scale == "relative":
+            sample_sums = np.sum(data.X, axis=1, keepdims=True)
+            X = data.X / sample_sums
+            value_name = "Proportion"
+        # add pseudocount 0.5 if using log scale
+        elif y_scale == "log":
+            X = data.X.copy()
+            X[X == 0] = 0.5
+            X = np.log(X)
+            value_name = "log(count)"
+        elif y_scale == "log10":
+            X = data.X.copy()
+            X[X == 0] = 0.5
+            X = np.log(X)
+            value_name = "log10(count)"
+        elif y_scale == "count":
+            X = data.X
+            value_name = "count"
+        else:
+            raise ValueError("Invalid y_scale transformation")
+        count_df = pd.DataFrame(X, columns=data.var.index, index=data.obs.index).merge(
+            data.obs[feature_name], left_index=True, right_index=True
+        )
+        plot_df = pd.melt(count_df, id_vars=feature_name, var_name="Cell type", value_name=value_name)
+        if cell_types is not None:
+            plot_df = plot_df[plot_df["Cell type"].isin(cell_types)]
+        # Currently disabled because the latest statsannotations does not support the latest seaborn.
+        # We had to drop the dependency.
+        # Get credible effects results from model
+        # if draw_effects:
+        #     if model is not None:
+        #         credible_effects_df = model.credible_effects(data, modality_key).to_frame().reset_index()
+        #     else:
+        #         print("[bold yellow]Specify a tasCODA model to draw effects")
+        #     credible_effects_df[feature_name] = credible_effects_df["Covariate"].str.removeprefix(f"{feature_name}[T.")
+        #     credible_effects_df[feature_name] = credible_effects_df[feature_name].str.removesuffix("]")
+        #     credible_effects_df = credible_effects_df[credible_effects_df["Final Parameter"]]
+        # If plot as facets, create a FacetGrid and map boxplot to it.
+        if plot_facets:
+            if level_order is None:
+                level_order = pd.unique(plot_df[feature_name])
+            K = X.shape[1]
+            if figsize is not None:
+                height = figsize[0]
+                aspect = np.round(figsize[1] / figsize[0], 2)
+            else:
+                height = 3
+                aspect = 2
+            g = sns.FacetGrid(
+                plot_df,
+                col="Cell type",
+                sharey=False,
+                col_wrap=int(np.floor(np.sqrt(K))),
+                height=height,
+                aspect=aspect,
+            )
+            g.map(
+                sns.boxplot,
+                feature_name,
+                value_name,
+                palette=palette,
+                order=level_order,
+                **args_boxplot,
+            )
+            if add_dots:
+                if "hue" in args_swarmplot:
+                    hue = args_swarmplot.pop("hue")
+                else:
+                    hue = None
+                if hue is None:
+                    g.map(
+                        sns.swarmplot,
+                        feature_name,
+                        value_name,
+                        color="black",
+                        order=level_order,
+                        **args_swarmplot,
+                    ).set_titles("{col_name}")
+                else:
+                    g.map(
+                        sns.swarmplot,
+                        feature_name,
+                        value_name,
+                        hue,
+                        order=level_order,
+                        **args_swarmplot,
+                    ).set_titles("{col_name}")
+            if save:
+                plt.savefig(save, bbox_inches="tight")
+            if show:
+                plt.show()
+            if return_fig:
+                return plt.gcf()
+            if not (show or save):
+                return g
+            return None
+        # If not plot as facets, call boxplot to plot cell types on the x-axis.
+        else:
+            if level_order:
+                args_boxplot["hue_order"] = level_order
+                args_swarmplot["hue_order"] = level_order
+            _, ax = plt.subplots(figsize=figsize, dpi=dpi)
+            ax = sns.boxplot(
+                x="Cell type",
+                y=value_name,
+                hue=feature_name,
+                data=plot_df,
+                fliersize=1,
+                palette=palette,
+                ax=ax,
+                **args_boxplot,
+            )
+            # Currently disabled because the latest statsannotations does not support the latest seaborn.
+            # We had to drop the dependency.
+            # if draw_effects:
+            #     pairs = [
+            #         [(row["Cell Type"], row[feature_name]), (row["Cell Type"], "Control")]
+            #         for _, row in credible_effects_df.iterrows()
+            #     ]
+            #     annot = Annotator(ax, pairs, data=plot_df, x="Cell type", y=value_name, hue=feature_name)
+            #     annot.configure(test=None, loc="outside", color="red", line_height=0, verbose=False)
+            #     annot.set_custom_annotations([row[feature_name] for _, row in credible_effects_df.iterrows()])
+            #     annot.annotate()
+            if add_dots:
+                sns.swarmplot(
+                    x="Cell type",
+                    y=value_name,
+                    data=plot_df,
+                    hue=feature_name,
+                    ax=ax,
+                    dodge=True,
+                    palette="dark:black",
+                    **args_swarmplot,
+                )
+            cell_types = pd.unique(plot_df["Cell type"])
+            ax.set_xticklabels(cell_types, rotation=90)
+            if show_legend:
+                handles, labels = ax.get_legend_handles_labels()
+                handout = []
+                labelout = []
+                for h, l in zip(handles, labels, strict=False):
+                    if l not in labelout:
+                        labelout.append(l)
+                        handout.append(h)
+                ax.legend(
+                    handout,
+                    labelout,
+                    loc="upper left",
+                    bbox_to_anchor=(1, 1),
+                    ncol=1,
+                    title=feature_name,
+                )
+            if save:
+                plt.savefig(save, bbox_inches="tight")
+            if show:
+                plt.show()
+            if return_fig:
+                return plt.gcf()
+            if not (show or save):
+                return ax
+            return None
+    def plot_rel_abundance_dispersion_plot(  # pragma: no cover
+        self,
+        data: AnnData | MuData,
+        modality_key: str = "coda",
+        abundant_threshold: float | None = 0.9,
+        default_color: str | None = "Grey",
+        abundant_color: str | None = "Red",
+        label_cell_types: bool = True,
+        figsize: tuple[float, float] | None = None,
+        dpi: int | None = 100,
+        return_fig: bool | None = None,
+        ax: plt.Axes | None = None,
+        show: bool | None = None,
+        save: str | bool | None = None,
+    ) -> plt.Axes | plt.Figure | None:
+        """Plot the dispersion of relative abundance against the presence of every cell type.
+        For each cell type, the variance-to-mean ratio of its relative abundance across samples is plotted
+        against its presence (the fraction of samples in which its count is non-zero).
+        The plot is meant to help with the determination of a reference cell type.
+        Cell types whose presence is larger than `abundant_threshold` are drawn in `abundant_color`.
+        Args:
+            data: AnnData or MuData object.
+            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
+            abundant_threshold: Presence threshold for abundant cell types. Defaults to 0.9.
+            default_color: Dot color for cell types that are not abundant. Defaults to "Grey".
+            abundant_color: Dot color for cell types whose presence is larger than abundant_threshold.
+                            Defaults to "Red".
+            label_cell_types: Label the dots of abundant cell types with the cell type names. Defaults to True.
+            figsize: Figure size. Only used if no `ax` is passed. Defaults to None.
+            dpi: Dpi setting. Only used if no `ax` is passed. Defaults to 100.
+            return_fig: If truthy, return the current matplotlib figure. Defaults to None.
+            ax: A matplotlib axes object to draw on. A new figure is created if None. Defaults to None.
+            show: If truthy, call `plt.show()`. Defaults to None.
+            save: If truthy, it is passed to `plt.savefig` as the output target. Defaults to None.
+        Returns:
+            The current figure if `return_fig` is set, the :class:`~matplotlib.axes.Axes` object if neither
+            `show` nor `save` is set, and None otherwise.
+        Examples:
+            >>> import pertpy as pt
+            >>> haber_cells = pt.dt.haber_2017_regions()
+            >>> sccoda = pt.tl.Sccoda()
+            >>> mdata = sccoda.load(
+            ...     haber_cells,
+            ...     type="cell_level",
+            ...     generate_sample_level=True,
+            ...     cell_type_identifier="cell_label",
+            ...     sample_identifier="batch",
+            ...     covariate_obs=["condition"],
+            ... )
+            >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
+            >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
+            >>> sccoda.plot_rel_abundance_dispersion_plot(mdata)
+        Preview:
+            .. image:: /_static/docstring_previews/sccoda_rel_abundance_dispersion_plot.png
+        """
+        if isinstance(data, MuData):
+            data = data[modality_key]
+        if ax is None:
+            _, ax = plt.subplots(figsize=figsize, dpi=dpi)
+        # Per-sample relative abundances and per-cell-type share of samples with a zero count
+        rel_abun = data.X / np.sum(data.X, axis=1, keepdims=True)
+        percent_zero = np.sum(data.X == 0, axis=0) / data.X.shape[0]
+        nonrare_ct = np.where(percent_zero < 1 - abundant_threshold)[0]
+        # Dispersion = variance / mean of the relative abundance of each cell type
+        cell_type_disp = np.var(rel_abun, axis=0) / np.mean(rel_abun, axis=0)
+        is_abundant = np.isin(np.arange(data.X.shape[1]), nonrare_ct)
+        plot_df = pd.DataFrame(
+            {
+                "Total dispersion": cell_type_disp,
+                "Cell type": data.var.index,
+                "Presence": 1 - percent_zero,
+                "Is abundant": is_abundant,
+            }
+        )
+        # The palette must hold exactly one color per hue level that occurs (False sorts before True)
+        if is_abundant.any() and not is_abundant.all():
+            palette = [default_color, abundant_color]
+        elif not is_abundant.any():
+            palette = [default_color]
+        else:
+            palette = [abundant_color]
+        ax = sns.scatterplot(
+            data=plot_df,
+            x="Presence",
+            y="Total dispersion",
+            hue="Is abundant",
+            palette=palette,
+            ax=ax,
+        )
+        # Text labels for abundant cell types. They are attached to `ax` itself and not to the current
+        # pyplot axes, which may be a different one when the caller passed in `ax`.
+        if label_cell_types:
+            abundant_df = plot_df.loc[plot_df["Is abundant"], :]
+            texts = [
+                ax.text(point["Presence"], point["Total dispersion"], str(point["Cell type"]))
+                for _, point in abundant_df.iterrows()
+            ]
+            adjust_text(texts, ax=ax)
+        ax.legend(loc="upper left", bbox_to_anchor=(1, 1), ncol=1, title="Is abundant")
+        if save:
+            plt.savefig(save, bbox_inches="tight")
+        if show:
+            plt.show()
+        if return_fig:
+            return plt.gcf()
+        if not (show or save):
+            return ax
+        return None
+    def plot_draw_tree(  # pragma: no cover
+        self,
+        data: AnnData | MuData,
+        modality_key: str = "coda",
+        tree: str = "tree",  # Also type ete3.Tree. Omitted due to import errors
+        tight_text: bool | None = False,
+        show_scale: bool | None = False,
+        units: Literal["px", "mm", "in"] | None = "px",
+        figsize: tuple[float, float] | None = (None, None),
+        dpi: int | None = 100,
+        show: bool | None = True,
+        save: str | bool | None = None,
+    ) -> Tree | None:
+        """Plot a tree using input ete3 tree object.
+        Args:
+            data: AnnData object or MuData object.
+            modality_key: If data is a MuData object, specify which modality to use.
+                          Defaults to "coda".
+            tree: A ete3 tree object or a str to indicate the tree stored in `.uns`.
+                  Defaults to "tree".
+            tight_text: When False, boundaries of the text are approximated according to general font metrics,
+                        producing slightly worse aligned text faces but improving the performance of tree visualization in scenes with a lot of text faces.
+                        Defaults to False.
+            show_scale: Include the scale legend in the tree image or not.
+                        Defaults to False.
+            units: Unit of image sizes. "px": pixels, "mm": millimeters, "in": inches. Defaults to "px".
+            figsize: Width and height of the rendered image, passed to ete3 as `w` and `h`.
+                     None is treated like (None, None). Defaults to (None, None).
+            dpi: Dots per inches. Defaults to 100.
+            show: If True, plot the tree inline. If False, return tree and tree_style objects.
+                  Defaults to True.
+            save: Path to the output image file. The image is rendered to this path whether show is True or not.
+                  Nothing is written if None or False. Defaults to None.
+        Returns:
+            If `show` is truthy, the result of rendering the tree inline.
+            Otherwise a tuple of the tree and the :class:`ete3.TreeStyle` that was built for it.
+        Examples:
+            >>> import pertpy as pt
+            >>> adata = pt.dt.tasccoda_example()
+            >>> tasccoda = pt.tl.Tasccoda()
+            >>> mdata = tasccoda.load(
+            >>>     adata, type="sample_level",
+            >>>     levels_agg=["Major_l1", "Major_l2", "Major_l3", "Major_l4", "Cluster"],
+            >>>     key_added="lineage", add_level_name=True
+            >>> )
+            >>> mdata = tasccoda.prepare(
+            >>>     mdata, formula="Health", reference_cell_type="automatic", tree_key="lineage", pen_args={"phi": 0}
+            >>> )
+            >>> tasccoda.run_nuts(mdata, num_samples=1000, num_warmup=100, rng_key=42)
+            >>> tasccoda.plot_draw_tree(mdata, tree="lineage")
+        Preview:
+            .. image:: /_static/docstring_previews/tasccoda_draw_tree.png
+        """
+        try:
+            from ete3 import TextFace, TreeStyle, faces
+        except ImportError:
+            raise ImportError(
+                "To use tasccoda please install additional dependencies with `pip install pertpy[coda]`"
+            ) from None
+        if isinstance(data, MuData):
+            data = data[modality_key]
+        if isinstance(tree, str):
+            tree = data.uns[tree]
+        # Accept figsize=None as "let ete3 choose the size" instead of failing on figsize[0]
+        width, height = figsize if figsize is not None else (None, None)
+        def my_layout(node):
+            # Draw every node name to the right of its branch
+            text_face = TextFace(node.name, tight_text=tight_text)
+            faces.add_face_to_node(text_face, node, column=0, position="branch-right")
+        tree_style = TreeStyle()
+        tree_style.show_leaf_name = False
+        tree_style.layout_fn = my_layout
+        tree_style.show_scale = show_scale
+        render_kwargs = {"tree_style": tree_style, "units": units, "w": width, "h": height, "dpi": dpi}
+        if save:
+            tree.render(save, **render_kwargs)  # type: ignore
+        if show:
+            return tree.render("%%inline", **render_kwargs)  # type: ignore
+        return tree, tree_style
+    def plot_draw_effects(  # pragma: no cover
+        self,
+        data: AnnData | MuData,
+        covariate: str,
+        modality_key: str = "coda",
+        tree: str = "tree",  # Also type ete3.Tree. Omitted due to import errors
+        show_legend: bool | None = None,
+        show_leaf_effects: bool | None = False,
+        tight_text: bool | None = False,
+        show_scale: bool | None = False,
+        units: Literal["px", "mm", "in"] | None = "px",
+        figsize: tuple[float, float] | None = (None, None),
+        dpi: int | None = 100,
+        show: bool | None = True,
+        save: str | None = None,
+    ) -> Tree | None:
+        """Plot a tree with colored circles on the nodes indicating significant effects with bar plots which indicate leave-level significant effects.
+        Args:
+            data: AnnData object or MuData object.
+            covariate: The covariate, whose effects should be plotted.
+            modality_key: If data is a MuData object, specify which modality to use.
+                          Defaults to "coda".
+            tree: A ete3 tree object or a str to indicate the tree stored in `.uns`.
+                  Defaults to "tree".
+            show_legend: If show legend of nodes significant effects or not.
+                         If None, the legend is shown exactly when show_leaf_effects is False. Defaults to None.
+            show_leaf_effects: If True, plot bar plots which indicate leave-level significant effects.
+                               Defaults to False.
+            tight_text: When False, boundaries of the text are approximated according to general font metrics,
+                        producing slightly worse aligned text faces but improving the performance of tree visualization in scenes with a lot of text faces.
+                        Defaults to False.
+            show_scale: Include the scale legend in the tree image or not. Defaults to False.
+            units: Unit of image sizes. "px": pixels, "mm": millimeters, "in": inches. Defaults to "px".
+            figsize: Width and height of the rendered tree image, passed to ete3 as `w` and `h`.
+                     None is treated like (None, None). Not used when show_leaf_effects is True.
+                     Defaults to (None, None).
+            dpi: Dots per inches of the rendered tree image. Not used when show_leaf_effects is True. Defaults to 100.
+            show: If True, plot the tree inline. If False, return tree and tree_style objects. Defaults to True.
+            save: Path to the output image file. The image is saved whether show is True or not.
+                  Defaults to None.
+        Returns:
+            None if `show_leaf_effects` is True (the combined figure is only drawn with matplotlib).
+            Otherwise the result of rendering the tree inline if `show` is truthy, or a tuple of the collapsed
+            tree and its :class:`ete3.TreeStyle` if not.
+        Examples:
+            >>> import pertpy as pt
+            >>> adata = pt.dt.tasccoda_example()
+            >>> tasccoda = pt.tl.Tasccoda()
+            >>> mdata = tasccoda.load(
+            >>>     adata, type="sample_level",
+            >>>     levels_agg=["Major_l1", "Major_l2", "Major_l3", "Major_l4", "Cluster"],
+            >>>     key_added="lineage", add_level_name=True
+            >>> )
+            >>> mdata = tasccoda.prepare(
+            >>>     mdata, formula="Health", reference_cell_type="automatic", tree_key="lineage", pen_args={"phi": 0}
+            >>> )
+            >>> tasccoda.run_nuts(mdata, num_samples=1000, num_warmup=100, rng_key=42)
+            >>> tasccoda.plot_draw_effects(mdata, covariate="Health[T.Inflamed]", tree="lineage")
+        Preview:
+            .. image:: /_static/docstring_previews/tasccoda_draw_effects.png
+        """
+        import tempfile
+        try:
+            from ete3 import CircleFace, NodeStyle, TextFace, TreeStyle, faces
+        except ImportError:
+            raise ImportError(
+                "To use tasccoda please install additional dependencies as `pip install pertpy[coda]`"
+            ) from None
+        if isinstance(data, MuData):
+            data = data[modality_key]
+        if show_legend is None:
+            show_legend = not show_leaf_effects
+        elif show_legend and show_leaf_effects:
+            # The warning only concerns the combined tree + bar plot figure
+            print("Tree leaves and leaf effect bars won't be aligned when legend is shown!")
+        if isinstance(tree, str):
+            tree = data.uns[tree]
+        # Accept figsize=None as "let ete3 choose the size" instead of failing on figsize[0]
+        width, height = figsize if figsize is not None else (None, None)
+        # Collapse tree singularities
+        tree2 = collapse_singularities_2(tree)
+        # Node-level effects of the requested covariate, indexed by node name
+        node_effs = data.uns["scCODA_params"]["node_df"].loc[(covariate + "_node",),].copy()
+        node_effs.index = node_effs.index.get_level_values("Node")
+        # Leaf-level effects of the requested covariate, indexed by cell type
+        covariates = data.uns["scCODA_params"]["covariate_names"]
+        effect_dfs = [data.varm[f"effect_df_{cov}"] for cov in covariates]
+        eff_df = pd.concat(effect_dfs)
+        eff_df.index = pd.MultiIndex.from_product(
+            (covariates, data.var.index.tolist()),
+            names=["Covariate", "Cell Type"],
+        )
+        leaf_effs = eff_df.loc[(covariate,),].copy()
+        leaf_effs.index = leaf_effs.index.get_level_values("Cell Type")
+        # Add effect values to every node; nodes without an entry get an effect of 0
+        for n in tree2.traverse():
+            nstyle = NodeStyle()
+            nstyle["size"] = 0
+            n.set_style(nstyle)
+            if n.name in node_effs.index:
+                n.add_feature("node_effect", node_effs.loc[n.name, "Final Parameter"])
+            else:
+                n.add_feature("node_effect", 0)
+            if n.name in leaf_effs.index:
+                n.add_feature("leaf_effect", leaf_effs.loc[n.name, "Effect"])
+            else:
+                n.add_feature("leaf_effect", 0)
+        # Scale effect values to get nice node sizes
+        eff_max = np.max([np.abs(n.node_effect) for n in tree2.traverse()])
+        leaf_eff_max = np.max([np.abs(n.leaf_effect) for n in tree2.traverse()])
+        def my_layout(node):
+            text_face = TextFace(node.name, tight_text=tight_text)
+            text_face.margin_left = 10
+            faces.add_face_to_node(text_face, node, column=0, aligned=True)
+            # Circle radius is proportional to the absolute effect, the largest effect gets radius 10
+            size = (np.abs(node.node_effect) * 10 / eff_max) if node.node_effect != 0 else 0
+            if np.sign(node.node_effect) == 1:
+                color = "blue"
+            elif np.sign(node.node_effect) == -1:
+                color = "red"
+            else:
+                color = "cyan"
+            if size != 0:
+                faces.add_face_to_node(CircleFace(radius=size, color=color), node, column=0)
+        tree_style = TreeStyle()
+        tree_style.show_leaf_name = False
+        tree_style.layout_fn = my_layout
+        tree_style.show_scale = show_scale
+        tree_style.draw_guiding_lines = True
+        tree_style.legend_position = 1
+        if show_legend:
+            tree_style.legend.add_face(TextFace("Effects"), column=0)
+            tree_style.legend.add_face(TextFace("       "), column=1)
+            for i in range(4, 0, -1):
+                # Same scaling as in my_layout: an effect of eff_max * i / 4 has radius 10 * i / 4.
+                # Computed without dividing by eff_max so that it stays finite when eff_max is 0.
+                radius = 10 * i / 4
+                tree_style.legend.add_face(CircleFace(radius, "red"), column=0)
+                tree_style.legend.add_face(TextFace(f"{-eff_max * i / 4:.2f} "), column=0)
+                tree_style.legend.add_face(CircleFace(radius, "blue"), column=1)
+                tree_style.legend.add_face(TextFace(f" {eff_max * i / 4:.2f}"), column=1)
+        if show_leaf_effects:
+            leaf_name = [node.name for node in tree2.traverse("postorder") if node.is_leaf()]
+            leaf_effs = leaf_effs.loc[leaf_name].reset_index()
+            palette = ["blue" if effect > 0 else "red" for effect in leaf_effs["Effect"].tolist()]
+            # Render the tree into a scratch file that is removed again, instead of leaving
+            # "tree_effect.png" behind in the working directory. The path is handed over as str.
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                img_path = str(Path(tmp_dir) / "tree_effect.png")
+                tree2.render(img_path, tree_style=tree_style, units="in")
+                img = mpimg.imread(img_path)
+            _, axes = plt.subplots(1, 2, figsize=(10, 10))
+            sns.barplot(data=leaf_effs, x="Effect", y="Cell Type", palette=palette, ax=axes[1])
+            axes[0].imshow(img)
+            axes[0].get_xaxis().set_visible(False)
+            axes[0].get_yaxis().set_visible(False)
+            axes[0].set_frame_on(False)
+            axes[1].get_yaxis().set_visible(False)
+            axes[1].spines["left"].set_visible(False)
+            axes[1].spines["right"].set_visible(False)
+            axes[1].spines["top"].set_visible(False)
+            axes[1].set_xlim(-leaf_eff_max, leaf_eff_max)
+            plt.subplots_adjust(wspace=0)
+            if save is not None:
+                plt.savefig(save)
+            return None
+        if save is not None:
+            tree2.render(save, tree_style=tree_style, units=units, w=width, h=height, dpi=dpi)
+        if show:
+            return tree2.render("%%inline", tree_style=tree_style, units=units, w=width, h=height, dpi=dpi)
+        return tree2, tree_style
+    def plot_effects_umap(  # pragma: no cover
+        self,
+        mdata: MuData,
+        effect_name: str | list | None,
+        cluster_key: str,
+        modality_key_1: str = "rna",
+        modality_key_2: str = "coda",
+        color_map: Colormap | str | None = None,
+        palette: str | Sequence[str] | None = None,
+        return_fig: bool | None = None,
+        ax: Axes = None,
+        show: bool = None,
+        save: str | bool | None = None,
+        **kwargs,
+    ) -> plt.Axes | plt.Figure | None:
+        """Plot a UMAP visualization colored by effect strength.
+        Effect results in .varm of aggregated sample-level AnnData (default is data['coda']) are assigned to cell-level AnnData
+        (default is data['rna']) depending on the cluster they were assigned to.
+        The per-cell effect values are written to `.obs` of the cell-level AnnData, one column per effect name.
+        Args:
+            mdata: MuData object.
+            effect_name: The name (or list of names) of the effect results in .varm of aggregated sample-level AnnData to plot
+            cluster_key: The cluster information in .obs of cell-level AnnData (default is data['rna']).
+                         To assign cell types' effects to original cells.
+            modality_key_1: Key to the cell-level AnnData in the MuData object. Defaults to "rna".
+            modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
+                            Defaults to "coda".
+            color_map: Passed to `scanpy.plot.umap()` as `color_map`. Defaults to None.
+            palette: Passed to `scanpy.plot.umap()` as `palette`. Defaults to None.
+            return_fig: Passed to `scanpy.plot.umap()` as `return_fig`. Defaults to None.
+            ax: A matplotlib axes object. Only works if plotting a single component.
+                Defaults to None.
+            show: Whether to display the figure or return axis. Defaults to None.
+            save: Passed to `scanpy.plot.umap()` as `save`. Defaults to None.
+            **kwargs: All other keyword arguments are passed to `scanpy.plot.umap()`.
+                      `vmin` and `vmax` default to the smallest and largest effect value over all plotted effects.
+        Returns:
+            If `show==False` a :class:`~matplotlib.axes.Axes` or a list of it.
+        Examples:
+            >>> import pertpy as pt
+            >>> import scanpy as sc
+            >>> import schist
+            >>> adata = pt.dt.haber_2017_regions()
+            >>> sc.pp.neighbors(adata)
+            >>> schist.inference.nested_model(adata, n_init=100, random_seed=5678)
+            >>> tasccoda_model = pt.tl.Tasccoda()
+            >>> tasccoda_data = tasccoda_model.load(adata, type="cell_level",
+            >>>                 cell_type_identifier="nsbm_level_1",
+            >>>                 sample_identifier="batch", covariate_obs=["condition"],
+            >>>                 levels_orig=["nsbm_level_4", "nsbm_level_3", "nsbm_level_2", "nsbm_level_1"],
+            >>>                 add_level_name=True)
+            >>> tasccoda_model.prepare(
+            >>>     tasccoda_data,
+            >>>     modality_key="coda",
+            >>>     reference_cell_type="18",
+            >>>     formula="condition",
+            >>>     pen_args={"phi": 0, "lambda_1": 3.5},
+            >>>     tree_key="tree"
+            >>> )
+            >>> tasccoda_model.run_nuts(
+            ...     tasccoda_data, modality_key="coda", rng_key=1234, num_samples=10000, num_warmup=1000
+            ... )
+            >>> sc.tl.umap(tasccoda_data["rna"])
+            >>> tasccoda_model.plot_effects_umap(tasccoda_data,
+            >>>                         effect_name=["effect_df_condition[T.Salmonella]",
+            >>>                                      "effect_df_condition[T.Hpoly.Day3]",
+            >>>                                      "effect_df_condition[T.Hpoly.Day10]"],
+            >>>                                       cluster_key="nsbm_level_1",
+            >>>                         )
+        Preview:
+            .. image:: /_static/docstring_previews/tasccoda_effects_umap.png
+        """
+        data_rna = mdata[modality_key_1]
+        data_coda = mdata[modality_key_2]
+        if isinstance(effect_name, str):
+            effect_name = [effect_name]
+        # Look up, for every cell, the effect of the cluster it belongs to
+        for effect in effect_name:
+            effect_df = data_coda.varm[effect]
+            data_rna.obs[effect] = [effect_df.loc[f"{c}", "Effect"] for c in data_rna.obs[cluster_key]]
+        # Always take vmin/vmax out of kwargs so they are never passed to scanpy twice.
+        # A truthiness test would treat an explicit 0 as "not given".
+        vmin = kwargs.pop("vmin", None)
+        if vmin is None:
+            vmin = min(data_rna.obs[effect].min() for effect in effect_name)
+        vmax = kwargs.pop("vmax", None)
+        if vmax is None:
+            vmax = max(data_rna.obs[effect].max() for effect in effect_name)
+        return sc.pl.umap(
+            data_rna,
+            color=effect_name,
+            vmax=vmax,
+            vmin=vmin,
+            palette=palette,
+            color_map=color_map,
+            return_fig=return_fig,
+            ax=ax,
+            show=show,
+            save=save,
+            **kwargs,
+        )
 def get_a(
     tree: tt.tree,
@@ -1242,7 +2381,7 @@ def df2newick(df: pd.DataFrame, levels: list[str], inner_label: bool = True) ->
 def get_a_2(
-    tree: ete.Tree,
+    tree: Tree,
     leaf_order: list[str] = None,
     node_order: list[str] = None,
 ) -> tuple[np.ndarray, int]:
@@ -1263,6 +2402,13 @@ def get_a_2(
         T
             number of nodes in the tree, excluding the root node
     """
+    try:
+        import ete3 as ete
+    except ImportError:
+        raise ImportError(
+            "To use tasccoda please install additional dependencies as `pip install pertpy[coda]`"
+        ) from None
     n_tips = len(tree.get_leaves())
     n_nodes = len(tree.get_descendants())
@@ -1292,7 +2438,7 @@ def get_a_2(
     return A_, n_nodes
-def collapse_singularities_2(tree: ete.Tree) -> ete.Tree:
+def collapse_singularities_2(tree: Tree) -> Tree:
     """Collapses (deletes) nodes in a ete3 tree that are singularities (have only one child).
     Args:
@@ -1327,10 +2473,10 @@ def linkage_to_newick(
     def build_newick(node, newick, parentdist, leaf_names):
         if node.is_leaf():
-            return f"{leaf_names[node.id]}:{(parentdist - node.dist)/2}{newick}"
+            return f"{leaf_names[node.id]}:{(parentdist - node.dist) / 2}{newick}"
         else:
             if len(newick) > 0:
-                newick = f"):{(parentdist - node.dist)/2}{newick}"
+                newick = f"):{(parentdist - node.dist) / 2}{newick}"
             else:
                 newick = ");"
             newick = build_newick(node.get_left(), newick, node.dist, leaf_names)
@@ -1363,14 +2509,15 @@ def import_tree(
     Args:
         data: A tascCODA-compatible data object.
-        modality_1: If `data` is MuData, specifiy the modality name to the original cell level anndata object. Defaults to None.
-        modality_2: If `data` is MuData, specifiy the modality name to the aggregated level anndata object. Defaults to None.
+        modality_1: If `data` is MuData, specify the modality name to the original cell level anndata object. Defaults to None.
+        modality_2: If `data` is MuData, specify the modality name to the aggregated level anndata object. Defaults to None.
         dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object. Defaults to None.
         levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level. Defaults to None.
         levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level. Defaults to None.
-        add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}. Defaults to True.
-        key_added: If not specified, the tree is stored in .uns[‘tree’]. If `data` is AnnData, save tree in `data`. If `data` is MuData, save tree in data[modality_2]. Defaults to "tree".
-        copy: Return a copy instead of writing to `data`. Defaults to False.
+        add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}.
+                        Defaults to True.
+        key_added: If not specified, the tree is stored in .uns[‘tree’]. If `data` is AnnData, save tree in `data`.
+                   If `data` is MuData, save tree in data[modality_2]. Defaults to "tree".
     Returns:
         Updates data with the following:
@@ -1379,6 +2526,13 @@ def import_tree(
         tree: A ete3 tree object.
     """
+    try:
+        import ete3 as ete
+    except ImportError:
+        raise ImportError(
+            "To use tasccoda please install additional dependencies as `pip install pertpy[coda]`"
+        ) from None
     if isinstance(data, MuData):
         try:
             data_1 = data[modality_1]
@@ -1443,16 +2597,17 @@ def from_scanpy(
     The anndata object needs to have a column in adata.obs that contains the cell type assignment.
     Further, it must contain one column or a set of columns (e.g. subject id, treatment, disease status) that uniquely identify each (statistical) sample.
-    Further covariates (e.g. subject age) can either be specified via addidional column names in adata.obs, a key in adata.uns, or as a separate DataFrame.
+    Further covariates (e.g. subject age) can either be specified via additional column names in adata.obs, a key in adata.uns, or as a separate DataFrame.
-    NOTE: The order of samples in the returned dataset is determined by the first occurence of cells from each sample in `adata`
+    NOTE: The order of samples in the returned dataset is determined by the first occurrence of cells from each sample in `adata`
     Args:
         adata: An anndata object from scanpy
         cell_type_identifier: column name in adata.obs that specifies the cell types
         sample_identifier: column name or list of column names in adata.obs that uniquely identify each sample
         covariate_uns: key for adata.uns, where covariate values are stored
-        covariate_obs: list of column names in adata.obs, where covariate values are stored. Note: If covariate values are not unique for a value of sample_identifier, this covaariate will be skipped.
+        covariate_obs: list of column names in adata.obs, where covariate values are stored.
+                       Note: If covariate values are not unique for a value of sample_identifier, this covariate will be skipped.
         covariate_df: DataFrame with covariates
     Returns:
@@ -1461,50 +2616,40 @@ def from_scanpy(
     if isinstance(sample_identifier, str):
         sample_identifier = [sample_identifier]
-    if covariate_obs:
-        covariate_obs += [i for i in sample_identifier if i not in covariate_obs]
-    else:
-        covariate_obs = sample_identifier  # type: ignore
-    # join sample identifiers
-    if isinstance(sample_identifier, list):
+    if len(sample_identifier) > 1:
         adata.obs["scCODA_sample_id"] = adata.obs[sample_identifier].agg("-".join, axis=1)
         sample_identifier = "scCODA_sample_id"
+    else:
+        sample_identifier = sample_identifier[0]
     # get cell type counts
-    groups = adata.obs.value_counts([sample_identifier, cell_type_identifier])
-    count_data = groups.unstack(level=cell_type_identifier)
-    count_data = count_data.fillna(0)
+    ct_count_data = pd.crosstab(adata.obs[sample_identifier], adata.obs[cell_type_identifier])
+    ct_count_data = ct_count_data.fillna(0)
     # get covariates from different sources
-    covariate_df_ = pd.DataFrame(index=count_data.index)
-    if covariate_df is None and covariate_obs is None and covariate_uns is None:
-        print("No covariate information specified!")
+    covariate_df_ = pd.DataFrame(index=ct_count_data.index)
     if covariate_uns is not None:
-        covariate_df_uns = pd.DataFrame(adata.uns[covariate_uns])
-        covariate_df_ = pd.concat((covariate_df_, covariate_df_uns), axis=1)
+        covariate_df_uns = pd.DataFrame(adata.uns[covariate_uns], index=ct_count_data.index)
+        covariate_df_ = covariate_df_.join(covariate_df_uns, how="left")
-    if covariate_obs is not None:
-        for c in covariate_obs:
-            if any(adata.obs.groupby(sample_identifier).nunique()[c] != 1):
-                print(f"Covariate {c} has non-unique values! Skipping...")
-                covariate_obs.remove(c)
+    if covariate_obs:
+        is_unique = adata.obs.groupby(sample_identifier, observed=True).transform(lambda x: x.nunique() == 1)
+        unique_covariates = is_unique.columns[is_unique.all()].tolist()
-        covariate_df_obs = adata.obs.groupby(sample_identifier).first()[covariate_obs]
-        covariate_df_ = pd.concat((covariate_df_, covariate_df_obs), axis=1)
+        if len(unique_covariates) < len(covariate_obs):
+            skipped = set(covariate_obs) - set(unique_covariates)
+            print(f"[bold yellow]Covariates {skipped} have non-unique values! Skipping...")
+        if unique_covariates:
+            covariate_df_obs = adata.obs.groupby(sample_identifier, observed=True).first()[unique_covariates]
+            covariate_df_ = covariate_df_.join(covariate_df_obs, how="left")
     if covariate_df is not None:
-        if set(covariate_df.index) != set(count_data.index):
-            raise ValueError("anndata sample names and covariate_df index do not have the same elements!")
-        covs_ord = covariate_df.reindex(count_data.index)
-        covariate_df_ = pd.concat((covariate_df_, covs_ord), axis=1)
-    covariate_df_.index = covariate_df_.index.astype(str)
+        if not covariate_df.index.equals(ct_count_data.index):
+            raise ValueError("AnnData sample names and covariate_df index do not have the same elements!")
+        covariate_df_ = covariate_df_.join(covariate_df, how="left")
-    # create var (number of cells for each type as only column)
-    var_dat = count_data.sum(axis=0).rename("n_cells").to_frame()
+    var_dat = ct_count_data.sum(axis=0).rename("n_cells").to_frame()
     var_dat.index = var_dat.index.astype(str)
-    return AnnData(X=count_data.values, var=var_dat, obs=covariate_df_)
+    return AnnData(X=ct_count_data.values, var=var_dat, obs=covariate_df_)

pertpy 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

pertpy 0.6.0py3-none-any.whl → 0.7.0py3-none-any.whl