PyPI - pertpy - Versions diffs - 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

pertpy/__init__.py +4 -2
pertpy/data/__init__.py +66 -1
pertpy/data/_dataloader.py +28 -26
pertpy/data/_datasets.py +261 -92
pertpy/metadata/__init__.py +6 -0
pertpy/metadata/_cell_line.py +795 -0
pertpy/metadata/_compound.py +128 -0
pertpy/metadata/_drug.py +238 -0
pertpy/metadata/_look_up.py +569 -0
pertpy/metadata/_metadata.py +70 -0
pertpy/metadata/_moa.py +125 -0
pertpy/plot/__init__.py +0 -13
pertpy/preprocessing/__init__.py +2 -0
pertpy/preprocessing/_guide_rna.py +89 -6
pertpy/tools/__init__.py +48 -15
pertpy/tools/_augur.py +329 -32
pertpy/tools/_cinemaot.py +145 -6
pertpy/tools/_coda/_base_coda.py +1237 -116
pertpy/tools/_coda/_sccoda.py +66 -36
pertpy/tools/_coda/_tasccoda.py +46 -39
pertpy/tools/_dialogue.py +180 -77
pertpy/tools/_differential_gene_expression/__init__.py +20 -0
pertpy/tools/_differential_gene_expression/_base.py +657 -0
pertpy/tools/_differential_gene_expression/_checks.py +41 -0
pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
pertpy/tools/_differential_gene_expression/_edger.py +125 -0
pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
pertpy/tools/_distances/_distance_tests.py +29 -24
pertpy/tools/_distances/_distances.py +584 -98
pertpy/tools/_enrichment.py +460 -0
pertpy/tools/_kernel_pca.py +1 -1
pertpy/tools/_milo.py +406 -49
pertpy/tools/_mixscape.py +677 -55
pertpy/tools/_perturbation_space/_clustering.py +10 -3
pertpy/tools/_perturbation_space/_comparison.py +112 -0
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +524 -0
pertpy/tools/_perturbation_space/_perturbation_space.py +146 -52
pertpy/tools/_perturbation_space/_simple.py +52 -11
pertpy/tools/_scgen/__init__.py +1 -1
pertpy/tools/_scgen/_base_components.py +2 -3
pertpy/tools/_scgen/_scgen.py +706 -0
pertpy/tools/_scgen/_utils.py +3 -5
pertpy/tools/decoupler_LICENSE +674 -0
{pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +48 -20
pertpy-0.8.0.dist-info/RECORD +57 -0
{pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
pertpy/plot/_augur.py +0 -234
pertpy/plot/_cinemaot.py +0 -81
pertpy/plot/_coda.py +0 -1001
pertpy/plot/_dialogue.py +0 -91
pertpy/plot/_guide_rna.py +0 -82
pertpy/plot/_milopy.py +0 -284
pertpy/plot/_mixscape.py +0 -594
pertpy/plot/_scgen.py +0 -337
pertpy/tools/_differential_gene_expression.py +0 -99
pertpy/tools/_metadata/__init__.py +0 -0
pertpy/tools/_metadata/_cell_line.py +0 -613
pertpy/tools/_metadata/_look_up.py +0 -342
pertpy/tools/_perturbation_space/_discriminator_classifier.py +0 -381
pertpy/tools/_scgen/_jax_scgen.py +0 -370
pertpy-0.6.0.dist-info/RECORD +0 -50
/pertpy/tools/_scgen/{_jax_scgenvae.py → _scgenvae.py} +0 -0
{pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0

pertpy/tools/_dialogue.py CHANGED Viewed

@@ -2,27 +2,33 @@ from __future__ import annotations
 import itertools
 from collections import defaultdict
-from typing import Any, Literal
+from typing import TYPE_CHECKING, Any, Literal
 import anndata as ad
+import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import scanpy as sc
-import scipy.sparse as sp
+import seaborn as sns
 import statsmodels.formula.api as smf
 import statsmodels.stats.multitest as ssm
 from anndata import AnnData
+from lamin_utils import logger
 from pandas import DataFrame
-from rich import print
 from rich.console import Group
 from rich.live import Live
 from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn
 from scipy import stats
 from scipy.optimize import nnls
+from seaborn import PairGrid
 from sklearn.linear_model import LinearRegression
 from sparsecca import lp_pmd, multicca_permute, multicca_pmd
 from statsmodels.sandbox.stats.multicomp import multipletests
+if TYPE_CHECKING:
+    from matplotlib.axes import Axes
+    from matplotlib.figure import Figure
 class Dialogue:
     """Python implementation of DIALOGUE"""
@@ -53,8 +59,6 @@ class Dialogue:
         Copied from `https://github.com/schillerlab/sc-toolbox/blob/397e80dc5e8fb8017b75f6c3fa634a1e1213d484/sc_toolbox/tools/__init__.py#L458`
-        # TODO: Replace with decoupler's implementation
         Args:
             groupby: The key to groupby for pseudobulks
             strategy: The pseudobulking strategy. One of "median" or "mean"
@@ -62,14 +66,15 @@ class Dialogue:
         Returns:
             A Pandas DataFrame of pseudobulk counts
         """
+        # TODO: Replace with decoupler's implementation
         pseudobulk = {"Genes": adata.var_names.values}
         for category in adata.obs.loc[:, groupby].cat.categories:
             temp = adata.obs.loc[:, groupby] == category
             if strategy == "median":
-                pseudobulk[category] = adata[temp].X.median(axis=0).A1
+                pseudobulk[category] = adata[temp].X.median(axis=0)
             elif strategy == "mean":
-                pseudobulk[category] = adata[temp].X.mean(axis=0).A1
+                pseudobulk[category] = adata[temp].X.mean(axis=0)
         pseudobulk = pd.DataFrame(pseudobulk).set_index("Genes")
@@ -101,8 +106,6 @@ class Dialogue:
     def _scale_data(self, pseudobulks: pd.DataFrame, normalize: bool = True) -> np.ndarray:
         """Row-wise mean center and scale by the standard deviation.
-        TODO: the `scale` function we implemented to match the R `scale` fn should already contain this functionality.
         Args:
             pseudobulks: The pseudobulk PCA components.
             normalize: Whether to mimic DIALOGUE behavior or not.
@@ -110,9 +113,9 @@ class Dialogue:
         Returns:
             The scaled count matrix.
         """
+        # TODO: the `scale` function we implemented to match the R `scale` fn should already contain this functionality.
         # DIALOGUE doesn't scale the data before passing to multicca, unlike what is recommended by sparsecca.
         # However, performing this scaling _does_ increase overall correlation of the end result
-        # WHEN SAMPLE ORDER AND DIALOGUE2+3 PROCESSING IS IGNORED.
         if normalize:
             return pseudobulks.to_numpy()
         else:
@@ -288,7 +291,7 @@ class Dialogue:
             mcp_name: Name of mcp which was used for calculation of column value.
             max_length: Value needed to later decide at what index the threshold value should be extracted from column.
             min_threshold: Minimal threshold to select final scores by if it is smaller than calculated threshold.
-            index: Column index to use eto calculate the significant genes. Defaults to `z_score`.
+            index: Column index to use eto calculate the significant genes.
         Returns:
             According to the values in a df column (default: zscore) the significant up and downregulated gene names
@@ -313,13 +316,13 @@ class Dialogue:
     def _apply_HLM_per_MCP_for_one_pair(
         self,
         mcp_name: str,
-        scores_df: dict,
+        scores_df: pd.DataFrame,
         ct_data: AnnData,
         tme: pd.DataFrame,
         sig: dict,
         n_counts: str,
         formula: str,
-        confounder: str,
+        confounder: str | None,
     ) -> tuple[pd.DataFrame, dict[str, Any]]:
         """Applies hierarchical modeling for a single MCP.
@@ -340,7 +343,7 @@ class Dialogue:
         """
         HLM_result = self._mixed_effects(
             scores=scores_df[[mcp_name]],
-            x_labels=ct_data.obs[[n_counts, confounder]],
+            x_labels=ct_data.obs[[n_counts, confounder]] if confounder else ct_data.obs[[n_counts]],
             tme=tme,
             genes_in_mcp=list(sig[mcp_name]["up"]) + list(sig[mcp_name]["down"]),
             formula=formula,
@@ -367,19 +370,13 @@ class Dialogue:
         return np.array(resid)
     def _iterative_nnls(self, A_orig: np.ndarray, y_orig: np.ndarray, feature_ranks: list[int], n_iter: int = 1000):
-        """Solves non-negative least squares separately for different feature categories.
+        """Solves non-negative least-squares separately for different feature categories.
         Mimics DLG.iterative.nnls.
         Variables are notated according to:
             `argmin|Ax - y|`
-        Args:
-            A_orig:
-            y_orig:
-            feature_ranks:
-            n_iter: Passed to scipy.optimize.nnls. Defaults to 1000.
         Returns:
             Returns the aggregated coefficients from nnls.
         """
@@ -398,7 +395,7 @@ class Dialogue:
         x_final = np.zeros(A_orig.shape[0])
         Ax = np.zeros(A_orig.shape[1])
-        for _, mask in zip(sig_ranks, masks):
+        for _, mask in zip(sig_ranks, masks, strict=False):
             A = A_orig[mask].T
             coef_nnls, _ = nnls(A, y, maxiter=n_iter)
             y = y - A @ coef_nnls  # residuals
@@ -516,8 +513,8 @@ class Dialogue:
             # TODO: probably format the up and down within get_top_elements
             cca_sig: dict[str, Any] = defaultdict(dict)
             for i in range(0, int(len(cca_sig_unformatted) / 2)):
-                cca_sig[f"MCP{i + 1}"]["up"] = cca_sig_unformatted[i * 2]
-                cca_sig[f"MCP{i + 1}"]["down"] = cca_sig_unformatted[i * 2 + 1]
+                cca_sig[f"MCP{i}"]["up"] = cca_sig_unformatted[i * 2]
+                cca_sig[f"MCP{i}"]["down"] = cca_sig_unformatted[i * 2 + 1]
             cca_sig = dict(cca_sig)
             cca_sig_results[ct] = cca_sig
@@ -555,7 +552,7 @@ class Dialogue:
         return cca_sig_results, new_mcp_scores
-    def load(
+    def _load(
         self,
         adata: AnnData,
         ct_order: list[str],
@@ -569,21 +566,11 @@ class Dialogue:
         Args:
             adata: AnnData object generate celltype objects for
             ct_order: The order of cell types
-            agg_pca: Whether to aggregate pseudobulks with PCA or not. Defaults to True.
-            normalize: Whether to mimic DIALOGUE behavior or not. Defaults to True.
+            agg_pca: Whether to aggregate pseudobulks with PCA or not.
+            normalize: Whether to mimic DIALOGUE behavior or not.
         Returns:
             A celltype_label:array dictionary.
-        Examples:
-            >>> import pertpy as pt
-            >>> import scanpy as sc
-            >>> adata = pt.dt.dialogue_example()
-            >>> sc.pp.pca(adata)
-            >>> dl = pt.tl.Dialogue(sample_id = "clinical.status", celltype_key = "cell.subtypes", \
-                n_counts_key = "nCount_RNA", n_mpcs = 3)
-            >>> cell_types = adata.obs[dl.celltype_key].astype("category").cat.categories
-            >>> mcca_in, ct_subs = dl.load(adata, ct_order=cell_types)
         """
         ct_subs = {ct: adata[adata.obs[self.celltype_key] == ct].copy() for ct in ct_order}
         fn = self._pseudobulk_pca if agg_pca else self._get_pseudobulks
@@ -620,7 +607,6 @@ class Dialogue:
             agg_pca: Whether to calculate cell-averaged PCA components.
             solver: Which solver to use for PMD. Must be one of "lp" (linear programming) or "bs" (binary search).
                     For differences between these to please refer to https://github.com/theislab/sparsecca/blob/main/examples/linear_programming_multicca.ipynb
-                    Defaults to 'bs'.
             normalize: Whether to mimic DIALOGUE as close as possible
         Returns:
@@ -631,25 +617,31 @@ class Dialogue:
             >>> import scanpy as sc
             >>> adata = pt.dt.dialogue_example()
             >>> sc.pp.pca(adata)
-            >>> dl = pt.tl.Dialogue(sample_id = "clinical.status", celltype_key = "cell.subtypes", \
-                n_counts_key = "nCount_RNA", n_mpcs = 3)
+            >>> dl = pt.tl.Dialogue(
+            ...     sample_id="clinical.status", celltype_key="cell.subtypes", n_counts_key="nCount_RNA", n_mpcs=3
+            ... )
             >>> adata, mcps, ws, ct_subs = dl.calculate_multifactor_PMD(adata, normalize=True)
         """
-        # IMPORTANT NOTE: the order in which matrices are passed to multicca matters. As such,
-        # it is important here that to obtain the same result as in R, we pass the matrices in
-        # in the same order.
+        # IMPORTANT NOTE: the order in which matrices are passed to multicca matters.
+        # As such, it is important here that to obtain the same result as in R, we pass the matrices in the same order.
         if ct_order is not None:
             cell_types = ct_order
         else:
             ct_order = cell_types = adata.obs[self.celltype_key].astype("category").cat.categories
-        mcca_in, ct_subs = self.load(adata, ct_order=cell_types, agg_pca=agg_pca, normalize=normalize)
+        mcca_in, ct_subs = self._load(adata, ct_order=cell_types, agg_pca=agg_pca, normalize=normalize)
         n_samples = mcca_in[0].shape[1]
         if penalties is None:
-            penalties = multicca_permute(
-                mcca_in, penalties=np.sqrt(n_samples) / 2, nperms=10, niter=50, standardize=True
-            )["bestpenalties"]
+            try:
+                penalties = multicca_permute(
+                    mcca_in, penalties=np.sqrt(n_samples) / 2, nperms=10, niter=50, standardize=True
+                )["bestpenalties"]
+            except ValueError as e:
+                if "matmul: input operand 1 has a mismatch in its core dimension" in str(e):
+                    raise ValueError("Please ensure that every cell type is represented in every sample.") from e
+                else:
+                    raise
         else:
             penalties = penalties
@@ -685,7 +677,7 @@ class Dialogue:
         ct_subs: dict,
         mcp_scores: dict,
         ws_dict: dict,
-        confounder: str,
+        confounder: str | None,
         formula: str = None,
     ):
         """Runs the multilevel modeling step to match genes to MCPs and generate p-values for MCPs.
@@ -700,7 +692,6 @@ class Dialogue:
             A Pandas DataFrame containing:
             - for each mcp: HLM_result_1, HLM_result_2, sig_genes_1, sig_genes_2
             - merged HLM_result_1, HLM_result_2, sig_genes_1, sig_genes_2 of all mcps
-            TODO: Describe both returns
         Examples:
             >>> import pertpy as pt
@@ -713,7 +704,9 @@ class Dialogue:
             >>> all_results, new_mcps = dl.multilevel_modeling(ct_subs=ct_subs, mcp_scores=mcps, ws_dict=ws, \
                 confounder="gender")
         """
-        # all possible pairs of cell types with out pairing same cell type
+        # TODO the returns of the function better
+        # all possible pairs of cell types without pairing same cell type
         cell_types = list(ct_subs.keys())
         pairs = list(itertools.combinations(cell_types, 2))
@@ -721,9 +714,9 @@ class Dialogue:
             formula = f"y ~ x + {self.n_counts_key}"
         # Hierarchical modeling expects DataFrames
-        mcp_cell_types = {f"MCP{i + 1}": cell_types for i in range(self.n_mcps)}
+        mcp_cell_types = {f"MCP{i}": cell_types for i in range(self.n_mcps)}
         mcp_scores_df = {
-            ct: pd.DataFrame(v, index=ct_subs[ct].obs.index, columns=mcp_cell_types.keys())
+            ct: pd.DataFrame(v, index=ct_subs[ct].obs.index, columns=list(mcp_cell_types.keys()))
             for ct, v in mcp_scores.items()
         }
@@ -762,10 +755,10 @@ class Dialogue:
                         mcps.append(mcp)
                 if len(mcps) == 0:
-                    print(f"[bold red]No shared MCPs between {cell_type_1} and {cell_type_2}.")
+                    logger.warning(f"No shared MCPs between {cell_type_1} and {cell_type_2}.")
                     continue
-                print(f"[bold blue]{len(mcps)} MCPs identified for {cell_type_1} and {cell_type_2}.")
+                logger.info(f"{len(mcps)} MCPs identified for {cell_type_1} and {cell_type_2}.")
                 new_mcp_scores: dict[Any, list[Any]]
                 cca_sig, new_mcp_scores = self._calculate_cca_sig(
@@ -805,7 +798,7 @@ class Dialogue:
                 for mcp in mcps:
                     mixed_model_progress.update(mm_task, description=f"[bold blue]Determining mixed effects for {mcp}")
-                    # TODO Check that the genes in result{sig_genes_1] are different and if so note that somewhere and explain why
+                    # TODO Check whether the genes in result{sig_genes_1] are different and if so note that somewhere and explain why
                     result = {}
                     result["HLM_result_1"], result["sig_genes_1"] = self._apply_HLM_per_MCP_for_one_pair(
                         mcp_name=mcp,
@@ -875,22 +868,19 @@ class Dialogue:
         sample_label = self.sample_id
         n_mcps = self.n_mcps
-        # create conditions_compare if not supplied
         if conditions_compare is None:
-            conditions_compare = list(adata.obs["path_str"].cat.categories)  # type: ignore
+            conditions_compare = list(adata.obs[condition_label].cat.categories)  # type: ignore
             if len(conditions_compare) != 2:
                 raise ValueError("Please specify conditions to compare or supply an object with only 2 conditions")
-        # create data frames to store results
         pvals = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(0, n_mcps)])
         tstats = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(0, n_mcps)])
         pvals_adj = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(0, n_mcps)])
         response = adata.obs.groupby(sample_label)[condition_label].agg(pd.Series.mode)
         for celltype in adata.obs[celltype_label].unique():
-            # subset data to cell type
             df = adata.obs[adata.obs[celltype_label] == celltype]
-            # run t-test for each MCP
             for mcpnum in ["mcp_" + str(n) for n in range(0, n_mcps)]:
                 mns = df.groupby(sample_label)[mcpnum].mean()
                 mns = pd.concat([mns, response], axis=1)
@@ -900,11 +890,10 @@ class Dialogue:
                 )
                 pvals.loc[celltype, mcpnum] = res[1]
                 tstats.loc[celltype, mcpnum] = res[0]
-                # return(res)
-        # benjamini-hochberg correction for number of cell types (use BH because correlated MCPs)
         for mcpnum in ["mcp_" + str(n) for n in range(0, n_mcps)]:
             pvals_adj[mcpnum] = multipletests(pvals[mcpnum], method="fdr_bh")[1]
         return {"pvals": pvals, "tstats": tstats, "pvals_adj": pvals_adj}
     def get_mlm_mcp_genes(
@@ -921,10 +910,8 @@ class Dialogue:
             celltype: Cell type of interest.
             results: dl.MultilevelModeling result object.
             MCP: MCP key of the result object.
-            threshhold: Number between [0,1]. The fraction of cell types compared against which must have the associated MCP gene.
-                        Defaults to 0.70.
+            threshold: Number between [0,1]. The fraction of cell types compared against which must have the associated MCP gene.
             focal_celltypes: None (compare against all cell types) or a list of other cell types which you want to compare against.
-                             Defaults to None.
         Returns:
             Dict with keys 'up_genes' and 'down_genes' and values of lists of genes
@@ -945,7 +932,6 @@ class Dialogue:
         # REMOVE THIS BLOCK ONCE MLM OUTPUT MATCHES STANDARD
         if MCP.startswith("mcp_"):
             MCP = MCP.replace("mcp_", "MCP")
-            # convert from MCPx to MCPx+1
             MCP = "MCP" + str(int(MCP[3:]) - 1)
         # Extract all comparison keys from the results object
@@ -1004,27 +990,24 @@ class Dialogue:
         Args:
             ct_subs: Dialogue output ct_subs dictionary
             mcp: The name of the marker gene expression column.
-                 Defaults to "mcp_0".
             fraction: Fraction of extreme cells to consider for gene ranking.
                       Should be between 0 and 1.
-                      Defaults to 0.1.
         Returns:
             Dictionary where keys are subpopulation names and values are Anndata
             objects containing the results of gene ranking analysis.
         Examples:
-            ct_subs = {
-            "subpop1": anndata_obj1,
-            "subpop2": anndata_obj2,
-            # ... more subpopulations ...
-            }
-            genes_results = _get_extrema_MCP_genes_single(ct_subs, mcp="mcp_4", fraction=0.2)
+            >>> ct_subs = {
+            ...     "subpop1": anndata_obj1,
+            ...     "subpop2": anndata_obj2,
+            ...     # ... more subpopulations ...
+            ... }
+            >>> genes_results = _get_extrema_MCP_genes_single(ct_subs, mcp="mcp_4", fraction=0.2)
         """
         genes = {}
         for ct in ct_subs.keys():
             mini = ct_subs[ct]
-            mini.obs[mcp]
             mini.obs["extrema"] = pd.qcut(
                 mini.obs[mcp],
                 [0, 0 + fraction, 1 - fraction, 1.0],
@@ -1034,6 +1017,7 @@ class Dialogue:
                 mini, "extrema", groups=["high" + mcp + " " + ct], reference="low " + mcp + " " + ct
             )
             genes[ct] = mini  # .uns['rank_genes_groups']
         return genes
     def get_extrema_MCP_genes(self, ct_subs: dict, fraction: float = 0.1):
@@ -1046,7 +1030,7 @@ class Dialogue:
         Args:
             ct_subs: Dialogue output ct_subs dictionary
             fraction: Fraction of extreme cells to consider for gene ranking.
-                      Should be between 0 and 1. Defaults to 0.1.
+                      Should be between 0 and 1.
         Returns:
             Nested dictionary where keys of the first level are MCPs (of the form "mcp_0" etc)
@@ -1064,7 +1048,7 @@ class Dialogue:
             >>> extrema_mcp_genes = dl.get_extrema_MCP_genes(ct_subs)
         """
         rank_dfs: dict[str, dict[Any, Any]] = {}
-        _, ct_sub = next(iter(ct_subs.items()))
+        ct_sub = next(iter(ct_subs.values()))
         mcps = [col for col in ct_sub.obs.columns if col.startswith("mcp_")]
         for mcp in mcps:
@@ -1072,4 +1056,123 @@ class Dialogue:
             ct_ranked = self._get_extrema_MCP_genes_single(ct_subs, mcp=mcp, fraction=fraction)
             for celltype in ct_ranked.keys():
                 rank_dfs[mcp][celltype] = sc.get.rank_genes_groups_df(ct_ranked[celltype], group=None)
         return rank_dfs
+    def plot_split_violins(
+        self,
+        adata: AnnData,
+        split_key: str,
+        celltype_key: str,
+        split_which: tuple[str, str] = None,
+        mcp: str = "mcp_0",
+        return_fig: bool | None = None,
+        ax: Axes | None = None,
+        save: bool | str | None = None,
+        show: bool | None = None,
+    ) -> Axes | Figure | None:
+        """Plots split violin plots for a given MCP and split variable.
+        Any cells with a value for split_key not in split_which are removed from the plot.
+        Args:
+            adata: Annotated data object.
+            split_key: Variable in adata.obs used to split the data.
+            celltype_key: Key for cell type annotations.
+            split_which: Which values of split_key to plot. Required if more than 2 values in split_key.
+            mcp: Key for MCP data.
+        Returns:
+            A :class:`~matplotlib.axes.Axes` object
+        Examples:
+            >>> import pertpy as pt
+            >>> import scanpy as sc
+            >>> adata = pt.dt.dialogue_example()
+            >>> sc.pp.pca(adata)
+            >>> dl = pt.tl.Dialogue(sample_id = "clinical.status", celltype_key = "cell.subtypes", \
+                n_counts_key = "nCount_RNA", n_mpcs = 3)
+            >>> adata, mcps, ws, ct_subs = dl.calculate_multifactor_PMD(adata, normalize=True)
+            >>> dl.plot_split_violins(adata, split_key='gender', celltype_key='cell.subtypes')
+        Preview:
+            .. image:: /_static/docstring_previews/dialogue_violin.png
+        """
+        df = sc.get.obs_df(adata, [celltype_key, mcp, split_key])
+        if split_which is None:
+            split_which = df[split_key].unique()
+        df = df[df[split_key].isin(split_which)]
+        df[split_key] = df[split_key].cat.remove_unused_categories()
+        ax = sns.violinplot(data=df, x=celltype_key, y=mcp, hue=split_key, split=True)
+        ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
+        if save:
+            plt.savefig(save, bbox_inches="tight")
+        if show:
+            plt.show()
+        if return_fig:
+            return plt.gcf()
+        if not (show or save):
+            return ax
+        return None
+    def plot_pairplot(
+        self,
+        adata: AnnData,
+        celltype_key: str,
+        color: str,
+        sample_id: str,
+        mcp: str = "mcp_0",
+        return_fig: bool | None = None,
+        show: bool | None = None,
+        save: bool | str | None = None,
+    ) -> PairGrid | Figure | None:
+        """Generate a pairplot visualization for multi-cell perturbation (MCP) data.
+        Computes the mean of a specified MCP feature (mcp) for each combination of sample and cell type,
+        then creates a pairplot to visualize the relationships between these mean MCP values.
+        Args:
+            adata: Annotated data object.
+            celltype_key: Key in `adata.obs` containing cell type annotations.
+            color: Key in `adata.obs` for color annotations. This parameter is used as the hue
+            sample_id: Key in `adata.obs` for the sample annotations.
+            mcp: Key in `adata.obs` for MCP feature values.
+        Returns:
+            Seaborn Pairgrid object.
+        Examples:
+            >>> import pertpy as pt
+            >>> import scanpy as sc
+            >>> adata = pt.dt.dialogue_example()
+            >>> sc.pp.pca(adata)
+            >>> dl = pt.tl.Dialogue(sample_id = "clinical.status", celltype_key = "cell.subtypes", \
+                n_counts_key = "nCount_RNA", n_mpcs = 3)
+            >>> adata, mcps, ws, ct_subs = dl.calculate_multifactor_PMD(adata, normalize=True)
+            >>> dl.plot_pairplot(adata, celltype_key="cell.subtypes", color="gender", sample_id="clinical.status")
+        Preview:
+            .. image:: /_static/docstring_previews/dialogue_pairplot.png
+        """
+        mean_mcps = adata.obs.groupby([sample_id, celltype_key])[mcp].mean()
+        mean_mcps = mean_mcps.reset_index()
+        mcp_pivot = pd.pivot(mean_mcps[[sample_id, celltype_key, mcp]], index=sample_id, columns=celltype_key)[mcp]
+        aggstats = adata.obs.groupby([sample_id])[color].describe()
+        aggstats = aggstats.loc[list(mcp_pivot.index), :]
+        aggstats[color] = aggstats["top"]
+        mcp_pivot = pd.concat([mcp_pivot, aggstats[color]], axis=1)
+        ax = sns.pairplot(mcp_pivot, hue=color, corner=True)
+        if save:
+            plt.savefig(save, bbox_inches="tight")
+        if show:
+            plt.show()
+        if return_fig:
+            return plt.gcf()
+        if not (show or save):
+            return ax
+        return None

pertpy/tools/_differential_gene_expression/__init__.py ADDED Viewed

@@ -0,0 +1,20 @@
+from ._base import ContrastType, LinearModelBase, MethodBase
+from ._dge_comparison import DGEEVAL
+from ._edger import EdgeR
+from ._pydeseq2 import PyDESeq2
+from ._simple_tests import SimpleComparisonBase, TTest, WilcoxonTest
+from ._statsmodels import Statsmodels
+__all__ = [
+    "MethodBase",
+    "LinearModelBase",
+    "EdgeR",
+    "PyDESeq2",
+    "Statsmodels",
+    "SimpleComparisonBase",
+    "WilcoxonTest",
+    "TTest",
+    "ContrastType",
+]
+AVAILABLE_METHODS = [Statsmodels, EdgeR, PyDESeq2, WilcoxonTest, TTest]

pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl