PyPI - pertpy - Versions diffs - 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl - Mend

pertpy 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

pertpy/__init__.py +1 -1
pertpy/_doc.py +19 -0
pertpy/data/_datasets.py +1 -1
pertpy/metadata/_cell_line.py +18 -8
pertpy/metadata/_compound.py +3 -4
pertpy/metadata/_metadata.py +1 -1
pertpy/preprocessing/_guide_rna.py +114 -13
pertpy/preprocessing/_guide_rna_mixture.py +179 -0
pertpy/tools/__init__.py +1 -1
pertpy/tools/_augur.py +64 -86
pertpy/tools/_cinemaot.py +21 -17
pertpy/tools/_coda/_base_coda.py +90 -117
pertpy/tools/_dialogue.py +32 -40
pertpy/tools/_differential_gene_expression/__init__.py +1 -2
pertpy/tools/_differential_gene_expression/_base.py +486 -112
pertpy/tools/_differential_gene_expression/_edger.py +30 -21
pertpy/tools/_differential_gene_expression/_pydeseq2.py +15 -29
pertpy/tools/_differential_gene_expression/_statsmodels.py +0 -11
pertpy/tools/_distances/_distances.py +71 -56
pertpy/tools/_enrichment.py +16 -8
pertpy/tools/_milo.py +54 -50
pertpy/tools/_mixscape.py +307 -208
pertpy/tools/_perturbation_space/_perturbation_space.py +40 -31
pertpy/tools/_perturbation_space/_simple.py +48 -0
pertpy/tools/_scgen/_scgen.py +35 -27
{pertpy-0.9.4.dist-info → pertpy-0.10.0.dist-info}/METADATA +6 -6
{pertpy-0.9.4.dist-info → pertpy-0.10.0.dist-info}/RECORD +29 -28
{pertpy-0.9.4.dist-info → pertpy-0.10.0.dist-info}/WHEEL +1 -1
pertpy/tools/_differential_gene_expression/_formulaic.py +0 -189
{pertpy-0.9.4.dist-info → pertpy-0.10.0.dist-info}/licenses/LICENSE +0 -0

pertpy/tools/_dialogue.py CHANGED Viewed

@@ -25,6 +25,8 @@ from sklearn.linear_model import LinearRegression
 from sparsecca import lp_pmd, multicca_permute, multicca_pmd
 from statsmodels.sandbox.stats.multicomp import multipletests
+from pertpy._doc import _doc_params, doc_common_plot_args
 if TYPE_CHECKING:
     from matplotlib.axes import Axes
     from matplotlib.figure import Figure
@@ -80,27 +82,27 @@ class Dialogue:
         return pseudobulk
-    def _pseudobulk_pca(self, adata: AnnData, groupby: str, n_components: int = 50) -> pd.DataFrame:
-        """Return cell-averaged PCA components.
+    def _pseudobulk_feature_space(
+        self, adata: AnnData, groupby: str, n_components: int = 50, feature_space_key: str = "X_pca"
+    ) -> pd.DataFrame:
+        """Return Cell-averaged components from a passed feature space.
         TODO: consider merging with `get_pseudobulks`
         TODO: DIALOGUE recommends running PCA on each cell type separately before running PMD - this should be implemented as an option here.
         Args:
-            groupby: The key to groupby for pseudobulks
-            n_components: The number of PCA components
+            groupby: The key to groupby for pseudobulks.
+            n_components: The number of components to use.
+            feature_space_key: The key in adata.obsm for the feature space (e.g., "X_pca", "X_umap").
         Returns:
-            A pseudobulk of PCA components.
+            A pseudobulk DataFrame of the averaged components.
         """
         aggr = {}
         for category in adata.obs.loc[:, groupby].cat.categories:
             temp = adata.obs.loc[:, groupby] == category
-            aggr[category] = adata[temp].obsm["X_pca"][:, :n_components].mean(axis=0)
+            aggr[category] = adata[temp].obsm[feature_space_key][:, :n_components].mean(axis=0)
         aggr = pd.DataFrame(aggr)
         return aggr
     def _scale_data(self, pseudobulks: pd.DataFrame, normalize: bool = True) -> np.ndarray:
@@ -556,7 +558,7 @@ class Dialogue:
         self,
         adata: AnnData,
         ct_order: list[str],
-        agg_pca: bool = True,
+        agg_feature: bool = True,
         normalize: bool = True,
     ) -> tuple[list, dict]:
         """Separates cell into AnnDatas by celltype_key and creates the multifactor PMD input.
@@ -566,14 +568,14 @@ class Dialogue:
         Args:
             adata: AnnData object generate celltype objects for
             ct_order: The order of cell types
-            agg_pca: Whether to aggregate pseudobulks with PCA or not.
+            agg_feature: Whether to aggregate pseudobulks with some embeddings or not.
             normalize: Whether to mimic DIALOGUE behavior or not.
         Returns:
             A celltype_label:array dictionary.
         """
         ct_subs = {ct: adata[adata.obs[self.celltype_key] == ct].copy() for ct in ct_order}
-        fn = self._pseudobulk_pca if agg_pca else self._get_pseudobulks
+        fn = self._pseudobulk_feature_space if agg_feature else self._get_pseudobulks
         ct_aggr = {ct: fn(ad, self.sample_id) for ct, ad in ct_subs.items()}  # type: ignore
         # TODO: implement check (as in https://github.com/livnatje/DIALOGUE/blob/55da9be0a9bf2fcd360d9e11f63e30d041ec4318/R/DIALOGUE.main.R#L114-L119)
@@ -591,7 +593,7 @@ class Dialogue:
         adata: AnnData,
         penalties: list[int] = None,
         ct_order: list[str] = None,
-        agg_pca: bool = True,
+        agg_feature: bool = True,
         solver: Literal["lp", "bs"] = "bs",
         normalize: bool = True,
     ) -> tuple[AnnData, dict[str, np.ndarray], dict[Any, Any], dict[Any, Any]]:
@@ -604,7 +606,7 @@ class Dialogue:
             sample_id: Key to use for pseudobulk determination.
             penalties: PMD penalties.
             ct_order: The order of cell types.
-            agg_pca: Whether to calculate cell-averaged PCA components.
+            agg_feature: Whether to calculate cell-averaged principal components.
             solver: Which solver to use for PMD. Must be one of "lp" (linear programming) or "bs" (binary search).
                     For differences between these two, please refer to https://github.com/theislab/sparsecca/blob/main/examples/linear_programming_multicca.ipynb
             normalize: Whether to mimic DIALOGUE as close as possible
@@ -629,7 +631,7 @@ class Dialogue:
         else:
             ct_order = cell_types = adata.obs[self.celltype_key].astype("category").cat.categories
-        mcca_in, ct_subs = self._load(adata, ct_order=cell_types, agg_pca=agg_pca, normalize=normalize)
+        mcca_in, ct_subs = self._load(adata, ct_order=cell_types, agg_feature=agg_feature, normalize=normalize)
         n_samples = mcca_in[0].shape[1]
         if penalties is None:
@@ -1059,18 +1061,17 @@ class Dialogue:
         return rank_dfs
+    @_doc_params(common_plot_args=doc_common_plot_args)
     def plot_split_violins(
         self,
         adata: AnnData,
         split_key: str,
         celltype_key: str,
+        *,
         split_which: tuple[str, str] = None,
         mcp: str = "mcp_0",
-        return_fig: bool | None = None,
-        ax: Axes | None = None,
-        save: bool | str | None = None,
-        show: bool | None = None,
-    ) -> Axes | Figure | None:
+        return_fig: bool = False,
+    ) -> Figure | None:
         """Plots split violin plots for a given MCP and split variable.
         Any cells with a value for split_key not in split_which are removed from the plot.
@@ -1081,9 +1082,10 @@ class Dialogue:
             celltype_key: Key for cell type annotations.
             split_which: Which values of split_key to plot. Required if more than 2 values in split_key.
             mcp: Key for MCP data.
+            {common_plot_args}
         Returns:
-            A :class:`~matplotlib.axes.Axes` object
+            If `return_fig` is `True`, returns the figure, otherwise `None`.
         Examples:
             >>> import pertpy as pt
@@ -1105,30 +1107,24 @@ class Dialogue:
         df[split_key] = df[split_key].cat.remove_unused_categories()
         ax = sns.violinplot(data=df, x=celltype_key, y=mcp, hue=split_key, split=True)
         ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
-        if save:
-            plt.savefig(save, bbox_inches="tight")
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
-        if not (show or save):
-            return ax
+        plt.show()
         return None
+    @_doc_params(common_plot_args=doc_common_plot_args)
     def plot_pairplot(
         self,
         adata: AnnData,
         celltype_key: str,
         color: str,
         sample_id: str,
+        *,
         mcp: str = "mcp_0",
-        return_fig: bool | None = None,
-        show: bool | None = None,
-        save: bool | str | None = None,
-    ) -> PairGrid | Figure | None:
+        return_fig: bool = False,
+    ) -> Figure | None:
         """Generate a pairplot visualization for multi-cell perturbation (MCP) data.
         Computes the mean of a specified MCP feature (mcp) for each combination of sample and cell type,
@@ -1140,9 +1136,10 @@ class Dialogue:
             color: Key in `adata.obs` for color annotations. This parameter is used as the hue
             sample_id: Key in `adata.obs` for the sample annotations.
             mcp: Key in `adata.obs` for MCP feature values.
+            {common_plot_args}
         Returns:
-            Seaborn Pairgrid object.
+            If `return_fig` is `True`, returns the figure, otherwise `None`.
         Examples:
             >>> import pertpy as pt
@@ -1165,14 +1162,9 @@ class Dialogue:
         aggstats = aggstats.loc[list(mcp_pivot.index), :]
         aggstats[color] = aggstats["top"]
         mcp_pivot = pd.concat([mcp_pivot, aggstats[color]], axis=1)
-        ax = sns.pairplot(mcp_pivot, hue=color, corner=True)
+        sns.pairplot(mcp_pivot, hue=color, corner=True)
-        if save:
-            plt.savefig(save, bbox_inches="tight")
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
-        if not (show or save):
-            return ax
+        plt.show()
         return None

pertpy/tools/_differential_gene_expression/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from ._base import ContrastType, LinearModelBase, MethodBase
+from ._base import LinearModelBase, MethodBase
 from ._dge_comparison import DGEEVAL
 from ._edger import EdgeR
 from ._pydeseq2 import PyDESeq2
@@ -14,7 +14,6 @@ __all__ = [
     "SimpleComparisonBase",
     "WilcoxonTest",
     "TTest",
-    "ContrastType",
 ]
 AVAILABLE_METHODS = [Statsmodels, EdgeR, PyDESeq2, WilcoxonTest, TTest]

pertpy 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl

pertpy 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl