PyPI - pertpy - Versions diffs - 0.9.5__py3-none-any.whl → 0.10.0__py3-none-any.whl - Mend

pertpy 0.9.5__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

pertpy/__init__.py +1 -1
pertpy/_doc.py +1 -2
pertpy/metadata/_cell_line.py +3 -5
pertpy/preprocessing/_guide_rna.py +98 -10
pertpy/preprocessing/_guide_rna_mixture.py +179 -0
pertpy/tools/_augur.py +32 -44
pertpy/tools/_cinemaot.py +1 -3
pertpy/tools/_coda/_base_coda.py +21 -29
pertpy/tools/_dialogue.py +17 -21
pertpy/tools/_differential_gene_expression/_base.py +4 -12
pertpy/tools/_distances/_distances.py +56 -48
pertpy/tools/_enrichment.py +1 -3
pertpy/tools/_milo.py +4 -12
pertpy/tools/_mixscape.py +215 -127
pertpy/tools/_perturbation_space/_simple.py +1 -3
pertpy/tools/_scgen/_scgen.py +1 -3
{pertpy-0.9.5.dist-info → pertpy-0.10.0.dist-info}/METADATA +2 -2
{pertpy-0.9.5.dist-info → pertpy-0.10.0.dist-info}/RECORD +20 -19
{pertpy-0.9.5.dist-info → pertpy-0.10.0.dist-info}/WHEEL +0 -0
{pertpy-0.9.5.dist-info → pertpy-0.10.0.dist-info}/licenses/LICENSE +0 -0

pertpy/tools/_coda/_base_coda.py CHANGED Viewed

@@ -850,7 +850,7 @@ class CompositionalModel2(ABC):
         table = Table(title="Compositional Analysis summary", box=box.SQUARE, expand=True, highlight=True)
         table.add_column("Name", justify="left", style="cyan")
         table.add_column("Value", justify="left")
-        table.add_row("Data", "Data: %d samples, %d cell types" % data_dims)
+        table.add_row("Data", f"Data: {data_dims[0]} samples, {data_dims[1]} cell types")
         table.add_row("Reference cell type", "{}".format(str(sample_adata.uns["scCODA_params"]["reference_cell_type"])))
         table.add_row("Formula", "{}".format(sample_adata.uns["scCODA_params"]["formula"]))
         if extended:
@@ -1199,7 +1199,6 @@ class CompositionalModel2(ABC):
         level_order: list[str] = None,
         figsize: tuple[float, float] | None = None,
         dpi: int | None = 100,
-        show: bool = True,
         return_fig: bool = False,
     ) -> Figure | None:
         """Plots a stacked barplot for all levels of a covariate or all samples (if feature_name=="samples").
@@ -1278,10 +1277,9 @@ class CompositionalModel2(ABC):
                 show_legend=show_legend,
             )
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
+        plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
@@ -1300,7 +1298,6 @@ class CompositionalModel2(ABC):
         args_barplot: dict | None = None,
         figsize: tuple[float, float] | None = None,
         dpi: int | None = 100,
-        show: bool = True,
         return_fig: bool = False,
     ) -> Figure | None:
         """Barplot visualization for effects.
@@ -1465,10 +1462,11 @@ class CompositionalModel2(ABC):
             cell_types = pd.unique(plot_df["Cell Type"])
             ax.set_xticklabels(cell_types, rotation=90)
-        if show:
-            plt.show()
-        if return_fig:
+        if return_fig and plot_facets:
+            return g
+        if return_fig and not plot_facets:
             return plt.gcf()
+        plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
@@ -1489,7 +1487,6 @@ class CompositionalModel2(ABC):
         level_order: list[str] = None,
         figsize: tuple[float, float] | None = None,
         dpi: int | None = 100,
-        show: bool = True,
         return_fig: bool = False,
     ) -> Figure | None:
         """Grouped boxplot visualization.
@@ -1697,10 +1694,11 @@ class CompositionalModel2(ABC):
                     title=feature_name,
                 )
-        if show:
-            plt.show()
-        if return_fig:
+        if return_fig and plot_facets:
+            return g
+        if return_fig and not plot_facets:
             return plt.gcf()
+        plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
@@ -1716,7 +1714,6 @@ class CompositionalModel2(ABC):
         figsize: tuple[float, float] | None = None,
         dpi: int | None = 100,
         ax: plt.Axes | None = None,
-        show: bool = True,
         return_fig: bool = False,
     ) -> Figure | None:
         """Plots total variance of relative abundance versus minimum relative abundance of all cell types for determination of a reference cell type.
@@ -1820,10 +1817,9 @@ class CompositionalModel2(ABC):
         ax.legend(loc="upper left", bbox_to_anchor=(1, 1), ncol=1, title="Is abundant")
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
+        plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
@@ -1839,7 +1835,6 @@ class CompositionalModel2(ABC):
         figsize: tuple[float, float] | None = (None, None),
         dpi: int | None = 100,
         save: str | bool = False,
-        show: bool = True,
         return_fig: bool = False,
     ) -> Tree | None:
         """Plot a tree using input ete3 tree object.
@@ -1903,10 +1898,9 @@ class CompositionalModel2(ABC):
         if save is not None:
             tree.render(save, tree_style=tree_style, units=units, w=figsize[0], h=figsize[1], dpi=dpi)  # type: ignore
-        if show:
-            return tree.render("%%inline", tree_style=tree_style, units=units, w=figsize[0], h=figsize[1], dpi=dpi)  # type: ignore
         if return_fig:
             return tree, tree_style
+        return tree.render("%%inline", tree_style=tree_style, units=units, w=figsize[0], h=figsize[1], dpi=dpi)  # type: ignore
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
@@ -1925,7 +1919,6 @@ class CompositionalModel2(ABC):
         figsize: tuple[float, float] | None = (None, None),
         dpi: int | None = 100,
         save: str | bool = False,
-        show: bool = True,
         return_fig: bool = False,
     ) -> Tree | None:
         """Plot a tree with colored circles on the nodes indicating significant effects with bar plots which indicate leave-level significant effects.
@@ -2092,15 +2085,16 @@ class CompositionalModel2(ABC):
             if save:
                 plt.savefig(save)
+            if return_fig:
+                return plt.gcf()
-        if save and not show_leaf_effects:
-            tree2.render(save, tree_style=tree_style, units=units)
-        if show:
-            if not show_leaf_effects:
-                return tree2.render("%%inline", tree_style=tree_style, units=units, w=figsize[0], h=figsize[1], dpi=dpi)
-        if return_fig:
-            if not show_leaf_effects:
+        else:
+            if save:
+                tree2.render(save, tree_style=tree_style, units=units)
+            if return_fig:
                 return tree2, tree_style
+            return tree2.render("%%inline", tree_style=tree_style, units=units, w=figsize[0], h=figsize[1], dpi=dpi)
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
@@ -2115,7 +2109,6 @@ class CompositionalModel2(ABC):
         color_map: Colormap | str | None = None,
         palette: str | Sequence[str] | None = None,
         ax: Axes = None,
-        show: bool = True,
         return_fig: bool = False,
         **kwargs,
     ) -> Figure | None:
@@ -2209,10 +2202,9 @@ class CompositionalModel2(ABC):
             **kwargs,
         )
-        if show:
-            plt.show()
         if return_fig:
             return fig
+        plt.show()
         return None

pertpy/tools/_dialogue.py CHANGED Viewed

@@ -82,27 +82,27 @@ class Dialogue:
         return pseudobulk
-    def _pseudobulk_pca(self, adata: AnnData, groupby: str, n_components: int = 50) -> pd.DataFrame:
-        """Return cell-averaged PCA components.
+    def _pseudobulk_feature_space(
+        self, adata: AnnData, groupby: str, n_components: int = 50, feature_space_key: str = "X_pca"
+    ) -> pd.DataFrame:
+        """Return Cell-averaged components from a passed feature space.
         TODO: consider merging with `get_pseudobulks`
         TODO: DIALOGUE recommends running PCA on each cell type separately before running PMD - this should be implemented as an option here.
         Args:
-            groupby: The key to groupby for pseudobulks
-            n_components: The number of PCA components
+            groupby: The key to groupby for pseudobulks.
+            n_components: The number of components to use.
+            feature_key: The key in adata.obsm for the feature space (e.g., "X_pca", "X_umap").
         Returns:
-            A pseudobulk of PCA components.
+            A pseudobulk DataFrame of the averaged components.
         """
         aggr = {}
         for category in adata.obs.loc[:, groupby].cat.categories:
             temp = adata.obs.loc[:, groupby] == category
-            aggr[category] = adata[temp].obsm["X_pca"][:, :n_components].mean(axis=0)
+            aggr[category] = adata[temp].obsm[feature_space_key][:, :n_components].mean(axis=0)
         aggr = pd.DataFrame(aggr)
         return aggr
     def _scale_data(self, pseudobulks: pd.DataFrame, normalize: bool = True) -> np.ndarray:
@@ -558,7 +558,7 @@ class Dialogue:
         self,
         adata: AnnData,
         ct_order: list[str],
-        agg_pca: bool = True,
+        agg_feature: bool = True,
         normalize: bool = True,
     ) -> tuple[list, dict]:
         """Separates cell into AnnDatas by celltype_key and creates the multifactor PMD input.
@@ -568,14 +568,14 @@ class Dialogue:
         Args:
             adata: AnnData object generate celltype objects for
             ct_order: The order of cell types
-            agg_pca: Whether to aggregate pseudobulks with PCA or not.
+            agg_feature: Whether to aggregate pseudobulks with some embeddings or not.
             normalize: Whether to mimic DIALOGUE behavior or not.
         Returns:
             A celltype_label:array dictionary.
         """
         ct_subs = {ct: adata[adata.obs[self.celltype_key] == ct].copy() for ct in ct_order}
-        fn = self._pseudobulk_pca if agg_pca else self._get_pseudobulks
+        fn = self._pseudobulk_feature_space if agg_feature else self._get_pseudobulks
         ct_aggr = {ct: fn(ad, self.sample_id) for ct, ad in ct_subs.items()}  # type: ignore
         # TODO: implement check (as in https://github.com/livnatje/DIALOGUE/blob/55da9be0a9bf2fcd360d9e11f63e30d041ec4318/R/DIALOGUE.main.R#L114-L119)
@@ -593,7 +593,7 @@ class Dialogue:
         adata: AnnData,
         penalties: list[int] = None,
         ct_order: list[str] = None,
-        agg_pca: bool = True,
+        agg_feature: bool = True,
         solver: Literal["lp", "bs"] = "bs",
         normalize: bool = True,
     ) -> tuple[AnnData, dict[str, np.ndarray], dict[Any, Any], dict[Any, Any]]:
@@ -606,7 +606,7 @@ class Dialogue:
             sample_id: Key to use for pseudobulk determination.
             penalties: PMD penalties.
             ct_order: The order of cell types.
-            agg_pca: Whether to calculate cell-averaged PCA components.
+            agg_features: Whether to calculate cell-averaged principal components.
             solver: Which solver to use for PMD. Must be one of "lp" (linear programming) or "bs" (binary search).
                     For differences between these to please refer to https://github.com/theislab/sparsecca/blob/main/examples/linear_programming_multicca.ipynb
             normalize: Whether to mimic DIALOGUE as close as possible
@@ -631,7 +631,7 @@ class Dialogue:
         else:
             ct_order = cell_types = adata.obs[self.celltype_key].astype("category").cat.categories
-        mcca_in, ct_subs = self._load(adata, ct_order=cell_types, agg_pca=agg_pca, normalize=normalize)
+        mcca_in, ct_subs = self._load(adata, ct_order=cell_types, agg_feature=agg_feature, normalize=normalize)
         n_samples = mcca_in[0].shape[1]
         if penalties is None:
@@ -1070,7 +1070,6 @@ class Dialogue:
         *,
         split_which: tuple[str, str] = None,
         mcp: str = "mcp_0",
-        show: bool = True,
         return_fig: bool = False,
     ) -> Figure | None:
         """Plots split violin plots for a given MCP and split variable.
@@ -1110,10 +1109,9 @@ class Dialogue:
         ax = sns.violinplot(data=df, x=celltype_key, y=mcp, hue=split_key, split=True)
         ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
+        plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
@@ -1125,7 +1123,6 @@ class Dialogue:
         sample_id: str,
         *,
         mcp: str = "mcp_0",
-        show: bool = True,
         return_fig: bool = False,
     ) -> Figure | None:
         """Generate a pairplot visualization for multi-cell perturbation (MCP) data.
@@ -1167,8 +1164,7 @@ class Dialogue:
         mcp_pivot = pd.concat([mcp_pivot, aggstats[color]], axis=1)
         sns.pairplot(mcp_pivot, hue=color, corner=True)
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
+        plt.show()
         return None

pertpy/tools/_differential_gene_expression/_base.py CHANGED Viewed

@@ -125,7 +125,6 @@ class MethodBase(ABC):
         shape_order: list[str] | None = None,
         x_label: str | None = None,
         y_label: str | None = None,
-        show: bool = True,
         return_fig: bool = False,
         **kwargs: int,
     ) -> Figure | None:
@@ -484,10 +483,9 @@ class MethodBase(ABC):
         plt.legend(loc=1, bbox_to_anchor=legend_pos, frameon=False)
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
+        plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
@@ -511,7 +509,6 @@ class MethodBase(ABC):
         pvalue_template=lambda x: f"p={x:.2e}",
         boxplot_properties=None,
         palette=None,
-        show: bool = True,
         return_fig: bool = False,
     ) -> Figure | None:
         """Creates a pairwise expression plot from a Pandas DataFrame or Anndata.
@@ -679,10 +676,9 @@ class MethodBase(ABC):
             )
         plt.tight_layout()
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
+        plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
@@ -696,7 +692,6 @@ class MethodBase(ABC):
         symbol_col: str = "variable",
         y_label: str = "Log2 fold change",
         figsize: tuple[int, int] = (10, 5),
-        show: bool = True,
         return_fig: bool = False,
         **barplot_kwargs,
     ) -> Figure | None:
@@ -762,10 +757,9 @@ class MethodBase(ABC):
         plt.xlabel("")
         plt.ylabel(y_label)
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
+        plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
@@ -782,7 +776,6 @@ class MethodBase(ABC):
         figsize: tuple[int, int] = (10, 2),
         x_label: str = "Contrast",
         y_label: str = "Gene",
-        show: bool = True,
         return_fig: bool = False,
         **heatmap_kwargs,
     ) -> Figure | None:
@@ -880,10 +873,9 @@ class MethodBase(ABC):
         plt.xlabel(x_label)
         plt.ylabel(y_label)
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
+        plt.show()
         return None

pertpy/tools/_distances/_distances.py CHANGED Viewed

@@ -1117,67 +1117,75 @@ class MeanVarDistributionDistance(AbstractDistance):
         super().__init__()
         self.accepts_precomputed = False
+    @staticmethod
+    def _mean_var(x, log: bool = False):
+        mean = np.mean(x, axis=0)
+        var = np.var(x, axis=0)
+        positive = mean > 0
+        mean = mean[positive]
+        var = var[positive]
+        if log:
+            mean = np.log(mean)
+            var = np.log(var)
+        return mean, var
+    @staticmethod
+    def _prep_kde_data(x, y):
+        return np.concatenate([x.reshape(-1, 1), y.reshape(-1, 1)], axis=1)
+    @staticmethod
+    def _grid_points(d, n_points=100):
+        # Make grid, add 1 bin on lower/upper end to get final n_points
+        d_min = d.min()
+        d_max = d.max()
+        # Compute bin size
+        d_bin = (d_max - d_min) / (n_points - 2)
+        d_min = d_min - d_bin
+        d_max = d_max + d_bin
+        return np.arange(start=d_min + 0.5 * d_bin, stop=d_max, step=d_bin)
+    @staticmethod
+    def _kde_eval_both(x_kde, y_kde, grid):
+        n_points = len(grid)
+        chunk_size = 10000
+        result_x = np.zeros(n_points)
+        result_y = np.zeros(n_points)
+        # Process same chunks for both KDEs
+        for start in range(0, n_points, chunk_size):
+            end = min(start + chunk_size, n_points)
+            chunk = grid[start:end]
+            result_x[start:end] = x_kde.score_samples(chunk)
+            result_y[start:end] = y_kde.score_samples(chunk)
+        return result_x, result_y
     def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
         """Difference of mean-var distributions in 2 matrices.
         Args:
             X: Normalized and log transformed cells x genes count matrix.
             Y: Normalized and log transformed cells x genes count matrix.
         """
+        mean_x, var_x = self._mean_var(X, log=True)
+        mean_y, var_y = self._mean_var(Y, log=True)
-        def _mean_var(x, log: bool = False):
-            mean = np.mean(x, axis=0)
-            var = np.var(x, axis=0)
-            positive = mean > 0
-            mean = mean[positive]
-            var = var[positive]
-            if log:
-                mean = np.log(mean)
-                var = np.log(var)
-            return mean, var
-        def _prep_kde_data(x, y):
-            return np.concatenate([x.reshape(-1, 1), y.reshape(-1, 1)], axis=1)
-        def _grid_points(d, n_points=100):
-            # Make grid, add 1 bin on lower/upper end to get final n_points
-            d_min = d.min()
-            d_max = d.max()
-            # Compute bin size
-            d_bin = (d_max - d_min) / (n_points - 2)
-            d_min = d_min - d_bin
-            d_max = d_max + d_bin
-            return np.arange(start=d_min + 0.5 * d_bin, stop=d_max, step=d_bin)
-        def _parallel_score_samples(kde, samples, thread_count=int(0.875 * multiprocessing.cpu_count())):
-            # the thread_count is determined using the factor 0.875 as recommended here:
-            # https://stackoverflow.com/questions/32625094/scipy-parallel-computing-in-ipython-notebook
-            with multiprocessing.Pool(thread_count) as p:
-                return np.concatenate(p.map(kde.score_samples, np.array_split(samples, thread_count)))
-        def _kde_eval(d, grid):
-            # Kernel choice: Gaussian is too smoothing and cosine or other kernels that do not stretch out
-            # can not be compared well on regions further away from the data as they are -inf
-            kde = KernelDensity(bandwidth="silverman", kernel="exponential").fit(d)
-            return _parallel_score_samples(kde, grid)
-        mean_x, var_x = _mean_var(X, log=True)
-        mean_y, var_y = _mean_var(Y, log=True)
-        x = _prep_kde_data(mean_x, var_x)
-        y = _prep_kde_data(mean_y, var_y)
+        x = self._prep_kde_data(mean_x, var_x)
+        y = self._prep_kde_data(mean_y, var_y)
         # Gridpoints to eval KDE on
-        mean_grid = _grid_points(np.concatenate([mean_x, mean_y]))
-        var_grid = _grid_points(np.concatenate([var_x, var_y]))
+        mean_grid = self._grid_points(np.concatenate([mean_x, mean_y]))
+        var_grid = self._grid_points(np.concatenate([var_x, var_y]))
         grid = np.array(np.meshgrid(mean_grid, var_grid)).T.reshape(-1, 2)
-        kde_x = _kde_eval(x, grid)
-        kde_y = _kde_eval(y, grid)
+        # Fit both KDEs first
+        x_kde = KernelDensity(bandwidth="silverman", kernel="exponential").fit(x)
+        y_kde = KernelDensity(bandwidth="silverman", kernel="exponential").fit(y)
-        kde_diff = ((kde_x - kde_y) ** 2).mean()
+        # Evaluate both KDEs on same grid chunks
+        kde_x, kde_y = self._kde_eval_both(x_kde, y_kde, grid)
-        return kde_diff
+        return ((np.exp(kde_x) - np.exp(kde_y)) ** 2).mean()
     def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
         raise NotImplementedError("MeanVarDistributionDistance cannot be called on a pairwise distance matrix.")

pertpy/tools/_enrichment.py CHANGED Viewed

@@ -304,7 +304,6 @@ class Enrichment:
         groupby: str = None,
         key: str = "pertpy_enrichment",
         ax: Axes | None = None,
-        show: bool = True,
         return_fig: bool = False,
         **kwargs,
     ) -> DotPlot | None:
@@ -417,10 +416,9 @@ class Enrichment:
             **kwargs,
         )
-        if show:
-            plt.show()
         if return_fig:
             return fig
+        plt.show()
         return None
     def plot_gsea(

pertpy/tools/_milo.py CHANGED Viewed

@@ -727,7 +727,6 @@ class Milo:
         color_map: Colormap | str | None = None,
         palette: str | Sequence[str] | None = None,
         ax: Axes | None = None,
-        show: bool = True,
         return_fig: bool = False,
         **kwargs,
     ) -> Figure | None:
@@ -803,10 +802,9 @@ class Milo:
             **kwargs,
         )
-        if show:
-            plt.show()
         if return_fig:
             return fig
+        plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
@@ -820,7 +818,6 @@ class Milo:
         color_map: Colormap | str | None = None,
         palette: str | Sequence[str] | None = None,
         ax: Axes | None = None,
-        show: bool = True,
         return_fig: bool = False,
         **kwargs,
     ) -> Figure | None:
@@ -866,10 +863,9 @@ class Milo:
             **kwargs,
         )
-        if show:
-            plt.show()
         if return_fig:
             return fig
+        plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
@@ -882,7 +878,6 @@ class Milo:
         alpha: float = 0.1,
         subset_nhoods: list[str] = None,
         palette: str | Sequence[str] | dict[str, str] | None = None,
-        show: bool = True,
         return_fig: bool = False,
     ) -> Figure | None:
         """Plot beeswarm plot of logFC against nhood labels
@@ -994,10 +989,9 @@ class Milo:
         plt.legend(loc="upper left", title=f"< {int(alpha * 100)}% SpatialFDR", bbox_to_anchor=(1, 1), frameon=False)
         plt.axvline(x=0, ymin=0, ymax=1, color="black", linestyle="--")
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
+        plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
@@ -1008,7 +1002,6 @@ class Milo:
         *,
         subset_nhoods: list[str] = None,
         log_counts: bool = False,
-        show: bool = True,
         return_fig: bool = False,
     ) -> Figure | None:
         """Plot boxplot of cell numbers vs condition of interest.
@@ -1050,8 +1043,7 @@ class Milo:
         plt.xticks(rotation=90)
         plt.xlabel(test_var)
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
+        plt.show()
         return None

pertpy 0.9.5__py3-none-any.whl → 0.10.0__py3-none-any.whl

pertpy 0.9.5__py3-none-any.whl → 0.10.0__py3-none-any.whl