PyPI - pertpy - Versions diffs - 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl - Mend

pertpy 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

pertpy/__init__.py +1 -1
pertpy/_doc.py +19 -0
pertpy/data/_datasets.py +1 -1
pertpy/metadata/_cell_line.py +18 -8
pertpy/metadata/_compound.py +3 -4
pertpy/metadata/_metadata.py +1 -1
pertpy/preprocessing/_guide_rna.py +114 -13
pertpy/preprocessing/_guide_rna_mixture.py +179 -0
pertpy/tools/__init__.py +1 -1
pertpy/tools/_augur.py +64 -86
pertpy/tools/_cinemaot.py +21 -17
pertpy/tools/_coda/_base_coda.py +90 -117
pertpy/tools/_dialogue.py +32 -40
pertpy/tools/_differential_gene_expression/__init__.py +1 -2
pertpy/tools/_differential_gene_expression/_base.py +486 -112
pertpy/tools/_differential_gene_expression/_edger.py +30 -21
pertpy/tools/_differential_gene_expression/_pydeseq2.py +15 -29
pertpy/tools/_differential_gene_expression/_statsmodels.py +0 -11
pertpy/tools/_distances/_distances.py +71 -56
pertpy/tools/_enrichment.py +16 -8
pertpy/tools/_milo.py +54 -50
pertpy/tools/_mixscape.py +307 -208
pertpy/tools/_perturbation_space/_perturbation_space.py +40 -31
pertpy/tools/_perturbation_space/_simple.py +48 -0
pertpy/tools/_scgen/_scgen.py +35 -27
{pertpy-0.9.4.dist-info → pertpy-0.10.0.dist-info}/METADATA +6 -6
{pertpy-0.9.4.dist-info → pertpy-0.10.0.dist-info}/RECORD +29 -28
{pertpy-0.9.4.dist-info → pertpy-0.10.0.dist-info}/WHEEL +1 -1
pertpy/tools/_differential_gene_expression/_formulaic.py +0 -189
{pertpy-0.9.4.dist-info → pertpy-0.10.0.dist-info}/licenses/LICENSE +0 -0

pertpy/tools/_augur.py CHANGED Viewed

@@ -15,7 +15,6 @@ import statsmodels.api as sm
 from anndata import AnnData
 from joblib import Parallel, delayed
 from lamin_utils import logger
-from rich import print
 from rich.progress import track
 from scipy import sparse, stats
 from sklearn.base import is_classifier, is_regressor
@@ -26,17 +25,19 @@ from sklearn.metrics import (
     explained_variance_score,
     f1_score,
     make_scorer,
-    mean_squared_error,
     precision_score,
     r2_score,
     recall_score,
     roc_auc_score,
+    root_mean_squared_error,
 )
 from sklearn.model_selection import StratifiedKFold, cross_validate
 from sklearn.preprocessing import LabelEncoder
 from skmisc.loess import loess
 from statsmodels.stats.multitest import fdrcorrection
+from pertpy._doc import _doc_params, doc_common_plot_args
 if TYPE_CHECKING:
     from matplotlib.axes import Axes
     from matplotlib.figure import Figure
@@ -439,7 +440,7 @@ class Augur:
                 "augur_score": make_scorer(self.ccc_score),
                 "r2": make_scorer(r2_score),
                 "ccc": make_scorer(self.ccc_score),
-                "neg_mean_squared_error": make_scorer(mean_squared_error),
+                "neg_mean_squared_error": make_scorer(root_mean_squared_error),
                 "explained_variance": make_scorer(explained_variance_score),
             }
         )
@@ -684,7 +685,7 @@ class Augur:
         span: float = 0.75,
         filter_negative_residuals: bool = False,
         n_threads: int = 4,
-        augur_mode: Literal["permute"] | Literal["default"] | Literal["velocity"] = "default",
+        augur_mode: Literal["default", "permute", "velocity"] = "default",
         select_variance_features: bool = True,
         key_added: str = "augurpy_results",
         random_state: int | None = None,
@@ -907,41 +908,39 @@ class Augur:
             .mean()
         )
-        sampled_permuted_cv_augur1 = []
-        sampled_permuted_cv_augur2 = []
+        rng = np.random.default_rng()
+        sampled_data = []
         # draw mean aucs for permute1 and permute2
         for celltype in permuted_cv_augur1["cell_type"].unique():
             df1 = permuted_cv_augur1[permuted_cv_augur1["cell_type"] == celltype]
             df2 = permuted_cv_augur2[permuted_cv_augur2["cell_type"] == celltype]
-            for permutation_idx in range(n_permutations):
-                # subsample
-                sample1 = df1.sample(n=n_subsamples, random_state=permutation_idx, axis="index")
-                sampled_permuted_cv_augur1.append(
-                    pd.DataFrame(
-                        {
-                            "cell_type": [celltype],
-                            "permutation_idx": [permutation_idx],
-                            "mean": [sample1["augur_score"].mean(axis=0)],
-                            "std": [sample1["augur_score"].std(axis=0)],
-                        }
-                    )
-                )
-                sample2 = df2.sample(n=n_subsamples, random_state=permutation_idx, axis="index")
-                sampled_permuted_cv_augur2.append(
-                    pd.DataFrame(
-                        {
-                            "cell_type": [celltype],
-                            "permutation_idx": [permutation_idx],
-                            "mean": [sample2["augur_score"].mean(axis=0)],
-                            "std": [sample2["augur_score"].std(axis=0)],
-                        }
-                    )
+            indices1 = rng.choice(len(df1), size=(n_permutations, n_subsamples), replace=True)
+            indices2 = rng.choice(len(df2), size=(n_permutations, n_subsamples), replace=True)
+            scores1 = df1["augur_score"].values[indices1]
+            scores2 = df2["augur_score"].values[indices2]
+            means1 = scores1.mean(axis=1)
+            means2 = scores2.mean(axis=1)
+            stds1 = scores1.std(axis=1)
+            stds2 = scores2.std(axis=1)
+            sampled_data.append(
+                pd.DataFrame(
+                    {
+                        "cell_type": np.repeat(celltype, n_permutations),
+                        "permutation_idx": np.arange(n_permutations),
+                        "mean1": means1,
+                        "mean2": means2,
+                        "std1": stds1,
+                        "std2": stds2,
+                    }
                 )
+            )
-        permuted_samples1 = pd.concat(sampled_permuted_cv_augur1)
-        permuted_samples2 = pd.concat(sampled_permuted_cv_augur2)
+        sampled_df = pd.concat(sampled_data)
         # delta between augur scores
         delta = augur_score1.merge(augur_score2, on=["cell_type"], suffixes=("1", "2")).assign(
@@ -949,9 +948,7 @@ class Augur:
         )
         # delta between permutation scores
-        delta_rnd = permuted_samples1.merge(
-            permuted_samples2, on=["cell_type", "permutation_idx"], suffixes=("1", "2")
-        ).assign(delta_rnd=lambda x: x.mean2 - x.mean1)
+        delta_rnd = sampled_df.assign(delta_rnd=lambda x: x.mean2 - x.mean1)
         # number of values where permutations are larger than test statistic
         delta["b"] = (
@@ -966,7 +963,7 @@ class Augur:
         delta["z"] = (
             delta["delta_augur"] - delta_rnd.groupby("cell_type", as_index=False).mean()["delta_rnd"]
         ) / delta_rnd.groupby("cell_type", as_index=False).std()["delta_rnd"]
-        # calculate pvalues
         delta["pval"] = np.minimum(
             2 * (delta["b"] + 1) / (delta["m"] + 1), 2 * (delta["m"] - delta["b"] + 1) / (delta["m"] + 1)
         )
@@ -974,24 +971,25 @@ class Augur:
         return delta
+    @_doc_params(common_plot_args=doc_common_plot_args)
     def plot_dp_scatter(
         self,
         results: pd.DataFrame,
+        *,
         top_n: int = None,
-        return_fig: bool | None = None,
         ax: Axes = None,
-        show: bool | None = None,
-        save: str | bool | None = None,
-    ) -> Axes | Figure | None:
+        return_fig: bool = False,
+    ) -> Figure | None:
         """Plot scatterplot of differential prioritization.
         Args:
             results: Results after running differential prioritization.
             top_n: optionally, the number of top prioritized cell types to label in the plot
             ax: optionally, axes used to draw plot
+            {common_plot_args}
         Returns:
-            Axes of the plot.
+            If `return_fig` is `True`, returns the figure, otherwise `None`.
         Examples:
             >>> import pertpy as pt
@@ -1038,37 +1036,32 @@ class Augur:
         legend1 = ax.legend(*scatter.legend_elements(), loc="center left", title="z-scores", bbox_to_anchor=(1, 0.5))
         ax.add_artist(legend1)
-        if save:
-            plt.savefig(save, bbox_inches="tight")
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
-        if not (show or save):
-            return ax
+        plt.show()
         return None
+    @_doc_params(common_plot_args=doc_common_plot_args)
     def plot_important_features(
         self,
         data: dict[str, Any],
+        *,
         key: str = "augurpy_results",
         top_n: int = 10,
-        return_fig: bool | None = None,
         ax: Axes = None,
-        show: bool | None = None,
-        save: str | bool | None = None,
-    ) -> Axes | None:
+        return_fig: bool = False,
+    ) -> Figure | None:
         """Plot a lollipop plot of the n features with largest feature importances.
         Args:
-            results: results after running `predict()` as dictionary or the AnnData object.
+            data: results after running `predict()` as dictionary or the AnnData object.
             key: Key in the AnnData object of the results
             top_n: n number feature importance values to plot. Default is 10.
             ax: optionally, axes used to draw plot
-            return_figure: if `True` returns figure of the plot, default is `False`
+            {common_plot_args}
         Returns:
-            Axes of the plot.
+            If `return_fig` is `True`, returns the figure, otherwise `None`.
         Examples:
             >>> import pertpy as pt
@@ -1109,35 +1102,30 @@ class Augur:
         plt.ylabel("Gene")
         plt.yticks(y_axes_range, n_features["genes"])
-        if save:
-            plt.savefig(save, bbox_inches="tight")
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
-        if not (show or save):
-            return ax
+        plt.show()
         return None
+    @_doc_params(common_plot_args=doc_common_plot_args)
     def plot_lollipop(
         self,
-        data: dict[str, Any],
+        data: dict[str, Any] | AnnData,
+        *,
         key: str = "augurpy_results",
-        return_fig: bool | None = None,
         ax: Axes = None,
-        show: bool | None = None,
-        save: str | bool | None = None,
-    ) -> Axes | Figure | None:
+        return_fig: bool = False,
+    ) -> Figure | None:
         """Plot a lollipop plot of the mean augur values.
         Args:
-            results: results after running `predict()` as dictionary or the AnnData object.
-            key: Key in the AnnData object of the results
-            ax: optionally, axes used to draw plot
-            return_figure: if `True` returns figure of the plot
+            data: results after running `predict()` as dictionary or the AnnData object.
+            key: .uns key in the results AnnData object.
+            ax: optionally, axes used to draw plot.
+            {common_plot_args}
         Returns:
-            Axes of the plot.
+            If `return_fig` is `True`, returns the figure, otherwise `None`.
         Examples:
             >>> import pertpy as pt
@@ -1175,32 +1163,27 @@ class Augur:
         plt.ylabel("Cell Type")
         plt.yticks(y_axes_range, results["summary_metrics"].sort_values("mean_augur_score", axis=1).columns)
-        if save:
-            plt.savefig(save, bbox_inches="tight")
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
-        if not (show or save):
-            return ax
+        plt.show()
         return None
+    @_doc_params(common_plot_args=doc_common_plot_args)
     def plot_scatterplot(
         self,
         results1: dict[str, Any],
         results2: dict[str, Any],
+        *,
         top_n: int = None,
-        return_fig: bool | None = None,
-        show: bool | None = None,
-        save: str | bool | None = None,
-    ) -> Axes | Figure | None:
+        return_fig: bool = False,
+    ) -> Figure | None:
         """Create scatterplot with two augur results.
         Args:
             results1: results after running `predict()`
             results2: results after running `predict()`
             top_n: optionally, the number of top prioritized cell types to label in the plot
-            return_figure: if `True` returns figure of the plot
+            {common_plot_args}
         Returns:
             Axes of the plot.
@@ -1249,12 +1232,7 @@ class Augur:
         plt.xlabel("Augur scores 1")
         plt.ylabel("Augur scores 2")
-        if save:
-            plt.savefig(save, bbox_inches="tight")
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
-        if not (show or save):
-            return ax
+        plt.show()
         return None

pertpy/tools/_cinemaot.py CHANGED Viewed

@@ -18,9 +18,12 @@ from sklearn.decomposition import FastICA
 from sklearn.linear_model import LinearRegression
 from sklearn.neighbors import NearestNeighbors
+from pertpy._doc import _doc_params, doc_common_plot_args
 if TYPE_CHECKING:
     from anndata import AnnData
     from matplotlib.axes import Axes
+    from matplotlib.pyplot import Figure
     from statsmodels.tools.typing import ArrayLike
@@ -88,7 +91,7 @@ class Cinemaot:
             dim = self.get_dim(adata, use_rep=use_rep)
         transformer = FastICA(n_components=dim, random_state=0, whiten="arbitrary-variance")
-        X_transformed = transformer.fit_transform(adata.obsm[use_rep][:, :dim])
+        X_transformed = np.array(transformer.fit_transform(adata.obsm[use_rep][:, :dim]), dtype=np.float64)
         groupvec = (adata.obs[pert_key] == control * 1).values  # control
         xi = np.zeros(dim)
         j = 0
@@ -97,9 +100,9 @@ class Cinemaot:
             xi[j] = xi_obj.correlation
             j = j + 1
-        cf = X_transformed[:, xi < thres]
-        cf1 = cf[adata.obs[pert_key] == control, :]
-        cf2 = cf[adata.obs[pert_key] != control, :]
+        cf = np.array(X_transformed[:, xi < thres], np.float64)
+        cf1 = np.array(cf[adata.obs[pert_key] == control, :], np.float64)
+        cf2 = np.array(cf[adata.obs[pert_key] != control, :], np.float64)
         if sum(xi < thres) == 1:
             sklearn.metrics.pairwise_distances(cf1.reshape(-1, 1), cf2.reshape(-1, 1))
         elif sum(xi < thres) == 0:
@@ -167,7 +170,7 @@ class Cinemaot:
         else:
             _solver = sinkhorn.Sinkhorn(threshold=eps)
             ot_sink = _solver(ot_prob)
-            ot_matrix = ot_sink.matrix.T
+            ot_matrix = np.array(ot_sink.matrix.T, dtype=np.float64)
             embedding = X_transformed[adata.obs[pert_key] != control, :] - np.matmul(
                 ot_matrix / np.sum(ot_matrix, axis=1)[:, None], X_transformed[adata.obs[pert_key] == control, :]
             )
@@ -639,6 +642,7 @@ class Cinemaot:
         s_effect = (np.linalg.norm(e1, axis=0) + 1e-6) / (np.linalg.norm(e0, axis=0) + 1e-6)
         return c_effect, s_effect
+    @_doc_params(common_plot_args=doc_common_plot_args)
     def plot_vis_matching(
         self,
         adata: AnnData,
@@ -647,16 +651,16 @@ class Cinemaot:
         control: str,
         de_label: str,
         source_label: str,
+        *,
         matching_rep: str = "ot",
         resolution: float = 0.5,
         normalize: str = "col",
         title: str = "CINEMA-OT matching matrix",
         min_val: float = 0.01,
-        show: bool = True,
-        save: str | None = None,
         ax: Axes | None = None,
+        return_fig: bool = False,
         **kwargs,
-    ) -> None:
+    ) -> Figure | None:
         """Visualize the CINEMA-OT matching matrix.
         Args:
@@ -670,11 +674,12 @@ class Cinemaot:
             normalize: normalize the coarse-grained matching matrix by row / column.
             title: the title for the figure.
             min_val: The min value to truncate the matching matrix.
-            show: Show the plot, do not return axis.
-            save: If `True` or a `str`, save the figure. A string is appended to the default filename.
-                Infer the filetype if ending on {`'.pdf'`, `'.png'`, `'.svg'`}.
+            {common_plot_args}
             **kwargs: Other parameters to input for seaborn.heatmap.
+        Returns:
+            If `return_fig` is `True`, returns the figure, otherwise `None`.
         Examples:
             >>> import pertpy as pt
             >>> adata = pt.dt.cinemaot_example()
@@ -710,12 +715,11 @@ class Cinemaot:
         g = sns.heatmap(df, annot=True, ax=ax, **kwargs)
         plt.title(title)
-        _utils.savefig_or_show("matching_heatmap", show=show, save=save)
-        if not show:
-            if ax is not None:
-                return ax
-            else:
-                return g
+        if return_fig:
+            return g
+        plt.show()
+        return None
 class Xi:

pertpy 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl

pertpy 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl