PyPI - pertpy - Versions diffs - 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66)

pertpy/__init__.py +4 -2
pertpy/data/__init__.py +66 -1
pertpy/data/_dataloader.py +28 -26
pertpy/data/_datasets.py +261 -92
pertpy/metadata/__init__.py +6 -0
pertpy/metadata/_cell_line.py +795 -0
pertpy/metadata/_compound.py +128 -0
pertpy/metadata/_drug.py +238 -0
pertpy/metadata/_look_up.py +569 -0
pertpy/metadata/_metadata.py +70 -0
pertpy/metadata/_moa.py +125 -0
pertpy/plot/__init__.py +0 -13
pertpy/preprocessing/__init__.py +2 -0
pertpy/preprocessing/_guide_rna.py +89 -6
pertpy/tools/__init__.py +48 -15
pertpy/tools/_augur.py +329 -32
pertpy/tools/_cinemaot.py +145 -6
pertpy/tools/_coda/_base_coda.py +1237 -116
pertpy/tools/_coda/_sccoda.py +66 -36
pertpy/tools/_coda/_tasccoda.py +46 -39
pertpy/tools/_dialogue.py +180 -77
pertpy/tools/_differential_gene_expression/__init__.py +20 -0
pertpy/tools/_differential_gene_expression/_base.py +657 -0
pertpy/tools/_differential_gene_expression/_checks.py +41 -0
pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
pertpy/tools/_differential_gene_expression/_edger.py +125 -0
pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
pertpy/tools/_distances/_distance_tests.py +29 -24
pertpy/tools/_distances/_distances.py +584 -98
pertpy/tools/_enrichment.py +460 -0
pertpy/tools/_kernel_pca.py +1 -1
pertpy/tools/_milo.py +406 -49
pertpy/tools/_mixscape.py +677 -55
pertpy/tools/_perturbation_space/_clustering.py +10 -3
pertpy/tools/_perturbation_space/_comparison.py +112 -0
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +524 -0
pertpy/tools/_perturbation_space/_perturbation_space.py +146 -52
pertpy/tools/_perturbation_space/_simple.py +52 -11
pertpy/tools/_scgen/__init__.py +1 -1
pertpy/tools/_scgen/_base_components.py +2 -3
pertpy/tools/_scgen/_scgen.py +706 -0
pertpy/tools/_scgen/_utils.py +3 -5
pertpy/tools/decoupler_LICENSE +674 -0
{pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +48 -20
pertpy-0.8.0.dist-info/RECORD +57 -0
{pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
pertpy/plot/_augur.py +0 -234
pertpy/plot/_cinemaot.py +0 -81
pertpy/plot/_coda.py +0 -1001
pertpy/plot/_dialogue.py +0 -91
pertpy/plot/_guide_rna.py +0 -82
pertpy/plot/_milopy.py +0 -284
pertpy/plot/_mixscape.py +0 -594
pertpy/plot/_scgen.py +0 -337
pertpy/tools/_differential_gene_expression.py +0 -99
pertpy/tools/_metadata/__init__.py +0 -0
pertpy/tools/_metadata/_cell_line.py +0 -613
pertpy/tools/_metadata/_look_up.py +0 -342
pertpy/tools/_perturbation_space/_discriminator_classifier.py +0 -381
pertpy/tools/_scgen/_jax_scgen.py +0 -370
pertpy-0.6.0.dist-info/RECORD +0 -50
/pertpy/tools/_scgen/{_jax_scgenvae.py → _scgenvae.py} +0 -0
{pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0

pertpy/plot/_scgen.py DELETED Viewed

@@ -1,337 +0,0 @@
-import numpy as np
-import pandas as pd
-import scanpy as sc
-from adjustText import adjust_text
-from matplotlib import pyplot
-from scipy import stats
-from scvi import REGISTRY_KEYS
-class JaxscgenPlot:
-    """Plotting functions for Jaxscgen."""
-    @staticmethod
-    def reg_mean_plot(
-        adata,
-        condition_key,
-        axis_keys,
-        labels,
-        path_to_save="./reg_mean.pdf",
-        save=True,
-        gene_list=None,
-        show=False,
-        top_100_genes=None,
-        verbose=False,
-        legend=True,
-        title=None,
-        x_coeff=0.30,
-        y_coeff=0.8,
-        fontsize=14,
-        **kwargs,
-    ):
-        """Plots mean matching figure for a set of specific genes.
-        Args:
-            adata:  AnnData object with equivalent structure to initial AnnData. If `None`, defaults to the
-                    AnnData object used to initialize the model. Must have been setup with `batch_key` and `labels_key`,
-                    corresponding to batch and cell type metadata, respectively.
-            condition_key: The key for the condition
-            axis_keys: Dictionary of `adata.obs` keys that are used by the axes of the plot. Has to be in the following form:
-                       `{"x": "Key for x-axis", "y": "Key for y-axis"}`.
-            labels: Dictionary of axes labels of the form `{"x": "x-axis-name", "y": "y-axis name"}`.
-            path_to_save: path to save the plot.
-            save: Specify if the plot should be saved or not.
-            gene_list: list of gene names to be plotted.
-            show: if `True`: will show to the plot after saving it.
-            top_100_genes: List of the top 100 differentially expressed genes. Specify if you want the top 100 DEGs to be assessed extra.
-            verbose: Specify if you want information to be printed while creating the plot, defaults to `False`.
-            legend: if `True`: plots a legend, defaults to `True`.
-            title: Set if you want the plot to display a title.
-            x_coeff: Offset to print the R^2 value in x-direction, defaults to 0.3.
-            y_coeff: Offset to print the R^2 value in y-direction, defaults to 0.8.
-            fontsize: Fontsize used for text in the plot, defaults to 14.
-            **kwargs:
-        Examples:
-            >>> import pertpy at pt
-            >>> data = pt.dt.kang_2018()
-            >>> pt.tl.SCGEN.setup_anndata(data, batch_key="label", labels_key="cell_type")
-            >>> model = pt.tl.SCGEN(data)
-            >>> model.train(max_epochs=10, batch_size=64, early_stopping=True, early_stopping_patience=5)
-            >>> pred, delta = model.predict(ctrl_key='ctrl', stim_key='stim', celltype_to_predict='CD4 T cells')
-            >>> pred.obs['label'] = 'pred'
-            >>> eval_adata = data[data.obs['cell_type'] == 'CD4 T cells'].copy().concatenate(pred)
-            >>> r2_value = pt.pl.scg.reg_mean_plot(eval_adata, condition_key='label', axis_keys={"x": "pred", "y": "stim"}, \
-                labels={"x": "predicted", "y": "ground truth"}, save=False, show=True)
-        """
-        import seaborn as sns
-        sns.set()
-        sns.set(color_codes=True)
-        diff_genes = top_100_genes
-        stim = adata[adata.obs[condition_key] == axis_keys["y"]]
-        ctrl = adata[adata.obs[condition_key] == axis_keys["x"]]
-        if diff_genes is not None:
-            if hasattr(diff_genes, "tolist"):
-                diff_genes = diff_genes.tolist()
-            adata_diff = adata[:, diff_genes]
-            stim_diff = adata_diff[adata_diff.obs[condition_key] == axis_keys["y"]]
-            ctrl_diff = adata_diff[adata_diff.obs[condition_key] == axis_keys["x"]]
-            x_diff = np.asarray(np.mean(ctrl_diff.X, axis=0)).ravel()
-            y_diff = np.asarray(np.mean(stim_diff.X, axis=0)).ravel()
-            m, b, r_value_diff, p_value_diff, std_err_diff = stats.linregress(x_diff, y_diff)
-            if verbose:
-                print("top_100 DEGs mean: ", r_value_diff**2)
-        x = np.asarray(np.mean(ctrl.X, axis=0)).ravel()
-        y = np.asarray(np.mean(stim.X, axis=0)).ravel()
-        m, b, r_value, p_value, std_err = stats.linregress(x, y)
-        if verbose:
-            print("All genes mean: ", r_value**2)
-        df = pd.DataFrame({axis_keys["x"]: x, axis_keys["y"]: y})
-        ax = sns.regplot(x=axis_keys["x"], y=axis_keys["y"], data=df)
-        ax.tick_params(labelsize=fontsize)
-        if "range" in kwargs:
-            start, stop, step = kwargs.get("range")
-            ax.set_xticks(np.arange(start, stop, step))
-            ax.set_yticks(np.arange(start, stop, step))
-        ax.set_xlabel(labels["x"], fontsize=fontsize)
-        ax.set_ylabel(labels["y"], fontsize=fontsize)
-        if gene_list is not None:
-            texts = []
-            for i in gene_list:
-                j = adata.var_names.tolist().index(i)
-                x_bar = x[j]
-                y_bar = y[j]
-                texts.append(pyplot.text(x_bar, y_bar, i, fontsize=11, color="black"))
-                pyplot.plot(x_bar, y_bar, "o", color="red", markersize=5)
-                # if "y1" in axis_keys.keys():
-                # y1_bar = y1[j]
-                # pyplot.text(x_bar, y1_bar, i, fontsize=11, color="black")
-        if gene_list is not None:
-            adjust_text(
-                texts,
-                x=x,
-                y=y,
-                arrowprops={"arrowstyle": "->", "color": "grey", "lw": 0.5},
-                force_points=(0.0, 0.0),
-            )
-        if legend:
-            pyplot.legend(loc="center left", bbox_to_anchor=(1, 0.5))
-        if title is None:
-            pyplot.title("", fontsize=fontsize)
-        else:
-            pyplot.title(title, fontsize=fontsize)
-        ax.text(
-            max(x) - max(x) * x_coeff,
-            max(y) - y_coeff * max(y),
-            r"$\mathrm{R^2_{\mathrm{\mathsf{all\ genes}}}}$= " + f"{r_value ** 2:.2f}",
-            fontsize=kwargs.get("textsize", fontsize),
-        )
-        if diff_genes is not None:
-            ax.text(
-                max(x) - max(x) * x_coeff,
-                max(y) - (y_coeff + 0.15) * max(y),
-                r"$\mathrm{R^2_{\mathrm{\mathsf{top\ 100\ DEGs}}}}$= " + f"{r_value_diff ** 2:.2f}",
-                fontsize=kwargs.get("textsize", fontsize),
-            )
-        if save:
-            pyplot.savefig(f"{path_to_save}", bbox_inches="tight", dpi=100)
-        if show:
-            pyplot.show()
-        pyplot.close()
-        if diff_genes is not None:
-            return r_value**2, r_value_diff**2
-        else:
-            return r_value**2
-    @staticmethod
-    def reg_var_plot(
-        adata,
-        condition_key,
-        axis_keys,
-        labels,
-        path_to_save="./reg_var.pdf",
-        save=True,
-        gene_list=None,
-        top_100_genes=None,
-        show=False,
-        legend=True,
-        title=None,
-        verbose=False,
-        x_coeff=0.30,
-        y_coeff=0.8,
-        fontsize=14,
-        **kwargs,
-    ):
-        """Plots variance matching figure for a set of specific genes.
-        Args:
-            adata: AnnData object with equivalent structure to initial AnnData. If `None`, defaults to the
-                   AnnData object used to initialize the model. Must have been setup with `batch_key` and `labels_key`,
-                   corresponding to batch and cell type metadata, respectively.
-            condition_key: Key of the condition.
-            axis_keys: Dictionary of `adata.obs` keys that are used by the axes of the plot. Has to be in the following form:
-                       `{"x": "Key for x-axis", "y": "Key for y-axis"}`.
-            labels: Dictionary of axes labels of the form `{"x": "x-axis-name", "y": "y-axis name"}`.
-            path_to_save: path to save the plot.
-            save: Specify if the plot should be saved or not.
-            gene_list: list of gene names to be plotted.
-            show: if `True`: will show to the plot after saving it.
-            top_100_genes: List of the top 100 differentially expressed genes. Specify if you want the top 100 DEGs to be assessed extra.
-            legend: if `True`: plots a legend, defaults to `True`.
-            title: Set if you want the plot to display a title.
-            verbose: Specify if you want information to be printed while creating the plot, defaults to `False`.
-            x_coeff: Offset to print the R^2 value in x-direction, defaults to 0.3.
-            y_coeff: Offset to print the R^2 value in y-direction, defaults to 0.8.
-            fontsize: Fontsize used for text in the plot, defaults to 14.
-        """
-        import seaborn as sns
-        sns.set()
-        sns.set(color_codes=True)
-        sc.tl.rank_genes_groups(adata, groupby=condition_key, n_genes=100, method="wilcoxon")
-        diff_genes = top_100_genes
-        stim = adata[adata.obs[condition_key] == axis_keys["y"]]
-        ctrl = adata[adata.obs[condition_key] == axis_keys["x"]]
-        if diff_genes is not None:
-            if hasattr(diff_genes, "tolist"):
-                diff_genes = diff_genes.tolist()
-            adata_diff = adata[:, diff_genes]
-            stim_diff = adata_diff[adata_diff.obs[condition_key] == axis_keys["y"]]
-            ctrl_diff = adata_diff[adata_diff.obs[condition_key] == axis_keys["x"]]
-            x_diff = np.asarray(np.var(ctrl_diff.X, axis=0)).ravel()
-            y_diff = np.asarray(np.var(stim_diff.X, axis=0)).ravel()
-            m, b, r_value_diff, p_value_diff, std_err_diff = stats.linregress(x_diff, y_diff)
-            if verbose:
-                print("Top 100 DEGs var: ", r_value_diff**2)
-        if "y1" in axis_keys.keys():
-            real_stim = adata[adata.obs[condition_key] == axis_keys["y1"]]
-        x = np.asarray(np.var(ctrl.X, axis=0)).ravel()
-        y = np.asarray(np.var(stim.X, axis=0)).ravel()
-        m, b, r_value, p_value, std_err = stats.linregress(x, y)
-        if verbose:
-            print("All genes var: ", r_value**2)
-        df = pd.DataFrame({axis_keys["x"]: x, axis_keys["y"]: y})
-        ax = sns.regplot(x=axis_keys["x"], y=axis_keys["y"], data=df)
-        ax.tick_params(labelsize=fontsize)
-        if "range" in kwargs:
-            start, stop, step = kwargs.get("range")
-            ax.set_xticks(np.arange(start, stop, step))
-            ax.set_yticks(np.arange(start, stop, step))
-        # _p1 = pyplot.scatter(x, y, marker=".", label=f"{axis_keys['x']}-{axis_keys['y']}")
-        # pyplot.plot(x, m * x + b, "-", color="green")
-        ax.set_xlabel(labels["x"], fontsize=fontsize)
-        ax.set_ylabel(labels["y"], fontsize=fontsize)
-        if "y1" in axis_keys.keys():
-            y1 = np.asarray(np.var(real_stim.X, axis=0)).ravel()
-            _ = pyplot.scatter(
-                x,
-                y1,
-                marker="*",
-                c="grey",
-                alpha=0.5,
-                label=f"{axis_keys['x']}-{axis_keys['y1']}",
-            )
-        if gene_list is not None:
-            for i in gene_list:
-                j = adata.var_names.tolist().index(i)
-                x_bar = x[j]
-                y_bar = y[j]
-                pyplot.text(x_bar, y_bar, i, fontsize=11, color="black")
-                pyplot.plot(x_bar, y_bar, "o", color="red", markersize=5)
-                if "y1" in axis_keys.keys():
-                    y1_bar = y1[j]
-                    pyplot.text(x_bar, y1_bar, "*", color="black", alpha=0.5)
-        if legend:
-            pyplot.legend(loc="center left", bbox_to_anchor=(1, 0.5))
-        if title is None:
-            pyplot.title("", fontsize=12)
-        else:
-            pyplot.title(title, fontsize=12)
-        ax.text(
-            max(x) - max(x) * x_coeff,
-            max(y) - y_coeff * max(y),
-            r"$\mathrm{R^2_{\mathrm{\mathsf{all\ genes}}}}$= " + f"{r_value ** 2:.2f}",
-            fontsize=kwargs.get("textsize", fontsize),
-        )
-        if diff_genes is not None:
-            ax.text(
-                max(x) - max(x) * x_coeff,
-                max(y) - (y_coeff + 0.15) * max(y),
-                r"$\mathrm{R^2_{\mathrm{\mathsf{top\ 100\ DEGs}}}}$= " + f"{r_value_diff ** 2:.2f}",
-                fontsize=kwargs.get("textsize", fontsize),
-            )
-        if save:
-            pyplot.savefig(f"{path_to_save}", bbox_inches="tight", dpi=100)
-        if show:
-            pyplot.show()
-        pyplot.close()
-        if diff_genes is not None:
-            return r_value**2, r_value_diff**2
-        else:
-            return r_value**2
-    @staticmethod
-    def binary_classifier(
-        scgen,
-        adata,
-        delta,
-        ctrl_key,
-        stim_key,
-        path_to_save,
-        save=True,
-        fontsize=14,
-    ):
-        """Latent space classifier.
-        Builds a linear classifier based on the dot product between
-        the difference vector and the latent representation of each
-        cell and plots the dot product results between delta and latent representation.
-        Args:
-            scgen: ScGen object that was trained.
-            adata: AnnData object with equivalent structure to initial AnnData. If `None`, defaults to the
-                   AnnData object used to initialize the model. Must have been setup with `batch_key` and `labels_key`,
-                   corresponding to batch and cell type metadata, respectively.
-            delta: Difference between stimulated and control cells in latent space
-            ctrl_key: Key for `control` part of the `data` found in `condition_key`.
-            stim_key: Key for `stimulated` part of the `data` found in `condition_key`.
-            path_to_save: Path to save the plot.
-            save: Specify if the plot should be saved or not.
-            fontsize: Set the font size of the plot.
-        """
-        # matplotlib.rcParams.update(matplotlib.rcParamsDefault)
-        pyplot.close("all")
-        adata = scgen._validate_anndata(adata)
-        condition_key = scgen.adata_manager.get_state_registry(REGISTRY_KEYS.BATCH_KEY).original_key
-        cd = adata[adata.obs[condition_key] == ctrl_key, :]
-        stim = adata[adata.obs[condition_key] == stim_key, :]
-        all_latent_cd = scgen.get_latent_representation(cd.X)
-        all_latent_stim = scgen.get_latent_representation(stim.X)
-        dot_cd = np.zeros(len(all_latent_cd))
-        dot_sal = np.zeros(len(all_latent_stim))
-        for ind, vec in enumerate(all_latent_cd):
-            dot_cd[ind] = np.dot(delta, vec)
-        for ind, vec in enumerate(all_latent_stim):
-            dot_sal[ind] = np.dot(delta, vec)
-        pyplot.hist(
-            dot_cd,
-            label=ctrl_key,
-            bins=50,
-        )
-        pyplot.hist(dot_sal, label=stim_key, bins=50)
-        pyplot.axvline(0, color="k", linestyle="dashed", linewidth=1)
-        pyplot.title("  ", fontsize=fontsize)
-        pyplot.xlabel("  ", fontsize=fontsize)
-        pyplot.ylabel("  ", fontsize=fontsize)
-        pyplot.xticks(fontsize=fontsize)
-        pyplot.yticks(fontsize=fontsize)
-        ax = pyplot.gca()
-        ax.grid(False)
-        if save:
-            pyplot.savefig(f"{path_to_save}", bbox_inches="tight", dpi=100)
-        pyplot.show()

pertpy/tools/_differential_gene_expression.py DELETED Viewed

@@ -1,99 +0,0 @@
-from __future__ import annotations
-from typing import TYPE_CHECKING, Literal
-import decoupler as dc
-import numpy as np
-import numpy.typing as npt
-if TYPE_CHECKING:
-    import pandas as pd
-    from anndata import AnnData
-class DifferentialGeneExpression:
-    """Support for differential gene expression for scverse."""
-    def pseudobulk(
-        self,
-        adata: AnnData,
-        sample_col: str,
-        groups_col: str,
-        obs: pd.DataFrame = None,
-        layer: str = None,
-        use_raw: bool = False,
-        min_prop: float = 0.2,
-        min_counts: int = 1000,
-        min_samples: int = 2,
-        dtype: npt.DTypeLike = np.float32,
-    ) -> AnnData:
-        """Generate Pseudobulk for DE analysis.
-        Wraps decoupler's get_pseudobulk function.
-        See: https://decoupler-py.readthedocs.io/en/latest/generated/decoupler.get_pseudobulk.html#decoupler.get_pseudobulk
-        for more details
-        Args:
-            adata: Input AnnData object.
-            sample_col: Column of obs where to extract the samples names.
-            groups_col: Column of obs where to extract the groups names.
-            obs: If provided, metadata dataframe.
-            layer: If provided, which layer to use.
-            use_raw: Use raw attribute of adata if present.
-            min_prop: Minimum proportion of cells with non-zero values.
-            min_counts: Minimum number of cells per sample.
-            min_samples: Minimum number of samples per feature.
-            dtype: Type of float used.
-        Returns:
-            Returns new AnnData object with unormalized pseudobulk profiles per sample and group.
-        """
-        pseudobulk_adata = dc.get_pseudobulk(
-            adata,
-            sample_col=sample_col,
-            groups_col=groups_col,
-            obs=obs,
-            layer=layer,
-            use_raw=use_raw,
-            min_prop=min_prop,
-            min_counts=min_counts,
-            min_smpls=min_samples,
-            dtype=dtype,
-        )
-        return pseudobulk_adata
-    def de_analysis(
-        self,
-        adata: AnnData,
-        groupby: str,
-        method: Literal["t-test", "wilcoxon", "pydeseq2", "deseq2", "edger"],
-        *formula: str | None,
-        contrast: str | None,
-        inplace: bool = True,
-        key_added: str | None,
-    ) -> pd.DataFrame:
-        """Perform differential expression analysis.
-        Args:
-            adata: single-cell or pseudobulk AnnData object
-            groupby: Column in adata.obs that contains the factor to test, e.g. `treatment`.
-                     For simple statistical tests (t-test, wilcoxon), it is sufficient to specify groupby.
-                     Linear models require to specify a formula.
-                     In that case, the `groupby` column is used to compute the contrast.
-            method: Which method to use to perform the DE test.
-            formula: model specification for linear models. E.g. `~ treatment + sex + age`.
-                     MUST contain the factor specified in `groupby`.
-            contrast: See e.g. https://www.statsmodels.org/devel/contrasts.html for more information.
-            inplace: if True, save the result in `adata.varm[key_added]`
-            key_added: Key under which the result is saved in `adata.varm` if inplace is True.
-                       If set to None this defaults to `de_{method}_{groupby}`.
-        Returns:
-            Depending on the method a Pandas DataFrame containing at least:
-            * gene_id
-            * log2 fold change
-            * mean expression
-            * unadjusted p-value
-            * adjusted p-value
-        """
-        raise NotImplementedError

pertpy/tools/_metadata/__init__.py DELETED Viewed

File without changes

pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl