PyPI - pertpy - Versions diffs - 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl - Mend

pertpy 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

pertpy/__init__.py +1 -1
pertpy/data/_dataloader.py +2 -2
pertpy/data/_datasets.py +62 -62
pertpy/metadata/_drug.py +4 -2
pertpy/preprocessing/_guide_rna.py +17 -10
pertpy/preprocessing/_guide_rna_mixture.py +9 -3
pertpy/tools/__init__.py +12 -2
pertpy/tools/_augur.py +37 -14
pertpy/tools/_coda/_sccoda.py +0 -19
pertpy/tools/_coda/_tasccoda.py +12 -24
pertpy/tools/_mixscape.py +48 -39
pertpy/tools/_perturbation_space/_comparison.py +3 -3
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +261 -353
pertpy/tools/_perturbation_space/_perturbation_space.py +22 -14
pertpy/tools/_perturbation_space/_simple.py +12 -6
pertpy/tools/_scgen/_scgenvae.py +2 -1
pertpy/tools/core.py +18 -0
{pertpy-1.0.2.dist-info → pertpy-1.0.3.dist-info}/METADATA +84 -51
{pertpy-1.0.2.dist-info → pertpy-1.0.3.dist-info}/RECORD +21 -20
{pertpy-1.0.2.dist-info → pertpy-1.0.3.dist-info}/WHEEL +1 -1
{pertpy-1.0.2.dist-info → pertpy-1.0.3.dist-info}/licenses/LICENSE +0 -0

pertpy/preprocessing/_guide_rna_mixture.py CHANGED Viewed

@@ -8,7 +8,7 @@ import numpy as np
 from jax.random import PRNGKey
 from jax.scipy.special import logsumexp
 from numpyro import factor, plate, sample
-from numpyro.distributions import Dirichlet, Exponential, HalfNormal, Normal, Poisson
+from numpyro.distributions import Categorical, Dirichlet, Exponential, HalfNormal, Normal, Poisson
 from numpyro.infer import MCMC, NUTS
 ParamsDict = Mapping[str, jnp.ndarray]
@@ -102,8 +102,14 @@ class MixtureModel(ABC):
         with plate("data", data.shape[0]):
             log_likelihoods = self.log_likelihood(data, params)
-            log_mixture_likelihood = logsumexp(log_likelihoods, axis=-1)
-            sample("obs", Normal(log_mixture_likelihood, 1.0), obs=data)
+            mixture_probs = jnp.exp(log_likelihoods - logsumexp(log_likelihoods, axis=-1, keepdims=True))
+            z = sample("z", Categorical(mixture_probs), infer={"enumerate": "parallel"})
+            # Observe under selected component
+            poisson_ll = Poisson(params["poisson_rate"]).log_prob(data)
+            gaussian_ll = Normal(params["gaussian_mean"], params["gaussian_std"]).log_prob(data)
+            obs_ll = jnp.where(z == 0, poisson_ll, gaussian_ll)
+            factor("obs", obs_ll)
     def assignment(self, samples: ParamsDict, data: jnp.ndarray) -> np.ndarray:
         """Assign data points to mixture components.

pertpy/tools/__init__.py CHANGED Viewed

@@ -21,7 +21,6 @@ from pertpy.tools._perturbation_space._simple import (
     KMeansSpace,
     PseudobulkSpace,
 )
-from pertpy.tools._scgen import Scgen
 def __getattr__(name: str):
@@ -35,14 +34,25 @@ def __getattr__(name: str):
             raise ImportError(
                 "Extra dependencies required: toytree, ete4. Please install with: pip install toytree ete4"
             ) from None
     elif name in ["EdgeR", "PyDESeq2", "Statsmodels", "TTest", "WilcoxonTest"]:
         module = import_module("pertpy.tools._differential_gene_expression")
         return getattr(module, name)
+    elif name == "Scgen":
+        try:
+            module = import_module("pertpy.tools._scgen")
+            return module.Scgen
+        except ImportError:
+            raise ImportError(
+                "Scgen requires scvi-tools to be installed. Please install with: pip install scvi-tools"
+            ) from None
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+def __dir__():
+    return __all__
 __all__ = [
     "Augur",
     "Cinemaot",

pertpy/tools/_augur.py CHANGED Viewed

@@ -36,6 +36,7 @@ from statsmodels.api import OLS
 from statsmodels.stats.multitest import fdrcorrection
 from pertpy._doc import _doc_params, doc_common_plot_args
+from pertpy.tools.core import _is_raw_counts
 if TYPE_CHECKING:
     from matplotlib.axes import Axes
@@ -87,6 +88,7 @@ class Augur:
         self,
         input: AnnData | pd.DataFrame,
         *,
+        layer: str | None = None,
         meta: pd.DataFrame | None = None,
         label_col: str = "label_col",
         cell_type_col: str = "cell_type_col",
@@ -98,6 +100,7 @@ class Augur:
         Args:
             input: Anndata or matrix containing gene expression values (genes in rows, cells in columns)
                 and optionally meta data about each cell.
+            layer: Layer in AnnData to use for expression data. If None, uses .X
             meta: Optional Pandas DataFrame containing meta data about each cell.
             label_col: column of the meta DataFrame or the Anndata or matrix containing the condition labels for each cell
                 in the cell-by-gene expression matrix
@@ -114,11 +117,11 @@ class Augur:
             >>> import pertpy as pt
             >>> adata = pt.dt.sc_sim_augur()
             >>> ag_rfc = pt.tl.Augur("random_forest_classifier")
-            >>> loaded_data = ag_rfc.load(adata)
+            >>> augur_adata = ag_rfc.load(adata)
         """
         if isinstance(input, AnnData):
-            input.obs = input.obs.rename(columns={cell_type_col: "cell_type", label_col: "label"})
             adata = input
+            obs_renamed = adata.obs.rename(columns={cell_type_col: "cell_type", label_col: "label"})
         elif isinstance(input, pd.DataFrame):
             if meta is None:
@@ -130,27 +133,47 @@ class Augur:
             label = input[label_col] if meta is None else meta[label_col]
             cell_type = input[cell_type_col] if meta is None else meta[cell_type_col]
-            x = input.drop([label_col, cell_type_col], axis=1) if meta is None else input
-            adata = AnnData(X=x, obs=pd.DataFrame({"cell_type": cell_type, "label": label}))
+            X = input.drop([label_col, cell_type_col], axis=1) if meta is None else input
+            adata = AnnData(X=X, obs=pd.DataFrame({"cell_type": cell_type, "label": label}))
+            obs_renamed = adata.obs
-        if len(adata.obs["label"].unique()) < 2:
+        if len(obs_renamed["label"].unique()) < 2:
             raise ValueError("Less than two unique labels in dataset. At least two are needed for the analysis.")
+        if isinstance(input, AnnData):
+            final_adata = AnnData(X=adata.X, obs=obs_renamed, var=adata.var, layers=adata.layers)
+        else:
+            final_adata = adata
         # dummy variables for categorical data
-        if adata.obs["label"].dtype.name == "category":
-            # filter samples according to label
+        if final_adata.obs["label"].dtype.name == "category":
+            label_encoder = LabelEncoder()
+            final_adata.obs["y_"] = label_encoder.fit_transform(final_adata.obs["label"])
             if condition_label is not None and treatment_label is not None:
                 logger.info(f"Filtering samples with {condition_label} and {treatment_label} labels.")
-                adata = ad.concat(
-                    [adata[adata.obs["label"] == condition_label], adata[adata.obs["label"] == treatment_label]]
+                final_adata = ad.concat(
+                    [
+                        final_adata[final_adata.obs["label"] == condition_label],
+                        final_adata[final_adata.obs["label"] == treatment_label],
+                    ]
                 )
-            label_encoder = LabelEncoder()
-            adata.obs["y_"] = label_encoder.fit_transform(adata.obs["label"])
         else:
-            y = adata.obs["label"].to_frame()
+            y = final_adata.obs["label"].to_frame()
             y = y.rename(columns={"label": "y_"})
-            adata.obs = pd.concat([adata.obs, y], axis=1)
+            final_adata.obs = pd.concat([final_adata.obs, y], axis=1)
-        return adata
+        if layer is not None:
+            if layer not in final_adata.layers:
+                raise ValueError(f"Layer '{layer}' not found in AnnData object")
+            X = final_adata.layers[layer]
+        else:
+            X = final_adata.X
+        if not _is_raw_counts(X):
+            logger.warning("Data does not appear to be raw counts. Augur developers recommend using raw counts.")
+        return final_adata
     def create_estimator(
         self,

pertpy/tools/_coda/_sccoda.py CHANGED Viewed

@@ -11,7 +11,6 @@ from jax import config, random
 from lamin_utils import logger
 from mudata import MuData
 from numpyro.infer import Predictive
-from rich import print
 from pertpy.tools._coda._base_coda import CompositionalModel2, from_scanpy
@@ -25,24 +24,6 @@ config.update("jax_enable_x64", True)
 class Sccoda(CompositionalModel2):
     r"""Statistical model for single-cell differential composition analysis with specification of a reference cell type.
-    This is the standard scCODA model and recommended for all uses.
-    The hierarchical formulation of the model for one sample is:
-    .. math::
-         y|x &\\sim DirMult(\\phi, \\bar{y}) \\\\
-         \\log(\\phi) &= \\alpha + x \\beta \\\\
-         \\alpha_k &\\sim N(0, 5) \\quad &\\forall k \\in [K] \\\\
-         \\beta_{m, \\hat{k}} &= 0 &\\forall m \\in [M]\\\\
-         \\beta_{m, k} &= \\tau_{m, k} \\tilde{\\beta}_{m, k} \\quad &\\forall m \\in [M], k \\in \\{[K] \\smallsetminus \\hat{k}\\} \\\\
-         \\tau_{m, k} &= \\frac{\\exp(t_{m, k})}{1+ \\exp(t_{m, k})} \\quad &\\forall m \\in [M], k \\in \\{[K] \\smallsetminus \\hat{k}\\} \\\\
-         \\frac{t_{m, k}}{50} &\\sim N(0, 1) \\quad &\\forall m \\in [M], k \\in \\{[K] \\smallsetminus \\hat{k}\\} \\\\
-         \\tilde{\\beta}_{m, k} &= \\sigma_m^2 \\cdot \\gamma_{m, k} \\quad &\\forall m \\in [M], k \\in \\{[K] \\smallsetminus \\hat{k}\\} \\\\
-         \\sigma_m^2 &\\sim HC(0, 1) \\quad &\\forall m \\in [M] \\\\
-         \\gamma_{m, k} &\\sim N(0,1) \\quad &\\forall m \\in [M], k \\in \\{[K] \\smallsetminus \\hat{k}\\} \\\\
-    with y being the cell counts and x the covariates.
     For further information, see `scCODA is a Bayesian model for compositional single-cell data analysis`
     (Büttner, Ostner et al., NatComms, 2021)
     """

pertpy/tools/_coda/_tasccoda.py CHANGED Viewed

@@ -33,24 +33,6 @@ config.update("jax_enable_x64", True)
 class Tasccoda(CompositionalModel2):
     r"""Statistical model for tree-aggregated differential composition analysis (tascCODA, Ostner et al., 2021).
-    The hierarchical formulation of the model for one sample is:
-    .. math::
-         \\begin{align*}
-            Y_i &\\sim \\textrm{DirMult}(\\bar{Y}_i, \\textbf{a}(\\textbf{x})_i)\\\\
-            \\log(\\textbf{a}(X))_i &= \\alpha + X_{i, \\cdot} \\beta\\\\
-            \\alpha_j &\\sim \\mathcal{N}(0, 10) & \\forall j\\in[p]\\\\
-            \\beta &= \\hat{\\beta} A^T \\\\
-            \\hat{\\beta}_{l, k} &= 0 & \\forall k \\in \\hat{v}, l \\in [d]\\\\
-            \\hat{\\beta}_{l, k} &= \\theta \\tilde{\\beta}_{1, l, k} + (1- \\theta) \\tilde{\\beta}_{0, l, k} \\quad & \\forall k\\in\\{[v] \\smallsetminus \\hat{v}\\}, l \\in [d]\\\\
-            \\tilde{\\beta}_{m, l, k} &= \\sigma_{m, l, k} * b_{m, l, k} \\quad & \\forall k\\in\\{[v] \\smallsetminus \\hat{v}\\}, m \\in \\{0, 1\\}, l \\in [d]\\\\
-            \\sigma_{m, l, k} &\\sim \\textrm{Exp}(\\lambda_{m, l, k}^2/2) \\quad & \\forall k\\in\\{[v] \\smallsetminus \\hat{v}\\}, l \\in \\{0, 1\\}, l \\in [d]\\\\
-            b_{m, l, k} &\\sim N(0,1) \\quad & \\forall k\\in\\{[v] \\smallsetminus \\hat{v}\\}, l \\in \\{0, 1\\}, l \\in [d]\\\\
-            \\theta &\\sim \\textrm{Beta}(1, \\frac{1}{|\\{[v] \\smallsetminus \\hat{v}\\}|})
-        \\end{align*}
-    with Y being the cell counts, X the covariates, and v the set of nodes of the underlying tree structure.
     For further information, see `tascCODA: Bayesian Tree-Aggregated Analysis of Compositional Amplicon and Single-Cell Data`
     (Ostner et al., 2021)
     """
@@ -75,11 +57,14 @@ class Tasccoda(CompositionalModel2):
         modality_key_1: str = "rna",
         modality_key_2: str = "coda",
     ) -> MuData:
-        """Prepare a MuData object for subsequent processing. If type is "cell_level", then create a compositional analysis dataset from the input adata. If type is "sample_level", generate ete tree for tascCODA models from dendrogram information or cell-level observations.
+        """Prepare a MuData object for subsequent processing.
+        If type is "cell_level", then create a compositional analysis dataset from the input adata.
+        If type is "sample_level", generate ete tree for tascCODA models from dendrogram information or cell-level observations.
-        When using ``type="cell_level"``, ``adata`` needs to have a column in ``adata.obs`` that contains the cell type assignment.
+        When using `type="cell_level"`, `adata` needs to have a column in `adata.obs` that contains the cell type assignment.
         Further, it must contain one column or a set of columns (e.g. subject id, treatment, disease status) that uniquely identify each (statistical) sample.
-        Further covariates (e.g. subject age) can either be specified via addidional column names in ``adata.obs``, a key in ``adata.uns``, or as a separate DataFrame.
+        Further covariates (e.g. subject age) can either be specified via addidional column names in `adata.obs`, a key in `adata.uns`, or as a separate DataFrame.
         Args:
             adata: AnnData object.
@@ -90,10 +75,13 @@ class Tasccoda(CompositionalModel2):
             covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored.
             covariate_df: If type is "cell_level", specify dataFrame with covariates.
             dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object.
-            levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
-            levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
+            levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels.
+                The list must begin with the root level, and end with the leaf level.
+            levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels.
+                The list must begin with the root level, and end with the leaf level.
             add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}.
-            key_added: If not specified, the tree is stored in .uns[‘tree’]. If `data` is AnnData, save tree in `data`. If `data` is MuData, save tree in data[modality_2].
+            key_added: If not specified, the tree is stored in `.uns['tree']`.
+                If `data` is AnnData, save tree in `data`. If `data` is MuData, save tree in data[modality_2].
             modality_key_1: Key to the cell-level AnnData in the MuData object.
             modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.

pertpy/tools/_mixscape.py CHANGED Viewed

@@ -177,7 +177,7 @@ class Mixscape:
     def mixscape(
         self,
         adata: AnnData,
-        labels: str,
+        pert_key: str,
         control: str,
         *,
         new_class_name: str | None = "mixscape_class",
@@ -201,12 +201,12 @@ class Mixscape:
         Args:
             adata: The annotated data object.
-            labels: The column of `.obs` with target gene labels.
+            pert_key: The column of `.obs` with target gene labels.
             control: Control category from the `labels` column.
             new_class_name: Name of mixscape classification to be stored in `.obs`.
             layer: Key from adata.layers whose value will be used to perform tests on. Default is using `.layers["X_pert"]`.
             min_de_genes: Required number of genes that are differentially expressed for method to separate perturbed and non-perturbed cells.
-            logfc_threshold: Limit testing to genes which show, on average, at least X-fold difference (log-scale) between the two groups of cells (default: 0.25).
+            logfc_threshold: Limit testing to genes which show, on average, at least X-fold difference (log-scale) between the two groups of cells.
             de_layer: Layer to use for identifying differentially expressed genes. If `None`, adata.X is used.
             test_method: Method to use for differential expression testing.
             iter_num: Number of normalmixEM iterations to run if convergence does not occur.
@@ -256,7 +256,7 @@ class Mixscape:
             adata=adata,
             split_masks=split_masks,
             categories=categories,
-            labels=labels,
+            pert_key=pert_key,
             control=control,
             layer=de_layer,
             pval_cutoff=pval_cutoff,
@@ -278,7 +278,7 @@ class Mixscape:
         # initialize return variables
         adata.obs[f"{new_class_name}_p_{perturbation_type.lower()}"] = 0
-        adata.obs[new_class_name] = adata.obs[labels].astype(str)
+        adata.obs[new_class_name] = adata.obs[pert_key].astype(str)
         adata.obs[f"{new_class_name}_global"] = np.empty(
             [
                 adata.n_obs,
@@ -290,12 +290,12 @@ class Mixscape:
         adata.obs[f"{new_class_name}_p_{perturbation_type.lower()}"] = 0.0
         for split, split_mask in enumerate(split_masks):
             category = categories[split]
-            gene_targets = list(set(adata[split_mask].obs[labels]).difference([control]))
+            gene_targets = list(set(adata[split_mask].obs[pert_key]).difference([control]))
             for gene in gene_targets:
                 post_prob = 0
-                orig_guide_cells = (adata.obs[labels] == gene) & split_mask
+                orig_guide_cells = (adata.obs[pert_key] == gene) & split_mask
                 orig_guide_cells_index = list(orig_guide_cells.index[orig_guide_cells])
-                nt_cells = (adata.obs[labels] == control) & split_mask
+                nt_cells = (adata.obs[pert_key] == control) & split_mask
                 all_cells = orig_guide_cells | nt_cells
                 if len(perturbation_markers[(category, gene)]) == 0:
@@ -307,7 +307,11 @@ class Mixscape:
                     dat = X[np.asarray(all_cells)][:, de_genes_indices]
                     if scale:
-                        dat = sc.pp.scale(dat)
+                        with warnings.catch_warnings():
+                            warnings.filterwarnings(
+                                "ignore", message="zero-centering a sparse array/matrix densifies it."
+                            )
+                            dat = sc.pp.scale(dat)
                     converged = False
                     n_iter = 0
@@ -335,10 +339,10 @@ class Mixscape:
                         pvec = pd.Series(np.asarray(pvec).flatten(), index=list(all_cells.index[all_cells]))
                         if n_iter == 0:
-                            gv = pd.DataFrame(columns=["pvec", labels])
+                            gv = pd.DataFrame(columns=["pvec", pert_key])
                             gv["pvec"] = pvec
-                            gv[labels] = control
-                            gv.loc[guide_cells, labels] = gene
+                            gv[pert_key] = control
+                            gv.loc[guide_cells, pert_key] = gene
                             if gene not in gv_list:
                                 gv_list[gene] = {}
                             gv_list[gene][category] = gv
@@ -389,7 +393,7 @@ class Mixscape:
     def lda(
         self,
         adata: AnnData,
-        labels: str,
+        pert_key: str,
         control: str,
         *,
         mixscape_class_global: str | None = "mixscape_class_global",
@@ -407,7 +411,7 @@ class Mixscape:
         Args:
             adata: The annotated data object.
-            labels: The column of `.obs` with target gene labels.
+            pert_key: The column of `.obs` with target gene labels.
             control: Control category from the `pert_key` column.
             mixscape_class_global: The column of `.obs` with mixscape global classification result (perturbed, NP or NT).
             layer: Layer to use for identifying differentially expressed genes. If `None`, adata.X is used.
@@ -456,7 +460,7 @@ class Mixscape:
             adata=adata,
             split_masks=split_masks,
             categories=categories,
-            labels=labels,
+            pert_key=pert_key,
             control=control,
             layer=layer,
             pval_cutoff=pval_cutoff,
@@ -475,17 +479,19 @@ class Mixscape:
                 continue
             else:
                 gene_subset = adata_subset[
-                    (adata_subset.obs[labels] == key[1]) | (adata_subset.obs[labels] == control)
+                    (adata_subset.obs[pert_key] == key[1]) | (adata_subset.obs[pert_key] == control)
                 ].copy()
-                sc.pp.scale(gene_subset)
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore", UserWarning)
+                    sc.pp.scale(gene_subset)
                 sc.tl.pca(gene_subset, n_comps=n_comps)
                 # project cells into PCA space of gene_subset
                 projected_pcs[key[1]] = np.asarray(np.dot(X, gene_subset.varm["PCs"]))
         # concatenate all pcs into a single matrix.
         projected_pcs_array = np.concatenate(list(projected_pcs.values()), axis=1)
-        clf = LinearDiscriminantAnalysis(n_components=len(np.unique(adata_subset.obs[labels])) - 1)
-        clf.fit(projected_pcs_array, adata_subset.obs[labels])
+        clf = LinearDiscriminantAnalysis(n_components=len(np.unique(adata_subset.obs[pert_key])) - 1)
+        clf.fit(projected_pcs_array, adata_subset.obs[pert_key])
         cell_embeddings = clf.transform(projected_pcs_array)
         adata.uns["mixscape_lda"] = cell_embeddings
@@ -495,9 +501,10 @@ class Mixscape:
     def _get_perturbation_markers(
         self,
         adata: AnnData,
+        *,
         split_masks: list[np.ndarray],
         categories: list[str],
-        labels: str,
+        pert_key: str,
         control: str,
         layer: str,
         pval_cutoff: float,
@@ -511,7 +518,7 @@ class Mixscape:
             adata: :class:`~anndata.AnnData` object
             split_masks: List of boolean masks for each split/group.
             categories: List of split/group names.
-            labels: The column of `.obs` with target gene labels.
+            pert_key: The column of `.obs` with target gene labels.
             control: Control category from the `labels` column.
             layer: Key from adata.layers whose value will be used to compare gene expression.
             pval_cutoff: P-value cut-off for selection of significantly DE genes.
@@ -526,7 +533,7 @@ class Mixscape:
         for split, split_mask in enumerate(split_masks):
             category = categories[split]
             # get gene sets for each split
-            gene_targets = list(set(adata[split_mask].obs[labels]).difference([control]))
+            gene_targets = list(set(adata[split_mask].obs[pert_key]).difference([control]))
             adata_split = adata[split_mask].copy()
             # find top DE genes between cells with targeting and non-targeting gRNAs
             with warnings.catch_warnings():
@@ -535,7 +542,7 @@ class Mixscape:
                 sc.tl.rank_genes_groups(
                     adata_split,
                     layer=layer,
-                    groupby=labels,
+                    groupby=pert_key,
                     groups=gene_targets,
                     reference=control,
                     method=test_method,
@@ -666,7 +673,7 @@ class Mixscape:
     def plot_heatmap(  # pragma: no cover # noqa: D417
         self,
         adata: AnnData,
-        labels: str,
+        pert_key: str,
         target_gene: str,
         control: str,
         *,
@@ -682,7 +689,7 @@ class Mixscape:
         Args:
             adata: The annotated data object.
-            labels: The column of `.obs` with target gene labels.
+            pert_key: The column of `.obs` with target gene labels.
             target_gene: Target gene name to visualize heatmap for.
             control: Control category from the `pert_key` column.
             layer: Key from `adata.layers` whose value will be used to perform tests on.
@@ -711,12 +718,13 @@ class Mixscape:
         """
         if "mixscape_class" not in adata.obs:
             raise ValueError("Please run `pt.tl.mixscape` first.")
-        adata_subset = adata[(adata.obs[labels] == target_gene) | (adata.obs[labels] == control)].copy()
+        adata_subset = adata[(adata.obs[pert_key] == target_gene) | (adata.obs[pert_key] == control)].copy()
         with warnings.catch_warnings():
             warnings.simplefilter("ignore", RuntimeWarning)
             warnings.simplefilter("ignore", PerformanceWarning)
-            sc.tl.rank_genes_groups(adata_subset, layer=layer, groupby=labels, method=method)
-        sc.pp.scale(adata_subset, max_value=vmax)
+            warnings.simplefilter("ignore", UserWarning)
+            sc.tl.rank_genes_groups(adata_subset, layer=layer, groupby=pert_key, method=method)
+            sc.pp.scale(adata_subset, max_value=vmax)
         sc.pp.subsample(adata_subset, n_obs=subsample_number)
         fig = sc.pl.rank_genes_groups_heatmap(
@@ -739,7 +747,7 @@ class Mixscape:
     def plot_perturbscore(  # pragma: no cover # noqa: D417
         self,
         adata: AnnData,
-        labels: str,
+        pert_key: str,
         target_gene: str,
         *,
         mixscape_class: str = "mixscape_class",
@@ -758,7 +766,7 @@ class Mixscape:
         Args:
             adata: The annotated data object.
-            labels: The column of `.obs` with target gene labels.
+            pert_key: The column of `.obs` with target gene labels.
             target_gene: Target gene name to visualize perturbation scores for.
             mixscape_class: The column of `.obs` with mixscape classifications.
             color: Specify color of target gene class or knockout cell class. For control non-targeting and non-perturbed cells, colors are set to different shades of grey.
@@ -797,21 +805,21 @@ class Mixscape:
             else:
                 perturbation_score = pd.concat([perturbation_score, perturbation_score_temp])
         perturbation_score["mix"] = adata.obs[mixscape_class][perturbation_score.index]
-        gd = list(set(perturbation_score[labels]).difference({target_gene}))[0]
+        gd = list(set(perturbation_score[pert_key]).difference({target_gene}))[0]
         # If before_mixscape is True, split densities based on original target gene classification
         if before_mixscape is True:
             palette = {gd: "#7d7d7d", target_gene: color}
-            plot_dens = sns.kdeplot(data=perturbation_score, x="pvec", hue=labels, fill=False, common_norm=False)
+            plot_dens = sns.kdeplot(data=perturbation_score, x="pvec", hue=pert_key, fill=False, common_norm=False)
             top_r = max(plot_dens.get_lines()[cond].get_data()[1].max() for cond in range(len(plot_dens.get_lines())))
             plt.close()
             perturbation_score["y_jitter"] = perturbation_score["pvec"]
             rng = np.random.default_rng()
-            perturbation_score.loc[perturbation_score[labels] == gd, "y_jitter"] = rng.uniform(
-                low=0.001, high=top_r / 10, size=sum(perturbation_score[labels] == gd)
+            perturbation_score.loc[perturbation_score[pert_key] == gd, "y_jitter"] = rng.uniform(
+                low=0.001, high=top_r / 10, size=sum(perturbation_score[pert_key] == gd)
             )
-            perturbation_score.loc[perturbation_score[labels] == target_gene, "y_jitter"] = rng.uniform(
-                low=-top_r / 10, high=0, size=sum(perturbation_score[labels] == target_gene)
+            perturbation_score.loc[perturbation_score[pert_key] == target_gene, "y_jitter"] = rng.uniform(
+                low=-top_r / 10, high=0, size=sum(perturbation_score[pert_key] == target_gene)
             )
             # If split_by is provided, split densities based on the split_by
             if split_by is not None:
@@ -844,7 +852,7 @@ class Mixscape:
         else:
             if palette is None:
                 palette = {gd: "#7d7d7d", f"{target_gene} NP": "#c9c9c9", f"{target_gene} {perturbation_type}": color}
-            plot_dens = sns.kdeplot(data=perturbation_score, x="pvec", hue=labels, fill=False, common_norm=False)
+            plot_dens = sns.kdeplot(data=perturbation_score, x="pvec", hue=pert_key, fill=False, common_norm=False)
             top_r = max(plot_dens.get_lines()[i].get_data()[1].max() for i in range(len(plot_dens.get_lines())))
             plt.close()
             perturbation_score["y_jitter"] = perturbation_score["pvec"]
@@ -899,6 +907,7 @@ class Mixscape:
         if return_fig:
             return plt.gcf()
         plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
@@ -1058,7 +1067,7 @@ class Mixscape:
                     data=obs_tidy,
                     order=order,
                     orient="vertical",
-                    scale=scale,
+                    density_norm=scale,
                     ax=ax,
                     hue=hue,
                     **kwargs,
@@ -1072,7 +1081,7 @@ class Mixscape:
                         data=obs_tidy,
                         order=order,
                         jitter=jitter,
-                        color="black",
+                        palette="dark:black",
                         size=size,
                         ax=ax,
                         hue=hue,

pertpy/tools/_perturbation_space/_comparison.py CHANGED Viewed

@@ -22,7 +22,7 @@ class PerturbationComparison:
     ) -> float:
         """Compare classification accuracy between real and simulated perturbations.
-        Trains a classifier on the real perturbation data + the control data and reports a normalized
+        Trains a classifier on the real perturbation data & the control data and reports a normalized
         classification accuracy on the simulated perturbation.
         Args:
@@ -64,8 +64,8 @@ class PerturbationComparison:
             real: Real perturbed data.
             simulated: Simulated perturbed data.
             control: Control data
-            use_simulated_for_knn: Include simulted perturbed data (`simulated`) into the knn graph. Only valid when
-                control (`control`) is provided.
+            use_simulated_for_knn: Include simulted perturbed data (`simulated`) into the knn graph.
+                Only valid when control (`control`) is provided.
             n_neighbors: Number of neighbors to use in k-neighbor graph.
             random_state: Random state used for k-neighbor graph construction.
             n_jobs: Number of cores to use. Defaults to -1 (all).

pertpy 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

pertpy 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl