pertpy 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pertpy/__init__.py +1 -1
- pertpy/data/_dataloader.py +2 -2
- pertpy/data/_datasets.py +62 -62
- pertpy/metadata/_cell_line.py +9 -3
- pertpy/metadata/_drug.py +4 -2
- pertpy/preprocessing/_guide_rna.py +17 -10
- pertpy/preprocessing/_guide_rna_mixture.py +9 -3
- pertpy/tools/__init__.py +12 -2
- pertpy/tools/_augur.py +37 -14
- pertpy/tools/_coda/_sccoda.py +68 -101
- pertpy/tools/_coda/_tasccoda.py +103 -85
- pertpy/tools/_mixscape.py +48 -39
- pertpy/tools/_perturbation_space/_comparison.py +3 -3
- pertpy/tools/_perturbation_space/_discriminator_classifiers.py +261 -353
- pertpy/tools/_perturbation_space/_perturbation_space.py +22 -14
- pertpy/tools/_perturbation_space/_simple.py +12 -6
- pertpy/tools/_scgen/_scgenvae.py +2 -1
- pertpy/tools/core.py +18 -0
- {pertpy-1.0.1.dist-info → pertpy-1.0.3.dist-info}/METADATA +14 -2
- {pertpy-1.0.1.dist-info → pertpy-1.0.3.dist-info}/RECORD +22 -21
- {pertpy-1.0.1.dist-info → pertpy-1.0.3.dist-info}/WHEEL +0 -0
- {pertpy-1.0.1.dist-info → pertpy-1.0.3.dist-info}/licenses/LICENSE +0 -0
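Before reading the per-file hunks below, it can help to confirm which of the two versions is actually installed. A minimal check, assuming pertpy is installed in the active environment (the printed value should be either 1.0.1 or 1.0.3):

import importlib.metadata

# Report the installed pertpy version so it can be matched against this diff.
print(importlib.metadata.version("pertpy"))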
pertpy/tools/_coda/_tasccoda.py
CHANGED
@@ -33,24 +33,6 @@ config.update("jax_enable_x64", True)
class Tasccoda(CompositionalModel2):
r"""Statistical model for tree-aggregated differential composition analysis (tascCODA, Ostner et al., 2021).

- The hierarchical formulation of the model for one sample is:
-
- .. math::
- \\begin{align*}
- Y_i &\\sim \\textrm{DirMult}(\\bar{Y}_i, \\textbf{a}(\\textbf{x})_i)\\\\
- \\log(\\textbf{a}(X))_i &= \\alpha + X_{i, \\cdot} \\beta\\\\
- \\alpha_j &\\sim \\mathcal{N}(0, 10) & \\forall j\\in[p]\\\\
- \\beta &= \\hat{\\beta} A^T \\\\
- \\hat{\\beta}_{l, k} &= 0 & \\forall k \\in \\hat{v}, l \\in [d]\\\\
- \\hat{\\beta}_{l, k} &= \\theta \\tilde{\\beta}_{1, l, k} + (1- \\theta) \\tilde{\\beta}_{0, l, k} \\quad & \\forall k\\in\\{[v] \\smallsetminus \\hat{v}\\}, l \\in [d]\\\\
- \\tilde{\\beta}_{m, l, k} &= \\sigma_{m, l, k} * b_{m, l, k} \\quad & \\forall k\\in\\{[v] \\smallsetminus \\hat{v}\\}, m \\in \\{0, 1\\}, l \\in [d]\\\\
- \\sigma_{m, l, k} &\\sim \\textrm{Exp}(\\lambda_{m, l, k}^2/2) \\quad & \\forall k\\in\\{[v] \\smallsetminus \\hat{v}\\}, l \\in \\{0, 1\\}, l \\in [d]\\\\
- b_{m, l, k} &\\sim N(0,1) \\quad & \\forall k\\in\\{[v] \\smallsetminus \\hat{v}\\}, l \\in \\{0, 1\\}, l \\in [d]\\\\
- \\theta &\\sim \\textrm{Beta}(1, \\frac{1}{|\\{[v] \\smallsetminus \\hat{v}\\}|})
- \\end{align*}
-
- with Y being the cell counts, X the covariates, and v the set of nodes of the underlying tree structure.
-
For further information, see `tascCODA: Bayesian Tree-Aggregated Analysis of Compositional Amplicon and Single-Cell Data`
(Ostner et al., 2021)
"""
@@ -75,11 +57,14 @@ class Tasccoda(CompositionalModel2):
modality_key_1: str = "rna",
modality_key_2: str = "coda",
) -> MuData:
- """Prepare a MuData object for subsequent processing.
+ """Prepare a MuData object for subsequent processing.
+
+ If type is "cell_level", then create a compositional analysis dataset from the input adata.
+ If type is "sample_level", generate ete tree for tascCODA models from dendrogram information or cell-level observations.

- When using
+ When using `type="cell_level"`, `adata` needs to have a column in `adata.obs` that contains the cell type assignment.
Further, it must contain one column or a set of columns (e.g. subject id, treatment, disease status) that uniquely identify each (statistical) sample.
- Further covariates (e.g. subject age) can either be specified via addidional column names in
+ Further covariates (e.g. subject age) can either be specified via addidional column names in `adata.obs`, a key in `adata.uns`, or as a separate DataFrame.

Args:
adata: AnnData object.
@@ -90,10 +75,13 @@ class Tasccoda(CompositionalModel2):
covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored.
covariate_df: If type is "cell_level", specify dataFrame with covariates.
dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object.
- levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels.
-
+ levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels.
+ The list must begin with the root level, and end with the leaf level.
+ levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels.
+ The list must begin with the root level, and end with the leaf level.
add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}.
- key_added: If not specified, the tree is stored in
+ key_added: If not specified, the tree is stored in `.uns['tree']`.
+ If `data` is AnnData, save tree in `data`. If `data` is MuData, save tree in data[modality_2].
modality_key_1: Key to the cell-level AnnData in the MuData object.
modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.

@@ -120,8 +108,10 @@ class Tasccoda(CompositionalModel2):
covariate_df=covariate_df,
)
mdata = MuData({modality_key_1: adata, modality_key_2: adata_coda})
-
+ elif type == "sample_level":
mdata = MuData({modality_key_1: AnnData(), modality_key_2: adata})
+ else:
+ raise ValueError(f'{type} is not a supported type, expected "cell_level" or "sample_level".')
import_tree(
data=mdata,
modality_1=modality_key_1,
@@ -464,7 +454,7 @@ class Tasccoda(CompositionalModel2):
self,
data: AnnData | MuData,
modality_key: str = "coda",
- rng_key=None,
+ rng_key: int | None = None,
num_prior_samples: int = 500,
use_posterior_predictive: bool = True,
) -> az.InferenceData:
@@ -547,6 +537,8 @@ class Tasccoda(CompositionalModel2):
if rng_key is None:
rng = np.random.default_rng()
rng_key = random.key(rng.integers(0, 10000))
+ else:
+ rng_key = random.key(rng_key)

if use_posterior_predictive:
posterior_predictive = Predictive(self.model, self.mcmc.get_samples())(
@@ -557,6 +549,15 @@ class Tasccoda(CompositionalModel2):
ref_index=ref_index,
sample_adata=sample_adata,
)
+ # Remove problematic posterior predictive arrays with wrong dimensions
+ if posterior_predictive and "counts" in posterior_predictive:
+ counts_shape = posterior_predictive["counts"].shape
+ expected_dims = 2 # ['sample', 'cell_type']
+ if len(counts_shape) != expected_dims:
+ posterior_predictive = {k: v for k, v in posterior_predictive.items() if k != "counts"}
+ logger.warning(
+ f"Removed 'counts' from posterior_predictive due to dimension mismatch: got {len(counts_shape)}D, expected {expected_dims}D"
+ )
else:
posterior_predictive = None

@@ -569,6 +570,15 @@ class Tasccoda(CompositionalModel2):
ref_index=ref_index,
sample_adata=sample_adata,
)
+ # Remove problematic prior arrays with wrong dimensions
+ if prior and "counts" in prior:
+ counts_shape = prior["counts"].shape
+ expected_dims = 2 # ['sample', 'cell_type']
+ if len(counts_shape) != expected_dims:
+ prior = {k: v for k, v in prior.items() if k != "counts"}
+ logger.warning(
+ f"Removed 'counts' from prior due to dimension mismatch: got {len(counts_shape)}D, expected {expected_dims}D"
+ )
else:
prior = None

@@ -592,80 +602,88 @@ class Tasccoda(CompositionalModel2):
*args,
**kwargs,
):
- """
-
-
-
-
-
-
-
-
-
-
-
-
-
+ """
+
+ Examples:
+ >>> import pertpy as pt
+ >>> adata = pt.dt.tasccoda_example()
+ >>> tasccoda = pt.tl.Tasccoda()
+ >>> mdata = tasccoda.load(
+ >>> adata, type="sample_level",
+ >>> levels_agg=["Major_l1", "Major_l2", "Major_l3", "Major_l4", "Cluster"],
+ >>> key_added="lineage", add_level_name=True
+ >>> )
+ >>> mdata = tasccoda.prepare(
+ >>> mdata, formula="Health", reference_cell_type="automatic", tree_key="lineage", pen_args={"phi": 0}
+ >>> )
+ >>> tasccoda.run_nuts(mdata, num_samples=1000, num_warmup=100, rng_key=42).
+ """ # noqa: D205, D212
return super().run_nuts(data, modality_key, num_samples, num_warmup, rng_key, copy, *args, **kwargs)

run_nuts.__doc__ = CompositionalModel2.run_nuts.__doc__ + run_nuts.__doc__

def summary(self, data: AnnData | MuData, extended: bool = False, modality_key: str = "coda", *args, **kwargs):
- """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ """
+
+ Examples:
+ >>> import pertpy as pt
+ >>> adata = pt.dt.tasccoda_example()
+ >>> tasccoda = pt.tl.Tasccoda()
+ >>> mdata = tasccoda.load(
+ >>> adata, type="sample_level",
+ >>> levels_agg=["Major_l1", "Major_l2", "Major_l3", "Major_l4", "Cluster"],
+ >>> key_added="lineage", add_level_name=True
+ >>> )
+ >>> mdata = tasccoda.prepare(
+ >>> mdata, formula="Health", reference_cell_type="automatic", tree_key="lineage", pen_args={"phi": 0}
+ >>> )
+ >>> tasccoda.run_nuts(mdata, num_samples=1000, num_warmup=100, rng_key=42)
+ >>> tasccoda.summary(mdata).
+ """ # noqa: D205, D212
return super().summary(data, extended, modality_key, *args, **kwargs)

summary.__doc__ = CompositionalModel2.summary.__doc__ + summary.__doc__

def credible_effects(self, data: AnnData | MuData, modality_key: str = "coda", est_fdr: float = None) -> pd.Series:
- """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ """
+
+ Examples:
+ >>> import pertpy as pt
+ >>> adata = pt.dt.tasccoda_example()
+ >>> tasccoda = pt.tl.Tasccoda()
+ >>> mdata = tasccoda.load(
+ >>> adata, type="sample_level",
+ >>> levels_agg=["Major_l1", "Major_l2", "Major_l3", "Major_l4", "Cluster"],
+ >>> key_added="lineage", add_level_name=True
+ >>> )
+ >>> mdata = tasccoda.prepare(
+ >>> mdata, formula="Health", reference_cell_type="automatic", tree_key="lineage", pen_args={"phi": 0}
+ >>> )
+ >>> tasccoda.run_nuts(mdata, num_samples=1000, num_warmup=100, rng_key=42)
+ >>> tasccoda.credible_effects(mdata).
+ """ # noqa: D205, D212
return super().credible_effects(data, modality_key, est_fdr)

credible_effects.__doc__ = CompositionalModel2.credible_effects.__doc__ + credible_effects.__doc__

def set_fdr(self, data: AnnData | MuData, est_fdr: float, modality_key: str = "coda", *args, **kwargs):
- """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ """
+
+ Examples:
+ >>> import pertpy as pt
+ >>> adata = pt.dt.tasccoda_example()
+ >>> tasccoda = pt.tl.Tasccoda()
+ >>> mdata = tasccoda.load(
+ >>> adata, type="sample_level",
+ >>> levels_agg=["Major_l1", "Major_l2", "Major_l3", "Major_l4", "Cluster"],
+ >>> key_added="lineage", add_level_name=True
+ >>> )
+ >>> mdata = tasccoda.prepare(
+ >>> mdata, formula="Health", reference_cell_type="automatic", tree_key="lineage", pen_args={"phi": 0}
+ >>> )
+ >>> tasccoda.run_nuts(mdata, num_samples=1000, num_warmup=100, rng_key=42)
+ >>> tasccoda.set_fdr(mdata, est_fdr=0.4).
+ """ # noqa: D205, D212
return super().set_fdr(data, est_fdr, modality_key, *args, **kwargs)

set_fdr.__doc__ = CompositionalModel2.set_fdr.__doc__ + set_fdr.__doc__
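Taken together, the hunks above tighten `Tasccoda.load` (an explicit error for unsupported `type` values), let an integer be passed as `rng_key` (wrapped internally with `random.key`), drop mis-shaped `counts` arrays before building the `az.InferenceData`, and add worked Examples to the docstrings. The documented workflow as a plain sketch, using the `tasccoda_example` dataset and the column names from the added Examples blocks:

import pertpy as pt

adata = pt.dt.tasccoda_example()
tasccoda = pt.tl.Tasccoda()
# Build the MuData with the aggregated tree levels named in the Examples above.
mdata = tasccoda.load(
    adata,
    type="sample_level",
    levels_agg=["Major_l1", "Major_l2", "Major_l3", "Major_l4", "Cluster"],
    key_added="lineage",
    add_level_name=True,
)
mdata = tasccoda.prepare(
    mdata, formula="Health", reference_cell_type="automatic", tree_key="lineage", pen_args={"phi": 0}
)
# A plain integer seed is accepted for rng_key, as in the added docstring Examples.
tasccoda.run_nuts(mdata, num_samples=1000, num_warmup=100, rng_key=42)
tasccoda.summary(mdata)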
pertpy/tools/_mixscape.py
CHANGED
@@ -177,7 +177,7 @@ class Mixscape:
def mixscape(
self,
adata: AnnData,
-
+ pert_key: str,
control: str,
*,
new_class_name: str | None = "mixscape_class",
@@ -201,12 +201,12 @@ class Mixscape:

Args:
adata: The annotated data object.
-
+ pert_key: The column of `.obs` with target gene labels.
control: Control category from the `labels` column.
new_class_name: Name of mixscape classification to be stored in `.obs`.
layer: Key from adata.layers whose value will be used to perform tests on. Default is using `.layers["X_pert"]`.
min_de_genes: Required number of genes that are differentially expressed for method to separate perturbed and non-perturbed cells.
- logfc_threshold: Limit testing to genes which show, on average, at least X-fold difference (log-scale) between the two groups of cells
+ logfc_threshold: Limit testing to genes which show, on average, at least X-fold difference (log-scale) between the two groups of cells.
de_layer: Layer to use for identifying differentially expressed genes. If `None`, adata.X is used.
test_method: Method to use for differential expression testing.
iter_num: Number of normalmixEM iterations to run if convergence does not occur.
@@ -256,7 +256,7 @@ class Mixscape:
adata=adata,
split_masks=split_masks,
categories=categories,
-
+ pert_key=pert_key,
control=control,
layer=de_layer,
pval_cutoff=pval_cutoff,
@@ -278,7 +278,7 @@ class Mixscape:

# initialize return variables
adata.obs[f"{new_class_name}_p_{perturbation_type.lower()}"] = 0
- adata.obs[new_class_name] = adata.obs[
+ adata.obs[new_class_name] = adata.obs[pert_key].astype(str)
adata.obs[f"{new_class_name}_global"] = np.empty(
[
adata.n_obs,
@@ -290,12 +290,12 @@ class Mixscape:
adata.obs[f"{new_class_name}_p_{perturbation_type.lower()}"] = 0.0
for split, split_mask in enumerate(split_masks):
category = categories[split]
- gene_targets = list(set(adata[split_mask].obs[
+ gene_targets = list(set(adata[split_mask].obs[pert_key]).difference([control]))
for gene in gene_targets:
post_prob = 0
- orig_guide_cells = (adata.obs[
+ orig_guide_cells = (adata.obs[pert_key] == gene) & split_mask
orig_guide_cells_index = list(orig_guide_cells.index[orig_guide_cells])
- nt_cells = (adata.obs[
+ nt_cells = (adata.obs[pert_key] == control) & split_mask
all_cells = orig_guide_cells | nt_cells

if len(perturbation_markers[(category, gene)]) == 0:
@@ -307,7 +307,11 @@ class Mixscape:

dat = X[np.asarray(all_cells)][:, de_genes_indices]
if scale:
-
+ with warnings.catch_warnings():
+ warnings.filterwarnings(
+ "ignore", message="zero-centering a sparse array/matrix densifies it."
+ )
+ dat = sc.pp.scale(dat)

converged = False
n_iter = 0
@@ -335,10 +339,10 @@ class Mixscape:
pvec = pd.Series(np.asarray(pvec).flatten(), index=list(all_cells.index[all_cells]))

if n_iter == 0:
- gv = pd.DataFrame(columns=["pvec",
+ gv = pd.DataFrame(columns=["pvec", pert_key])
gv["pvec"] = pvec
- gv[
- gv.loc[guide_cells,
+ gv[pert_key] = control
+ gv.loc[guide_cells, pert_key] = gene
if gene not in gv_list:
gv_list[gene] = {}
gv_list[gene][category] = gv
@@ -389,7 +393,7 @@ class Mixscape:
def lda(
self,
adata: AnnData,
-
+ pert_key: str,
control: str,
*,
mixscape_class_global: str | None = "mixscape_class_global",
@@ -407,7 +411,7 @@ class Mixscape:

Args:
adata: The annotated data object.
-
+ pert_key: The column of `.obs` with target gene labels.
control: Control category from the `pert_key` column.
mixscape_class_global: The column of `.obs` with mixscape global classification result (perturbed, NP or NT).
layer: Layer to use for identifying differentially expressed genes. If `None`, adata.X is used.
@@ -456,7 +460,7 @@ class Mixscape:
adata=adata,
split_masks=split_masks,
categories=categories,
-
+ pert_key=pert_key,
control=control,
layer=layer,
pval_cutoff=pval_cutoff,
@@ -475,17 +479,19 @@ class Mixscape:
continue
else:
gene_subset = adata_subset[
- (adata_subset.obs[
+ (adata_subset.obs[pert_key] == key[1]) | (adata_subset.obs[pert_key] == control)
].copy()
-
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore", UserWarning)
+ sc.pp.scale(gene_subset)
sc.tl.pca(gene_subset, n_comps=n_comps)
# project cells into PCA space of gene_subset
projected_pcs[key[1]] = np.asarray(np.dot(X, gene_subset.varm["PCs"]))
# concatenate all pcs into a single matrix.
projected_pcs_array = np.concatenate(list(projected_pcs.values()), axis=1)

- clf = LinearDiscriminantAnalysis(n_components=len(np.unique(adata_subset.obs[
- clf.fit(projected_pcs_array, adata_subset.obs[
+ clf = LinearDiscriminantAnalysis(n_components=len(np.unique(adata_subset.obs[pert_key])) - 1)
+ clf.fit(projected_pcs_array, adata_subset.obs[pert_key])
cell_embeddings = clf.transform(projected_pcs_array)
adata.uns["mixscape_lda"] = cell_embeddings

@@ -495,9 +501,10 @@ class Mixscape:
def _get_perturbation_markers(
self,
adata: AnnData,
+ *,
split_masks: list[np.ndarray],
categories: list[str],
-
+ pert_key: str,
control: str,
layer: str,
pval_cutoff: float,
@@ -511,7 +518,7 @@ class Mixscape:
adata: :class:`~anndata.AnnData` object
split_masks: List of boolean masks for each split/group.
categories: List of split/group names.
-
+ pert_key: The column of `.obs` with target gene labels.
control: Control category from the `labels` column.
layer: Key from adata.layers whose value will be used to compare gene expression.
pval_cutoff: P-value cut-off for selection of significantly DE genes.
@@ -526,7 +533,7 @@ class Mixscape:
for split, split_mask in enumerate(split_masks):
category = categories[split]
# get gene sets for each split
- gene_targets = list(set(adata[split_mask].obs[
+ gene_targets = list(set(adata[split_mask].obs[pert_key]).difference([control]))
adata_split = adata[split_mask].copy()
# find top DE genes between cells with targeting and non-targeting gRNAs
with warnings.catch_warnings():
@@ -535,7 +542,7 @@ class Mixscape:
sc.tl.rank_genes_groups(
adata_split,
layer=layer,
- groupby=
+ groupby=pert_key,
groups=gene_targets,
reference=control,
method=test_method,
@@ -666,7 +673,7 @@ class Mixscape:
def plot_heatmap( # pragma: no cover # noqa: D417
self,
adata: AnnData,
-
+ pert_key: str,
target_gene: str,
control: str,
*,
@@ -682,7 +689,7 @@ class Mixscape:

Args:
adata: The annotated data object.
-
+ pert_key: The column of `.obs` with target gene labels.
target_gene: Target gene name to visualize heatmap for.
control: Control category from the `pert_key` column.
layer: Key from `adata.layers` whose value will be used to perform tests on.
@@ -711,12 +718,13 @@ class Mixscape:
"""
if "mixscape_class" not in adata.obs:
raise ValueError("Please run `pt.tl.mixscape` first.")
- adata_subset = adata[(adata.obs[
+ adata_subset = adata[(adata.obs[pert_key] == target_gene) | (adata.obs[pert_key] == control)].copy()
with warnings.catch_warnings():
warnings.simplefilter("ignore", RuntimeWarning)
warnings.simplefilter("ignore", PerformanceWarning)
-
-
+ warnings.simplefilter("ignore", UserWarning)
+ sc.tl.rank_genes_groups(adata_subset, layer=layer, groupby=pert_key, method=method)
+ sc.pp.scale(adata_subset, max_value=vmax)
sc.pp.subsample(adata_subset, n_obs=subsample_number)

fig = sc.pl.rank_genes_groups_heatmap(
@@ -739,7 +747,7 @@ class Mixscape:
def plot_perturbscore( # pragma: no cover # noqa: D417
self,
adata: AnnData,
-
+ pert_key: str,
target_gene: str,
*,
mixscape_class: str = "mixscape_class",
@@ -758,7 +766,7 @@ class Mixscape:

Args:
adata: The annotated data object.
-
+ pert_key: The column of `.obs` with target gene labels.
target_gene: Target gene name to visualize perturbation scores for.
mixscape_class: The column of `.obs` with mixscape classifications.
color: Specify color of target gene class or knockout cell class. For control non-targeting and non-perturbed cells, colors are set to different shades of grey.
@@ -797,21 +805,21 @@ class Mixscape:
else:
perturbation_score = pd.concat([perturbation_score, perturbation_score_temp])
perturbation_score["mix"] = adata.obs[mixscape_class][perturbation_score.index]
- gd = list(set(perturbation_score[
+ gd = list(set(perturbation_score[pert_key]).difference({target_gene}))[0]

# If before_mixscape is True, split densities based on original target gene classification
if before_mixscape is True:
palette = {gd: "#7d7d7d", target_gene: color}
- plot_dens = sns.kdeplot(data=perturbation_score, x="pvec", hue=
+ plot_dens = sns.kdeplot(data=perturbation_score, x="pvec", hue=pert_key, fill=False, common_norm=False)
top_r = max(plot_dens.get_lines()[cond].get_data()[1].max() for cond in range(len(plot_dens.get_lines())))
plt.close()
perturbation_score["y_jitter"] = perturbation_score["pvec"]
rng = np.random.default_rng()
- perturbation_score.loc[perturbation_score[
- low=0.001, high=top_r / 10, size=sum(perturbation_score[
+ perturbation_score.loc[perturbation_score[pert_key] == gd, "y_jitter"] = rng.uniform(
+ low=0.001, high=top_r / 10, size=sum(perturbation_score[pert_key] == gd)
)
- perturbation_score.loc[perturbation_score[
- low=-top_r / 10, high=0, size=sum(perturbation_score[
+ perturbation_score.loc[perturbation_score[pert_key] == target_gene, "y_jitter"] = rng.uniform(
+ low=-top_r / 10, high=0, size=sum(perturbation_score[pert_key] == target_gene)
)
# If split_by is provided, split densities based on the split_by
if split_by is not None:
@@ -844,7 +852,7 @@ class Mixscape:
else:
if palette is None:
palette = {gd: "#7d7d7d", f"{target_gene} NP": "#c9c9c9", f"{target_gene} {perturbation_type}": color}
- plot_dens = sns.kdeplot(data=perturbation_score, x="pvec", hue=
+ plot_dens = sns.kdeplot(data=perturbation_score, x="pvec", hue=pert_key, fill=False, common_norm=False)
top_r = max(plot_dens.get_lines()[i].get_data()[1].max() for i in range(len(plot_dens.get_lines())))
plt.close()
perturbation_score["y_jitter"] = perturbation_score["pvec"]
@@ -899,6 +907,7 @@ class Mixscape:
if return_fig:
return plt.gcf()
plt.show()
+
return None

@_doc_params(common_plot_args=doc_common_plot_args)
@@ -1058,7 +1067,7 @@ class Mixscape:
data=obs_tidy,
order=order,
orient="vertical",
-
+ density_norm=scale,
ax=ax,
hue=hue,
**kwargs,
@@ -1072,7 +1081,7 @@ class Mixscape:
data=obs_tidy,
order=order,
jitter=jitter,
-
+ palette="dark:black",
size=size,
ax=ax,
hue=hue,
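Across this file the change is mechanical: the public entry points (`mixscape`, `lda`, `plot_heatmap`, `plot_perturbscore`) and the private `_get_perturbation_markers` helper now take the label column explicitly as `pert_key`, and noisy scanpy scaling warnings are silenced. A hedged call sketch against 1.0.3; the file path, the `.obs` column name "perturbation", the control label "NT", and the target gene "IFNGR1" are placeholders for your own data, and `perturbation_signature` (not shown in this diff) is assumed to keep its existing signature:

import pertpy as pt
import scanpy as sc

adata = sc.read_h5ad("perturb_seq.h5ad")  # placeholder dataset

ms = pt.tl.Mixscape()
# The .obs column holding target-gene labels is passed explicitly as pert_key.
ms.perturbation_signature(adata, pert_key="perturbation", control="NT")
ms.mixscape(adata, pert_key="perturbation", control="NT")
ms.lda(adata, pert_key="perturbation", control="NT")
ms.plot_heatmap(adata, pert_key="perturbation", target_gene="IFNGR1", control="NT")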
pertpy/tools/_perturbation_space/_comparison.py
CHANGED
@@ -22,7 +22,7 @@ class PerturbationComparison:
) -> float:
"""Compare classification accuracy between real and simulated perturbations.

- Trains a classifier on the real perturbation data
+ Trains a classifier on the real perturbation data & the control data and reports a normalized
classification accuracy on the simulated perturbation.

Args:
@@ -64,8 +64,8 @@ class PerturbationComparison:
real: Real perturbed data.
simulated: Simulated perturbed data.
control: Control data
- use_simulated_for_knn: Include simulted perturbed data (`simulated`) into the knn graph.
- control (`control`) is provided.
+ use_simulated_for_knn: Include simulted perturbed data (`simulated`) into the knn graph.
+ Only valid when control (`control`) is provided.
n_neighbors: Number of neighbors to use in k-neighbor graph.
random_state: Random state used for k-neighbor graph construction.
n_jobs: Number of cores to use. Defaults to -1 (all).