PyPI - pertpy - Versions diffs - 0.11.4__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

pertpy 0.11.4__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

pertpy/__init__.py +4 -1
pertpy/tools/_coda/_base_coda.py +1 -1
pertpy/tools/_distances/_distances.py +3 -2
pertpy/tools/_milo.py +138 -51
pertpy/tools/_mixscape.py +42 -39
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +1 -1
pertpy/tools/_perturbation_space/_perturbation_space.py +1 -1
pertpy/tools/_scgen/_scgen.py +2 -1
{pertpy-0.11.4.dist-info → pertpy-1.0.0.dist-info}/METADATA +15 -2
{pertpy-0.11.4.dist-info → pertpy-1.0.0.dist-info}/RECORD +12 -12
{pertpy-0.11.4.dist-info → pertpy-1.0.0.dist-info}/WHEEL +0 -0
{pertpy-0.11.4.dist-info → pertpy-1.0.0.dist-info}/licenses/LICENSE +0 -0

pertpy/__init__.py CHANGED Viewed

@@ -2,10 +2,11 @@
 __author__ = "Lukas Heumos"
 __email__ = "lukas.heumos@posteo.net"
-__version__ = "0.11.4"
+__version__ = "1.0.0"
 import warnings
+from anndata._core.aligned_df import ImplicitModificationWarning
 from matplotlib import MatplotlibDeprecationWarning
 from numba import NumbaDeprecationWarning
@@ -13,6 +14,8 @@ warnings.filterwarnings("ignore", category=NumbaDeprecationWarning)
 warnings.filterwarnings("ignore", category=MatplotlibDeprecationWarning)
 warnings.filterwarnings("ignore", category=SyntaxWarning)
 warnings.filterwarnings("ignore", category=UserWarning, module="scvi._settings")
+warnings.filterwarnings("ignore", message="Environment variable.*redefined by R")
+warnings.filterwarnings("ignore", message="Transforming to str index.", category=ImplicitModificationWarning)
 import mudata

pertpy/tools/_coda/_base_coda.py CHANGED Viewed

@@ -1538,7 +1538,7 @@ class CompositionalModel2(ABC):
         if isinstance(data, MuData):
             data = data[modality_key]
         if isinstance(palette, Colormap):
-            palette = palette(range(2))
+            palette = list(palette(range(len(data.obs[feature_name].unique()))))
         # y scale transformations
         if y_scale == "relative":

pertpy/tools/_distances/_distances.py CHANGED Viewed

@@ -3,6 +3,7 @@ from __future__ import annotations
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Literal, NamedTuple
+import jax
 import numpy as np
 import pandas as pd
 from numba import jit
@@ -685,6 +686,7 @@ class WassersteinDistance(AbstractDistance):
     def __init__(self) -> None:
         super().__init__()
         self.accepts_precomputed = False
+        self.solver = jax.jit(Sinkhorn())
     def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
         X = np.asarray(X, dtype=np.float64)
@@ -699,8 +701,7 @@ class WassersteinDistance(AbstractDistance):
     def solve_ot_problem(self, geom: Geometry, **kwargs):
         ot_prob = LinearProblem(geom)
-        solver = Sinkhorn()
-        ot = solver(ot_prob, **kwargs)
+        ot = self.solver(ot_prob, **kwargs)
         cost = float(ot.reg_ot_cost)
         # Check for NaN or invalid cost

pertpy/tools/_milo.py CHANGED Viewed

@@ -2,6 +2,7 @@ from __future__ import annotations
 import random
 import re
+from importlib.util import find_spec
 from typing import TYPE_CHECKING, Literal
 import matplotlib.pyplot as plt
@@ -29,18 +30,6 @@ from sklearn.metrics.pairwise import euclidean_distances
 class Milo:
     """Python implementation of Milo."""
-    def __init__(self):
-        try:
-            from rpy2.robjects import conversion, numpy2ri, pandas2ri
-            from rpy2.robjects.packages import STAP, PackageNotInstalledError, importr
-        except ModuleNotFoundError:
-            raise ImportError("milo requires rpy2 to be installed.") from None
-        try:
-            importr("edgeR")
-        except ImportError as e:
-            raise ImportError("milo requires a valid R installation with edger installed:\n") from e
     def load(
         self,
         input: AnnData,
@@ -266,7 +255,7 @@ class Milo:
         subset_samples: list[str] | None = None,
         add_intercept: bool = True,
         feature_key: str | None = "rna",
-        solver: Literal["edger", "batchglm"] = "edger",
+        solver: Literal["edger", "pydeseq2"] = "edger",
     ):
         """Performs differential abundance testing on neighbourhoods using QLF test implementation as implemented in edgeR.
@@ -279,7 +268,9 @@ class Milo:
             subset_samples: subset of samples (obs in `milo_mdata['milo']`) to use for the test.
             add_intercept: whether to include an intercept in the model. If False, this is equivalent to adding + 0 in the design formula. When model_contrasts is specified, this is set to False by default.
             feature_key: If input data is MuData, specify key to cell-level AnnData object.
-            solver: The solver to fit the model to. One of "edger" (requires R, rpy2 and edgeR to be installed) or "batchglm"
+            solver: The solver to fit the model to.
+                The "edger" solver requires R, rpy2 and edgeR to be installed and is the closest to the R implementation.
+                The "pydeseq2" requires pydeseq2 to be installed. It is still very comparable to the "edger" solver but might be a bit slower.
         Returns:
             None, modifies `milo_mdata['milo']` in place, adding the results of the DA test to `.var`:
@@ -298,7 +289,6 @@ class Milo:
             >>> milo.make_nhoods(mdata["rna"])
             >>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
             >>> milo.da_nhoods(mdata, design="~label")
         """
         try:
             sample_adata = mdata["milo"]
@@ -364,19 +354,32 @@ class Milo:
             # Set up rpy2 to run edgeR
             edgeR, limma, stats, base = self._setup_rpy2()
+            import rpy2.robjects as ro
+            from rpy2.robjects import numpy2ri, pandas2ri
+            from rpy2.robjects.conversion import localconverter
+            from rpy2.robjects.vectors import FloatVector
             # Define model matrix
             if not add_intercept or model_contrasts is not None:
                 design = design + " + 0"
-            model = stats.model_matrix(object=stats.formula(design), data=design_df)
+            design_df = design_df.astype(dict.fromkeys(design_df.select_dtypes(exclude=["number"]).columns, "category"))
+            with localconverter(ro.default_converter + pandas2ri.converter):
+                design_r = pandas2ri.py2rpy(design_df)
+            formula_r = stats.formula(design)
+            model = stats.model_matrix(object=formula_r, data=design_r)
             # Fit NB-GLM
-            dge = edgeR.DGEList(counts=count_mat[keep_nhoods, :][:, keep_smp], lib_size=lib_size[keep_smp])
+            counts_filtered = count_mat[np.ix_(keep_nhoods, keep_smp)]
+            lib_size_filtered = lib_size[keep_smp]
+            count_mat_r = numpy2ri.py2rpy(counts_filtered)
+            lib_size_r = FloatVector(lib_size_filtered)
+            dge = edgeR.DGEList(counts=count_mat_r, lib_size=lib_size_r)
             dge = edgeR.calcNormFactors(dge, method="TMM")
             dge = edgeR.estimateDisp(dge, model)
             fit = edgeR.glmQLFit(dge, model, robust=True)
             # Test
-            n_coef = model.shape[1]
+            model_np = np.array(model)
+            n_coef = model_np.shape[1]
             if model_contrasts is not None:
                 r_str = """
                 get_model_cols <- function(design_df, design){
@@ -387,34 +390,90 @@ class Milo:
                 from rpy2.robjects.packages import STAP
                 get_model_cols = STAP(r_str, "get_model_cols")
-                model_mat_cols = get_model_cols.get_model_cols(design_df, design)
-                model_df = pd.DataFrame(model)
+                with localconverter(ro.default_converter + numpy2ri.converter + pandas2ri.converter):
+                    model_mat_cols = get_model_cols.get_model_cols(design_df, design)
+                with localconverter(ro.default_converter + pandas2ri.converter + numpy2ri.converter):
+                    model_df = pandas2ri.rpy2py(model)
+                model_df = pd.DataFrame(model_df)
                 model_df.columns = model_mat_cols
                 try:
-                    mod_contrast = limma.makeContrasts(contrasts=model_contrasts, levels=model_df)
+                    with localconverter(ro.default_converter + pandas2ri.converter):
+                        mod_contrast = limma.makeContrasts(contrasts=model_contrasts, levels=model_df)
                 except ValueError:
                     logger.error("Model contrasts must be in the form 'A-B' or 'A+B'")
                     raise
-                res = base.as_data_frame(
-                    edgeR.topTags(edgeR.glmQLFTest(fit, contrast=mod_contrast), sort_by="none", n=np.inf)
-                )
+                with localconverter(ro.default_converter + pandas2ri.converter + numpy2ri.converter):
+                    res = base.as_data_frame(
+                        edgeR.topTags(edgeR.glmQLFTest(fit, contrast=mod_contrast), sort_by="none", n=np.inf)
+                    )
             else:
-                res = base.as_data_frame(edgeR.topTags(edgeR.glmQLFTest(fit, coef=n_coef), sort_by="none", n=np.inf))
-            from rpy2.robjects import conversion
-            res = conversion.rpy2py(res)
+                with localconverter(ro.default_converter + numpy2ri.converter + pandas2ri.converter):
+                    res = base.as_data_frame(
+                        edgeR.topTags(edgeR.glmQLFTest(fit, coef=n_coef), sort_by="none", n=np.inf)
+                    )
             if not isinstance(res, pd.DataFrame):
                 res = pd.DataFrame(res)
+            # The columns of res looks like e.g. table.A, table.B, so remove the prefix
+            res.columns = [col.replace("table.", "") for col in res.columns]
+        elif solver == "pydeseq2":
+            if find_spec("pydeseq2") is None:
+                raise ImportError("pydeseq2 is required but not installed. Install with: pip install pydeseq2")
+            from pydeseq2.dds import DeseqDataSet
+            from pydeseq2.ds import DeseqStats
+            counts_filtered = count_mat[np.ix_(keep_nhoods, keep_smp)]
+            design_df_filtered = design_df.iloc[keep_smp].copy()
+            design_df_filtered = design_df_filtered.astype(
+                dict.fromkeys(design_df_filtered.select_dtypes(exclude=["number"]).columns, "category")
+            )
+            design_clean = design if design.startswith("~") else f"~{design}"
+            dds = DeseqDataSet(
+                counts=pd.DataFrame(counts_filtered.T, index=design_df_filtered.index),
+                metadata=design_df_filtered,
+                design=design_clean,
+                refit_cooks=True,
+            )
+            dds.deseq2()
+            if model_contrasts is not None and "-" in model_contrasts:
+                if "(" in model_contrasts or "+" in model_contrasts.split("-")[1]:
+                    raise ValueError(
+                        f"Complex contrasts like '{model_contrasts}' are not supported by pydeseq2. "
+                        "Use simple pairwise contrasts (e.g., 'GroupA-GroupB') or switch to solver='edger'."
+                    )
+                parts = model_contrasts.split("-")
+                factor_name = design_clean.replace("~", "").split("+")[-1].strip()
+                group1 = parts[0].replace(factor_name, "").strip()
+                group2 = parts[1].replace(factor_name, "").strip()
+                stat_res = DeseqStats(dds, contrast=[factor_name, group1, group2])
+            else:
+                factor_name = design_clean.replace("~", "").split("+")[-1].strip()
+                if not isinstance(design_df_filtered[factor_name], pd.CategoricalDtype):
+                    design_df_filtered[factor_name] = design_df_filtered[factor_name].astype("category")
+                categories = design_df_filtered[factor_name].cat.categories
+                stat_res = DeseqStats(dds, contrast=[factor_name, categories[-1], categories[0]])
+            stat_res.summary()
+            res = stat_res.results_df
+            res = res.rename(
+                columns={"baseMean": "logCPM", "log2FoldChange": "logFC", "pvalue": "PValue", "padj": "FDR"}
+            )
+            res = res[["logCPM", "logFC", "PValue", "FDR"]]
-        # Save outputs
         res.index = sample_adata.var_names[keep_nhoods]  # type: ignore
         if any(col in sample_adata.var.columns for col in res.columns):
             sample_adata.var = sample_adata.var.drop(res.columns, axis=1)
         sample_adata.var = pd.concat([sample_adata.var, res], axis=1)
-        # Run Graph spatial FDR correction
-        self._graph_spatial_fdr(sample_adata, neighbors_key=adata.uns["nhood_neighbors_key"])
+        self._graph_spatial_fdr(sample_adata)
     def annotate_nhoods(
         self,
@@ -657,11 +716,19 @@ class Milo:
         self,
     ):
         """Set up rpy2 to run edgeR."""
-        from rpy2.robjects import numpy2ri, pandas2ri
+        try:
+            from rpy2.robjects import conversion, numpy2ri, pandas2ri
+            from rpy2.robjects.packages import STAP, PackageNotInstalledError, importr
+        except ModuleNotFoundError:
+            raise ImportError("milo requires rpy2 to be installed.") from None
+        try:
+            importr("edgeR")
+        except ImportError as e:
+            raise ImportError("milo requires a valid R installation with edger installed.") from e
         from rpy2.robjects.packages import importr
-        numpy2ri.activate()
-        pandas2ri.activate()
         edgeR = self._try_import_bioc_library("edgeR")
         limma = self._try_import_bioc_library("limma")
         stats = importr("stats")
@@ -671,26 +738,27 @@ class Milo:
     def _try_import_bioc_library(
         self,
-        name: str,
+        r_package: str,
     ):
         """Import R packages.
         Args:
-            name (str): R packages name
+            r_package: R packages name
         """
         from rpy2.robjects.packages import PackageNotInstalledError, importr
         try:
-            _r_lib = importr(name)
+            _r_lib = importr(r_package)
             return _r_lib
         except PackageNotInstalledError:
-            logger.error(f"Install Bioconductor library `{name!r}` first as `BiocManager::install({name!r}).`")
+            logger.error(
+                f"Install Bioconductor library `{r_package!r}` first as `BiocManager::install({r_package!r}).`"
+            )
             raise
     def _graph_spatial_fdr(
         self,
         sample_adata: AnnData,
-        neighbors_key: str | None = None,
     ):
         """FDR correction weighted on inverse of connectivity of neighbourhoods.
@@ -698,7 +766,6 @@ class Milo:
         Args:
             sample_adata: Sample-level AnnData.
-            neighbors_key: The key in `adata.obsp` to use as KNN graph.
         """
         # use 1/connectivity as the weighting for the weighted BH adjustment from Cydar
         w = 1 / sample_adata.var["kth_distance"]
@@ -1007,6 +1074,8 @@ class Milo:
         subset_nhoods: list[str] = None,
         log_counts: bool = False,
         return_fig: bool = False,
+        ax=None,
+        show: bool = True,
     ) -> Figure | None:
         """Plot boxplot of cell numbers vs condition of interest.
@@ -1036,18 +1105,36 @@ class Milo:
         pl_df = pd.merge(pl_df, nhood_adata.var)
         pl_df["log_n_cells"] = np.log1p(pl_df["n_cells"])
         if not log_counts:
-            sns.boxplot(data=pl_df, x=test_var, y="n_cells", color="lightblue")
-            sns.stripplot(data=pl_df, x=test_var, y="n_cells", color="black", s=3)
-            plt.ylabel("# cells")
+            sns.boxplot(data=pl_df, x=test_var, y="n_cells", color="lightblue", ax=ax)
+            sns.stripplot(data=pl_df, x=test_var, y="n_cells", color="black", s=3, ax=ax)
+            if ax:
+                ax.set_ylabel("# cells")
+            else:
+                plt.ylabel("# cells")
+        else:
+            sns.boxplot(data=pl_df, x=test_var, y="log_n_cells", color="lightblue", ax=ax)
+            sns.stripplot(data=pl_df, x=test_var, y="log_n_cells", color="black", s=3, ax=ax)
+            if ax:
+                ax.set_ylabel("log(# cells + 1)")
+            else:
+                plt.ylabel("log(# cells + 1)")
+        if ax:
+            ax.tick_params(axis="x", rotation=90)
+            ax.set_xlabel(test_var)
         else:
-            sns.boxplot(data=pl_df, x=test_var, y="log_n_cells", color="lightblue")
-            sns.stripplot(data=pl_df, x=test_var, y="log_n_cells", color="black", s=3)
-            plt.ylabel("log(# cells + 1)")
+            plt.xticks(rotation=90)
+            plt.xlabel(test_var)
-        plt.xticks(rotation=90)
-        plt.xlabel(test_var)
+        if return_fig:
+            return plt.gcf()
+        if ax is None:
+            plt.show()
         if return_fig:
             return plt.gcf()
-        plt.show()
+        if show:
+            plt.show()
         return None

pertpy/tools/_mixscape.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from __future__ import annotations
 import copy
+import warnings
 from collections import OrderedDict
 from typing import TYPE_CHECKING, Literal
@@ -10,11 +11,12 @@ import pandas as pd
 import scanpy as sc
 import seaborn as sns
 from fast_array_utils.stats import mean, mean_var
+from pandas.errors import PerformanceWarning
 from scanpy import get
 from scanpy._utils import _check_use_raw, sanitize_anndata
 from scanpy.plotting import _utils
 from scanpy.tools._utils import _choose_representation
-from scipy.sparse import csr_matrix, spmatrix
+from scipy.sparse import csr_matrix, issparse, spmatrix
 from sklearn.mixture import GaussianMixture
 from pertpy._doc import _doc_params, doc_common_plot_args
@@ -103,6 +105,9 @@ class Mixscape:
         adata.layers["X_pert"] = adata.X.copy()
+        # Work with LIL for efficient indexing but don't store it in AnnData as LIL is not supported anymore
+        X_pert_lil = adata.layers["X_pert"].tolil() if issparse(adata.layers["X_pert"]) else adata.layers["X_pert"]
         control_mask = adata.obs[pert_key] == control
         if ref_selection_mode == "split_by":
@@ -110,9 +115,8 @@ class Mixscape:
                 split_mask = adata.obs[split_by] == split
                 control_mask_group = control_mask & split_mask
                 control_mean_expr = mean(adata.X[control_mask_group], axis=0)
-                adata.layers["X_pert"][split_mask] = (
-                    np.repeat(control_mean_expr.reshape(1, -1), split_mask.sum(), axis=0)
-                    - adata.layers["X_pert"][split_mask]
+                X_pert_lil[split_mask] = (
+                    np.repeat(control_mean_expr.reshape(1, -1), split_mask.sum(), axis=0) - X_pert_lil[split_mask]
                 )
         else:
             if split_by is None:
@@ -129,49 +133,43 @@ class Mixscape:
             for split_mask in split_masks:
                 control_mask_split = control_mask & split_mask
                 R_split = representation[split_mask]
                 R_control = representation[np.asarray(control_mask_split)]
                 eps = kwargs.pop("epsilon", 0.1)
                 nn_index = NNDescent(R_control, **kwargs)
                 indices, _ = nn_index.query(R_split, k=n_neighbors, epsilon=eps)
                 X_control = np.expm1(adata.X[np.asarray(control_mask_split)])
                 n_split = split_mask.sum()
                 n_control = X_control.shape[0]
                 if batch_size is None:
                     col_indices = np.ravel(indices)
                     row_indices = np.repeat(np.arange(n_split), n_neighbors)
                     neigh_matrix = csr_matrix(
                         (np.ones_like(col_indices, dtype=np.float64), (row_indices, col_indices)),
                         shape=(n_split, n_control),
                     )
                     neigh_matrix /= n_neighbors
-                    adata.layers["X_pert"][np.asarray(split_mask)] = (
-                        sc.pp.log1p(neigh_matrix @ X_control) - adata.layers["X_pert"][np.asarray(split_mask)]
+                    X_pert_lil[np.asarray(split_mask)] = (
+                        sc.pp.log1p(neigh_matrix @ X_control) - X_pert_lil[np.asarray(split_mask)]
                     )
                 else:
                     split_indices = np.where(split_mask)[0]
                     for i in range(0, n_split, batch_size):
                         size = min(i + batch_size, n_split)
                         select = slice(i, size)
                         batch = np.ravel(indices[select])
                         split_batch = split_indices[select]
                         size = size - i
                         means_batch = X_control[batch]
                         batch_reshaped = means_batch.reshape(size, n_neighbors, -1)
                         means_batch, _ = mean_var(batch_reshaped, axis=1)
+                        X_pert_lil[split_batch] = np.log1p(means_batch) - X_pert_lil[split_batch]
-                        adata.layers["X_pert"][split_batch] = (
-                            np.log1p(means_batch) - adata.layers["X_pert"][split_batch]
-                        )
+        if issparse(X_pert_lil):
+            adata.layers["X_pert"] = X_pert_lil.tocsr()
+        else:
+            adata.layers["X_pert"] = X_pert_lil
         if copy:
             return adata
@@ -531,26 +529,29 @@ class Mixscape:
             gene_targets = list(set(adata[split_mask].obs[labels]).difference([control]))
             adata_split = adata[split_mask].copy()
             # find top DE genes between cells with targeting and non-targeting gRNAs
-            sc.tl.rank_genes_groups(
-                adata_split,
-                layer=layer,
-                groupby=labels,
-                groups=gene_targets,
-                reference=control,
-                method=test_method,
-                use_raw=False,
-            )
-            # get DE genes for each target gene
-            for gene in gene_targets:
-                logfc_threshold_mask = (
-                    np.abs(adata_split.uns["rank_genes_groups"]["logfoldchanges"][gene]) >= logfc_threshold
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore", RuntimeWarning)
+                warnings.simplefilter("ignore", PerformanceWarning)
+                sc.tl.rank_genes_groups(
+                    adata_split,
+                    layer=layer,
+                    groupby=labels,
+                    groups=gene_targets,
+                    reference=control,
+                    method=test_method,
+                    use_raw=False,
                 )
-                de_genes = adata_split.uns["rank_genes_groups"]["names"][gene][logfc_threshold_mask]
-                pvals_adj = adata_split.uns["rank_genes_groups"]["pvals_adj"][gene][logfc_threshold_mask]
-                de_genes = de_genes[pvals_adj < pval_cutoff]
-                if len(de_genes) < min_de_genes:
-                    de_genes = np.array([])
-                perturbation_markers[(category, gene)] = de_genes
+                # get DE genes for each target gene
+                for gene in gene_targets:
+                    logfc_threshold_mask = (
+                        np.abs(adata_split.uns["rank_genes_groups"]["logfoldchanges"][gene]) >= logfc_threshold
+                    )
+                    de_genes = adata_split.uns["rank_genes_groups"]["names"][gene][logfc_threshold_mask]
+                    pvals_adj = adata_split.uns["rank_genes_groups"]["pvals_adj"][gene][logfc_threshold_mask]
+                    de_genes = de_genes[pvals_adj < pval_cutoff]
+                    if len(de_genes) < min_de_genes:
+                        de_genes = np.array([])
+                    perturbation_markers[(category, gene)] = de_genes
         return perturbation_markers
@@ -711,7 +712,10 @@ class Mixscape:
         if "mixscape_class" not in adata.obs:
             raise ValueError("Please run `pt.tl.mixscape` first.")
         adata_subset = adata[(adata.obs[labels] == target_gene) | (adata.obs[labels] == control)].copy()
-        sc.tl.rank_genes_groups(adata_subset, layer=layer, groupby=labels, method=method)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", RuntimeWarning)
+            warnings.simplefilter("ignore", PerformanceWarning)
+            sc.tl.rank_genes_groups(adata_subset, layer=layer, groupby=labels, method=method)
         sc.pp.scale(adata_subset, max_value=vmax)
         sc.pp.subsample(adata_subset, n_obs=subsample_number)
@@ -998,8 +1002,7 @@ class Mixscape:
             ys = keys
         if multi_panel and groupby is None and len(ys) == 1:
-            # This is a quick and dirty way for adapting scales across several
-            # keys if groupby is None.
+            # This is a quick and dirty way for adapting scales across several keys if groupby is None.
             y = ys[0]
             g = sns.catplot(

pertpy/tools/_perturbation_space/_discriminator_classifiers.py CHANGED Viewed

@@ -226,7 +226,7 @@ class MLPClassifierSpace(PerturbationSpace):
         # Fix class unbalance (likely to happen in perturbation datasets)
         # Usually control cells are overrepresented such that predicting control all time would give good results
         # Cells with rare perturbations are sampled more
-        train_weights = 1 / (1 + torch.sum(torch.tensor(train_dataset.labels), dim=1))
+        train_weights = 1 / (1 + torch.sum(torch.tensor(train_dataset.labels.to_list()), dim=1))
         train_sampler = WeightedRandomSampler(train_weights, len(train_weights))
         self.train_dataloader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler, num_workers=4)

pertpy/tools/_perturbation_space/_perturbation_space.py CHANGED Viewed

@@ -80,7 +80,7 @@ class PerturbationSpace:
         group_masks = (
             [(adata.obs[group_col] == sample) for sample in adata.obs[group_col].unique()]
             if group_col
-            else [[True] * adata.n_obs]
+            else [np.array([True] * adata.n_obs)]
         )
         if layer_key:

pertpy/tools/_scgen/_scgen.py CHANGED Viewed

@@ -2,6 +2,7 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Any
+import anndata as ad
 import jax.numpy as jnp
 import matplotlib.pyplot as plt
 import numpy as np
@@ -248,7 +249,7 @@ class Scgen(JaxTrainingMixin, BaseModelClass):
                 temp_cell[batch_ind[study]].X = batch_list[study].X
             shared_ct.append(temp_cell)
-        all_shared_ann = AnnData.concatenate(*shared_ct, batch_key="concat_batch", index_unique=None)
+        all_shared_ann = ad.concat(shared_ct, label="concat_batch", index_unique=None)
         if "concat_batch" in all_shared_ann.obs.columns:
             del all_shared_ann.obs["concat_batch"]
         if len(not_shared_ct) < 1:

{pertpy-0.11.4.dist-info → pertpy-1.0.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pertpy
-Version: 0.11.4
+Version: 1.0.0
 Summary: Perturbation Analysis in the scverse ecosystem.
 Project-URL: Documentation, https://pertpy.readthedocs.io
 Project-URL: Source, https://github.com/scverse/pertpy
@@ -131,6 +131,12 @@ You can install _pertpy_ in less than a minute via [pip] from [PyPI]:
 pip install pertpy
 ```
+or [conda-forge]:
+```console
+conda install -c conda-forge pertpy
+```
 ### Differential gene expression
 If you want to use the differential gene expression interface, please install pertpy by running:
@@ -149,7 +155,13 @@ pip install 'pertpy[tcoda]'
 ### milo
-milo further requires edger, statmod, and rpy2 to be installed:
+milo requires either the "de" extra for the "pydeseq2" solver:
+```console
+pip install 'pertpy[de]'
+```
+or, edger, statmod, and rpy2 for the "edger" solver:
 ```R
 BiocManager::install("edgeR")
@@ -179,6 +191,7 @@ pip install rpy2
 [pip]: https://pip.pypa.io/
 [pypi]: https://pypi.org/
 [api]: https://pertpy.readthedocs.io/en/latest/api.html
+[conda-forge]: https://anaconda.org/conda-forge/pertpy
 [//]: # "numfocus-fiscal-sponsor-attribution"
 pertpy is part of the scverse® project ([website](https://scverse.org), [governance](https://scverse.org/about/roles)) and is fiscally sponsored by [NumFOCUS](https://numfocus.org/).

{pertpy-0.11.4.dist-info → pertpy-1.0.0.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-pertpy/__init__.py,sha256=fJegZfFrqw0e5er2WVo0NzDOgeJ7DZD9M_rflPLoizQ,716
+pertpy/__init__.py,sha256=cZHJ7PIOhtLkxJMlHbJ2rzei5xhLB4vg0c8AaIShfzc,972
 pertpy/_doc.py,sha256=j5TMNC-DA9yIMqIIUNpjpcVgWfRqyBBfvbRjnCM_OLs,427
 pertpy/_types.py,sha256=IcHCojCUqx8CapibNkcYf2TUqjBFP2ujeELvn_IBSBQ,154
 pertpy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -21,12 +21,12 @@ pertpy/tools/_augur.py,sha256=tc1YKyc0BwzrEGgctsfyy7DsTNKxyvy7ZvWraTWCc1A,55262
 pertpy/tools/_cinemaot.py,sha256=54-rS0AEj31dMe7iU4kEmLoAunq3jNuhsBE3IEp9hrI,38071
 pertpy/tools/_dialogue.py,sha256=mygIZm5i_bnEE37TTQtr1efl_KJq-ejzeL3V1Bmr7Pg,52354
 pertpy/tools/_enrichment.py,sha256=55mwotLH9DXQOhl85MCkxXu-MX0RysLyrPheJysAnF0,21369
-pertpy/tools/_milo.py,sha256=r-kZcpAcoQuhi41AnVuzh-cMIcV3HB3-RGzynHyDc1A,43712
-pertpy/tools/_mixscape.py,sha256=qjXGyH-oeBFte0efuHJfhVEbivnzUVWREwC40ef6Se8,57203
+pertpy/tools/_milo.py,sha256=9yoB9gkBNujqYDTKOlH2v3wiWhs5PdCuB8RgZ3xVI0Y,48049
+pertpy/tools/_mixscape.py,sha256=HfrpBeRlxHXaOpZkF2FmX7dg35kUB1rL0_-n2aSi2_0,57905
 pertpy/tools/decoupler_LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
 pertpy/tools/transferlearning_MMD_LICENSE,sha256=MUvDA-o_j9htRpI8fStVdCRuyLdPkQUuIH0a_EIc57w,1069
 pertpy/tools/_coda/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-pertpy/tools/_coda/_base_coda.py,sha256=aw_aSB_NIUL0yQw2t-MUysxoXt1xdUDLK-pItRGUW3s,111703
+pertpy/tools/_coda/_base_coda.py,sha256=NjKIQBtTIUENnRmeIC2O8cMdU_9DKaJ5_AHPvFnc8XQ,111744
 pertpy/tools/_coda/_sccoda.py,sha256=0Ret6O56kAfCNOdBvtxqiyuj2rUPp18SV1GVK1AvYGU,22607
 pertpy/tools/_coda/_tasccoda.py,sha256=BTaOAmL458zQ_og3x4ENlDnJHD6_F4YkdCoXWsF4i1U,30465
 pertpy/tools/_differential_gene_expression/__init__.py,sha256=SEydWg0iT3Y1pApjnCAOuHxFeI6xVUfgyBHv2s3LADU,487
@@ -39,20 +39,20 @@ pertpy/tools/_differential_gene_expression/_simple_tests.py,sha256=SfU8s_P2JzEA1
 pertpy/tools/_differential_gene_expression/_statsmodels.py,sha256=90h9EPuoCtNxAbJ1Xq4j_E4yYJJpk64zTP7GyTdmrxY,2220
 pertpy/tools/_distances/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pertpy/tools/_distances/_distance_tests.py,sha256=6_nqfHUfKxkI2Yhkzspq3ujMpq56zV_Ddn7bgPzgjyo,13513
-pertpy/tools/_distances/_distances.py,sha256=89d1zShW_9dhphup2oWx5hMOFC7RdogOY56doMuBFts,50473
+pertpy/tools/_distances/_distances.py,sha256=_XbVU8dlYt_Jl2thYPUWg7HT6OXVe-Ki6qthF566sqQ,50503
 pertpy/tools/_perturbation_space/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pertpy/tools/_perturbation_space/_clustering.py,sha256=pNx_SpPkZfCbgF7vzHWqAaiiHdbxPaA-L-hTWTbzFhI,3528
 pertpy/tools/_perturbation_space/_comparison.py,sha256=-NzCPRT-IlhJ9hOz7NQLSk0riIzr2C0yZvX6zm3kon4,4291
-pertpy/tools/_perturbation_space/_discriminator_classifiers.py,sha256=gDid9Z1_AAPHPWuNgAkbP7yrgcC0qjjqTuWjTzTAAZo,23373
+pertpy/tools/_perturbation_space/_discriminator_classifiers.py,sha256=a53-YmUwDHQBCT7ZWe_RH7PZsGXvoSHmJaQyL0CBJng,23383
 pertpy/tools/_perturbation_space/_metrics.py,sha256=y8-baP8WRdB1iDgvP3uuQxSCDxA2lcxvEHHM2C_vWHY,3248
-pertpy/tools/_perturbation_space/_perturbation_space.py,sha256=8RxVUkVEPZj5YZ-C-NP5zO4aYYVD04PzlsYuaIG-wjY,19447
+pertpy/tools/_perturbation_space/_perturbation_space.py,sha256=Vyh15wWw9dcu2YUWhziQd2mA9-4IY8EC5dzkBT9HaIo,19457
 pertpy/tools/_perturbation_space/_simple.py,sha256=AJlHRaEP-vViBeMDvvMtUnXMuIKqZVc7wggnjsHMfMw,12721
 pertpy/tools/_scgen/__init__.py,sha256=uERFlFyF88TH0uLiwmsUGEfHfLVCiZMFuk8gO5f7164,45
 pertpy/tools/_scgen/_base_components.py,sha256=Qq8myRUm43q9XBrZ9gBggfa2cSV2wbz_KYoLgH7iF1A,3009
-pertpy/tools/_scgen/_scgen.py,sha256=31T8ez0FxABIbunJHCk8xvGulHFb8RHXSsyM_z1WsPY,30850
+pertpy/tools/_scgen/_scgen.py,sha256=AQNGsDe-9HEqli3oq7UBDg68ofLCoXm-R_jnLFQ-rlc,30856
 pertpy/tools/_scgen/_scgenvae.py,sha256=bPk4v7EdJc7ROdLuDitHiX_Pvwa7Flw2qHRUwBvjLJY,3889
 pertpy/tools/_scgen/_utils.py,sha256=qz5QUn_Bvk2NGyYVzp3jgjWTFOMt1YyHwUo6HWtoThY,2871
-pertpy-0.11.4.dist-info/METADATA,sha256=Ox3dUh5YA5_a72GAOjCUj-l4Xc2vqz8sEZlhNlfEykY,8701
-pertpy-0.11.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-pertpy-0.11.4.dist-info/licenses/LICENSE,sha256=XuiT2hxeRInhquEIBKMZ5M21n5syhDQ4XbABoposIAg,1100
-pertpy-0.11.4.dist-info/RECORD,,
+pertpy-1.0.0.dist-info/METADATA,sha256=PnK9O-MyIPzSy5DNOqMN7G6zcxZ2ZTJnMFB5cEr5XJQ,8920
+pertpy-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+pertpy-1.0.0.dist-info/licenses/LICENSE,sha256=XuiT2hxeRInhquEIBKMZ5M21n5syhDQ4XbABoposIAg,1100
+pertpy-1.0.0.dist-info/RECORD,,

{pertpy-0.11.4.dist-info → pertpy-1.0.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{pertpy-0.11.4.dist-info → pertpy-1.0.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

pertpy 0.11.4__py3-none-any.whl → 1.0.0__py3-none-any.whl

pertpy 0.11.4__py3-none-any.whl → 1.0.0__py3-none-any.whl