PyPI - pertpy - Versions diffs - 0.10.0__py3-none-any.whl → 0.11.1__py3-none-any.whl - Mend

pertpy 0.10.0__py3-none-any.whl → 0.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

pertpy/__init__.py +5 -1
pertpy/_doc.py +1 -3
pertpy/_types.py +6 -0
pertpy/data/_dataloader.py +68 -24
pertpy/data/_datasets.py +9 -9
pertpy/metadata/__init__.py +2 -1
pertpy/metadata/_cell_line.py +133 -25
pertpy/metadata/_look_up.py +13 -19
pertpy/metadata/_moa.py +1 -1
pertpy/preprocessing/_guide_rna.py +138 -44
pertpy/preprocessing/_guide_rna_mixture.py +17 -19
pertpy/tools/__init__.py +4 -3
pertpy/tools/_augur.py +106 -98
pertpy/tools/_cinemaot.py +74 -114
pertpy/tools/_coda/_base_coda.py +134 -148
pertpy/tools/_coda/_sccoda.py +69 -70
pertpy/tools/_coda/_tasccoda.py +74 -80
pertpy/tools/_dialogue.py +48 -41
pertpy/tools/_differential_gene_expression/_base.py +21 -31
pertpy/tools/_differential_gene_expression/_checks.py +4 -6
pertpy/tools/_differential_gene_expression/_dge_comparison.py +5 -6
pertpy/tools/_differential_gene_expression/_edger.py +6 -10
pertpy/tools/_differential_gene_expression/_pydeseq2.py +1 -1
pertpy/tools/_differential_gene_expression/_simple_tests.py +3 -3
pertpy/tools/_differential_gene_expression/_statsmodels.py +8 -5
pertpy/tools/_distances/_distance_tests.py +1 -2
pertpy/tools/_distances/_distances.py +31 -46
pertpy/tools/_enrichment.py +7 -22
pertpy/tools/_milo.py +19 -15
pertpy/tools/_mixscape.py +73 -75
pertpy/tools/_perturbation_space/_clustering.py +4 -4
pertpy/tools/_perturbation_space/_comparison.py +4 -4
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +83 -32
pertpy/tools/_perturbation_space/_perturbation_space.py +10 -10
pertpy/tools/_perturbation_space/_simple.py +12 -14
pertpy/tools/_scgen/_scgen.py +16 -17
pertpy/tools/_scgen/_scgenvae.py +2 -2
pertpy/tools/_scgen/_utils.py +3 -1
{pertpy-0.10.0.dist-info → pertpy-0.11.1.dist-info}/METADATA +42 -24
pertpy-0.11.1.dist-info/RECORD +58 -0
{pertpy-0.10.0.dist-info → pertpy-0.11.1.dist-info}/licenses/LICENSE +1 -0
pertpy/tools/_kernel_pca.py +0 -50
pertpy-0.10.0.dist-info/RECORD +0 -58
{pertpy-0.10.0.dist-info → pertpy-0.11.1.dist-info}/WHEEL +0 -0

pertpy/tools/_distances/_distances.py CHANGED Viewed

@@ -1,12 +1,11 @@
 from __future__ import annotations
-import multiprocessing
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Literal, NamedTuple
-import numba
 import numpy as np
 import pandas as pd
+from numba import jit
 from ott.geometry.geometry import Geometry
 from ott.geometry.pointcloud import PointCloud
 from ott.problems.linear.linear_problem import LinearProblem
@@ -135,9 +134,7 @@ class Distance:
         self.aggregation_func = agg_fct
         if metric == "edistance":
             metric_fct = Edistance()
-        elif metric == "euclidean":
-            metric_fct = EuclideanDistance(self.aggregation_func)
-        elif metric == "root_mean_squared_error":
+        elif metric in ("euclidean", "root_mean_squared_error"):
             metric_fct = EuclideanDistance(self.aggregation_func)
         elif metric == "mse":
             metric_fct = MeanSquaredDistance(self.aggregation_func)
@@ -181,7 +178,7 @@ class Distance:
         if layer_key and obsm_key:
             raise ValueError(
-                "Cannot use 'layer_key' and 'obsm_key' at the same time.\n" "Please provide only one of the two keys."
+                "Cannot use 'layer_key' and 'obsm_key' at the same time.\nPlease provide only one of the two keys."
             )
         if not layer_key and not obsm_key:
             obsm_key = "X_pca"
@@ -201,6 +198,7 @@ class Distance:
         Args:
             X: First vector of shape (n_samples, n_features).
             Y: Second vector of shape (n_samples, n_features).
+            kwargs: Passed to the metric function.
         Returns:
             float: Distance between X and Y.
@@ -239,9 +237,10 @@ class Distance:
             Y: Second vector of shape (n_samples, n_features).
             n_bootstrap: Number of bootstrap samples.
             random_state: Random state for bootstrapping.
+            **kwargs: Passed to the metric function.
         Returns:
-            MeanVar: Mean and variance of distance between X and Y.
+            Mean and variance of distance between X and Y.
         Examples:
             >>> import pertpy as pt
@@ -286,8 +285,8 @@ class Distance:
             kwargs: Additional keyword arguments passed to the metric function.
         Returns:
-            pd.DataFrame: Dataframe with pairwise distances.
-            tuple[pd.DataFrame, pd.DataFrame]: Two Dataframes, one for the mean and one for the variance of pairwise distances.
+            :class:`pandas.DataFrame`: Dataframe with pairwise distances.
+            tuple[:class:`pandas.DataFrame`, :class:`pandas.DataFrame`]: Two Dataframes, one for the mean and one for the variance of pairwise distances.
         Examples:
             >>> import pertpy as pt
@@ -309,7 +308,7 @@ class Distance:
         # able to handle precomputed distances such as the PseudobulkDistance.
         if self.metric_fct.accepts_precomputed:
             # Precompute the pairwise distances if needed
-            if f"{self.obsm_key}_{self.cell_wise_metric}_predistances" not in adata.obsp.keys():
+            if f"{self.obsm_key}_{self.cell_wise_metric}_predistances" not in adata.obsp:
                 self.precompute_distances(adata, n_jobs=n_jobs, **kwargs)
             pwd = adata.obsp[f"{self.obsm_key}_{self.cell_wise_metric}_predistances"]
             for index_x, group_x in enumerate(fct(groups)):
@@ -339,10 +338,7 @@ class Distance:
                         df.loc[group_x, group_y] = df.loc[group_y, group_x] = bootstrap_output.mean
                         df_var.loc[group_x, group_y] = df_var.loc[group_y, group_x] = bootstrap_output.variance
         else:
-            if self.layer_key:
-                embedding = adata.layers[self.layer_key]
-            else:
-                embedding = adata.obsm[self.obsm_key].copy()
+            embedding = adata.layers[self.layer_key] if self.layer_key else adata.obsm[self.obsm_key].copy()
             for index_x, group_x in enumerate(fct(groups)):
                 cells_x = embedding[np.asarray(grouping == group_x)].copy()
                 for group_y in groups[index_x:]:  # type: ignore
@@ -409,8 +405,8 @@ class Distance:
             kwargs: Additional keyword arguments passed to the metric function.
         Returns:
-            pd.DataFrame: Dataframe with distances of groups to selected_group.
-            tuple[pd.DataFrame, pd.DataFrame]: Two Dataframes, one for the mean and one for the variance of distances of groups to selected_group.
+            :class:`pandas.DataFrame`: Dataframe with distances of groups to selected_group.
+            tuple[:class:`pandas.DataFrame`, :class:`pandas.DataFrame`]: Two Dataframes, one for the mean and one for the variance of distances of groups to selected_group.
         Examples:
@@ -446,7 +442,7 @@ class Distance:
         # able to handle precomputed distances such as the PseudobulkDistance.
         if self.metric_fct.accepts_precomputed:
             # Precompute the pairwise distances if needed
-            if f"{self.obsm_key}_{self.cell_wise_metric}_predistances" not in adata.obsp.keys():
+            if f"{self.obsm_key}_{self.cell_wise_metric}_predistances" not in adata.obsp:
                 self.precompute_distances(adata, n_jobs=n_jobs, **kwargs)
             pwd = adata.obsp[f"{self.obsm_key}_{self.cell_wise_metric}_predistances"]
             for group_x in fct(groups):
@@ -473,10 +469,7 @@ class Distance:
                         df.loc[group_x] = bootstrap_output.mean
                         df_var.loc[group_x] = bootstrap_output.variance
         else:
-            if self.layer_key:
-                embedding = adata.layers[self.layer_key]
-            else:
-                embedding = adata.obsm[self.obsm_key].copy()
+            embedding = adata.layers[self.layer_key] if self.layer_key else adata.obsm[self.obsm_key].copy()
             for group_x in fct(groups):
                 cells_x = embedding[np.asarray(grouping == group_x)].copy()
                 group_y = selected_group
@@ -524,10 +517,7 @@ class Distance:
             >>> distance = pt.tools.Distance(metric="edistance")
             >>> distance.precompute_distances(adata)
         """
-        if self.layer_key:
-            cells = adata.layers[self.layer_key]
-        else:
-            cells = adata.obsm[self.obsm_key].copy()
+        cells = adata.layers[self.layer_key] if self.layer_key else adata.obsm[self.obsm_key].copy()
         pwd = pairwise_distances(cells, cells, metric=self.cell_wise_metric, n_jobs=n_jobs)
         adata.obsp[f"{self.obsm_key}_{self.cell_wise_metric}_predistances"] = pwd
@@ -618,6 +608,7 @@ class AbstractDistance(ABC):
         Args:
             X: First vector of shape (n_samples, n_features).
             Y: Second vector of shape (n_samples, n_features).
+            kwargs: Passed to the metrics function.
         Returns:
             float: Distance between X and Y.
@@ -630,8 +621,8 @@ class AbstractDistance(ABC):
         Args:
             P: Pairwise distance matrix of shape (n_samples, n_samples).
-            idx: Boolean array of shape (n_samples,) indicating which
-            samples belong to X (or Y, since each metric is symmetric).
+            idx: Boolean array of shape (n_samples,) indicating which samples belong to X (or Y, since each metric is symmetric).
+            kwargs: Passed to the metrics function.
         Returns:
             float: Distance between X and Y.
@@ -645,12 +636,12 @@ class Edistance(AbstractDistance):
     def __init__(self) -> None:
         super().__init__()
         self.accepts_precomputed = True
-        self.cell_wise_metric = "sqeuclidean"
+        self.cell_wise_metric = "euclidean"
     def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
-        sigma_X = pairwise_distances(X, X, metric="sqeuclidean").mean()
-        sigma_Y = pairwise_distances(Y, Y, metric="sqeuclidean").mean()
-        delta = pairwise_distances(X, Y, metric="sqeuclidean").mean()
+        sigma_X = pairwise_distances(X, X, metric=self.cell_wise_metric, **kwargs).mean()
+        sigma_Y = pairwise_distances(Y, Y, metric=self.cell_wise_metric, **kwargs).mean()
+        delta = pairwise_distances(X, Y, metric=self.cell_wise_metric, **kwargs).mean()
         return 2 * delta - sigma_X - sigma_Y
     def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
@@ -881,7 +872,7 @@ class R2ScoreDistance(AbstractDistance):
 class SymmetricKLDivergence(AbstractDistance):
-    """Average of symmetric KL divergence between gene distributions of two groups
+    """Average of symmetric KL divergence between gene distributions of two groups.
     Assuming a Gaussian distribution for each gene in each group, calculates
     the KL divergence between them and averages over all genes. Repeats this ABBA to get a symmetrized distance.
@@ -908,7 +899,7 @@ class SymmetricKLDivergence(AbstractDistance):
 class TTestDistance(AbstractDistance):
-    """Average of T test statistic between two groups assuming unequal variances"""
+    """Average of T test statistic between two groups assuming unequal variances."""
     def __init__(self) -> None:
         super().__init__()
@@ -932,16 +923,14 @@ class TTestDistance(AbstractDistance):
 class KSTestDistance(AbstractDistance):
-    """Average of two-sided KS test statistic between two groups"""
+    """Average of two-sided KS test statistic between two groups."""
     def __init__(self) -> None:
         super().__init__()
         self.accepts_precomputed = False
     def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
-        stats = []
-        for i in range(X.shape[1]):
-            stats.append(abs(kstest(X[:, i], Y[:, i])[0]))
+        stats = [abs(kstest(X[:, i], Y[:, i])[0]) for i in range(X.shape[1])]
         return sum(stats) / len(stats)
     def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
@@ -949,10 +938,7 @@ class KSTestDistance(AbstractDistance):
 class NBLL(AbstractDistance):
-    """
-    Average of Log likelihood (scalar) of group B cells
-    according to a NB distribution fitted over group A
-    """
+    """Average of Log likelihood (scalar) of group B cells according to a NB distribution fitted over group A."""
     def __init__(self) -> None:
         super().__init__()
@@ -960,15 +946,12 @@ class NBLL(AbstractDistance):
     def __call__(self, X: np.ndarray, Y: np.ndarray, epsilon=1e-8, **kwargs) -> float:
         def _is_count_matrix(matrix, tolerance=1e-6):
-            if matrix.dtype.kind == "i" or np.all(np.abs(matrix - np.round(matrix)) < tolerance):
-                return True
-            else:
-                return False
+            return bool(matrix.dtype.kind == "i" or np.all(np.abs(matrix - np.round(matrix)) < tolerance))
         if not _is_count_matrix(matrix=X) or not _is_count_matrix(matrix=Y):
             raise ValueError("NBLL distance only works for raw counts.")
-        @numba.jit(forceobj=True)
+        @jit(forceobj=True)
         def _compute_nll(y: np.ndarray, nb_params: tuple[float, float], epsilon: float) -> float:
             mu = np.exp(nb_params[0])
             theta = 1 / nb_params[1]
@@ -1163,9 +1146,11 @@ class MeanVarDistributionDistance(AbstractDistance):
     def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
         """Difference of mean-var distributions in 2 matrices.
         Args:
             X: Normalized and log transformed cells x genes count matrix.
             Y: Normalized and log transformed cells x genes count matrix.
+            kwargs: Passed to the metrics function.
         """
         mean_x, var_x = self._mean_var(X, log=True)
         mean_y, var_y = self._mean_var(Y, log=True)

pertpy/tools/_enrichment.py CHANGED Viewed

@@ -25,10 +25,7 @@ def _prepare_targets(
     categories: str | Sequence[str] = None,
 ) -> ChainMap | dict:
     if categories is not None:
-        if isinstance(categories, str):
-            categories = [categories]
-        else:
-            categories = list(categories)
+        categories = [categories] if isinstance(categories, str) else list(categories)
     if targets is None:
         pt_drug = Drug()
@@ -97,10 +94,7 @@ class Enrichment:
         Returns:
             An AnnData object with scores.
         """
-        if layer is not None:
-            mtx = adata.layers[layer]
-        else:
-            mtx = adata.X
+        mtx = adata.layers[layer] if layer is not None else adata.X
         targets = _prepare_targets(targets=targets, nested=nested, categories=categories)  # type: ignore
         full_targets = targets.copy()
@@ -114,10 +108,7 @@ class Enrichment:
         weights = pd.DataFrame(targets, index=adata.var_names)
         weights = weights.loc[:, weights.sum() > 0]
         weights = weights / weights.sum()
-        if issparse(mtx):
-            scores = mtx.dot(weights)
-        else:
-            scores = np.dot(mtx, weights)
+        scores = mtx.dot(weights) if issparse(mtx) else np.dot(mtx, weights)
         if method == "seurat":
             obs_avg = _mean(mtx, names=adata.var_names, axis=0)
@@ -136,10 +127,7 @@ class Enrichment:
             control_gene_weights = pd.DataFrame(control_groups, index=adata.var_names)
             control_gene_weights = control_gene_weights / control_gene_weights.sum()
-            if issparse(mtx):
-                control_profiles = mtx.dot(control_gene_weights)
-            else:
-                control_profiles = np.dot(mtx, control_gene_weights)
+            control_profiles = mtx.dot(control_gene_weights) if issparse(mtx) else np.dot(mtx, control_gene_weights)
             drug_bins = {}
             for drug in weights.columns:
                 bins = np.unique(obs_cut[targets[drug]])
@@ -178,7 +166,7 @@ class Enrichment:
                      Accepts two forms:
                      - A dictionary with the names of the groups as keys, and the entries being the corresponding gene lists.
                      - A dictionary of dictionaries defined like above, with names of gene group categories as keys.
-                       If passing one of those, specify `nested=True`.
+                     If passing one of those, specify `nested=True`.
             nested: Whether `targets` is a dictionary of dictionaries with group categories as keys.
             categories: If `targets=None` or `nested=True`, this argument can be used to subset the gene groups to one or more categories (keys of the original dictionary).
                         In case of the ChEMBL drug targets, these are ATC level 1/level 2 category codes.
@@ -293,7 +281,7 @@ class Enrichment:
         return enrichment
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_dotplot(
+    def plot_dotplot(  # pragma: no cover # noqa: D417
         self,
         adata: AnnData,
         *,
@@ -341,10 +329,7 @@ class Enrichment:
             .. image:: /_static/docstring_previews/enrichment_dotplot.png
         """
         if categories is not None:
-            if isinstance(categories, str):
-                categories = [categories]
-            else:
-                categories = list(categories)
+            categories = [categories] if isinstance(categories, str) else list(categories)
         if targets is None:
             pt_drug = Drug()

pertpy/tools/_milo.py CHANGED Viewed

@@ -51,14 +51,16 @@ class Milo:
         Args:
             input: AnnData
             feature_key: Key to store the cell-level AnnData object in the MuData object
         Returns:
-            MuData: MuData object with original AnnData.
+            :class:`mudata.MuData` object with original AnnData.
         Examples:
             >>> import pertpy as pt
             >>> adata = pt.dt.bhattacherjee()
             >>> milo = pt.tl.Milo()
             >>> mdata = milo.load(adata)
         """
         mdata = MuData({feature_key: input, "milo": AnnData()})
@@ -113,6 +115,7 @@ class Milo:
             >>> mdata = milo.load(adata)
             >>> sc.pp.neighbors(mdata["rna"])
             >>> milo.make_nhoods(mdata["rna"])
         """
         if isinstance(data, MuData):
             adata = data[feature_key]
@@ -177,10 +180,7 @@ class Milo:
         adata.obs["nhood_ixs_random"] = adata.obs["nhood_ixs_random"].astype("int")
         adata.uns["nhood_neighbors_key"] = neighbors_key
         # Store distance to K-th nearest neighbor (used for spatial FDR correction)
-        if neighbors_key is None:
-            knn_dists = adata.obsp["distances"]
-        else:
-            knn_dists = adata.obsp[neighbors_key + "_distances"]
+        knn_dists = adata.obsp["distances"] if neighbors_key is None else adata.obsp[neighbors_key + "_distances"]
         nhood_ixs = adata.obs["nhood_ixs_refined"] == 1
         dist_mat = knn_dists[np.asarray(nhood_ixs), :]
@@ -223,6 +223,7 @@ class Milo:
             >>> sc.pp.neighbors(mdata["rna"])
             >>> milo.make_nhoods(mdata["rna"])
             >>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
         """
         if isinstance(data, MuData):
             adata = data[feature_key]
@@ -297,6 +298,7 @@ class Milo:
             >>> milo.make_nhoods(mdata["rna"])
             >>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
             >>> milo.da_nhoods(mdata, design="~label")
         """
         try:
             sample_adata = mdata["milo"]
@@ -428,7 +430,7 @@ class Milo:
             feature_key: If input data is MuData, specify key to cell-level AnnData object.
         Returns:
-            None. Adds in place:
+            Adds in place.
             - `milo_mdata['milo'].var["nhood_annotation"]`: assigning a label to each nhood
             - `milo_mdata['milo'].var["nhood_annotation_frac"]` stores the fraction of cells in the neighbourhood with the assigned label
             - `milo_mdata['milo'].varm['frac_annotation']`: stores the fraction of cells from each label in each nhood
@@ -444,6 +446,7 @@ class Milo:
             >>> milo.make_nhoods(mdata["rna"])
             >>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
             >>> milo.annotate_nhoods(mdata, anno_col="cell_type")
         """
         try:
             sample_adata = mdata["milo"]
@@ -482,7 +485,7 @@ class Milo:
             feature_key: If input data is MuData, specify key to cell-level AnnData object.
         Returns:
-            None. Adds in place:
+            Adds in place.
             - `milo_mdata['milo'].var["nhood_{anno_col}"]`: assigning a continuous value to each nhood
         Examples:
@@ -567,7 +570,7 @@ class Milo:
         sample_adata.obs = sample_obs.loc[sample_adata.obs_names]
     def build_nhood_graph(self, mdata: MuData, basis: str = "X_umap", feature_key: str | None = "rna"):
-        """Build graph of neighbourhoods used for visualization of DA results
+        """Build graph of neighbourhoods used for visualization of DA results.
         Args:
             mdata: MuData object
@@ -625,6 +628,7 @@ class Milo:
             >>> milo.make_nhoods(mdata["rna"])
             >>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
             >>> milo.add_nhood_expression(mdata)
         """
         try:
             sample_adata = mdata["milo"]
@@ -652,7 +656,7 @@ class Milo:
     def _setup_rpy2(
         self,
     ):
-        """Set up rpy2 to run edgeR"""
+        """Set up rpy2 to run edgeR."""
         from rpy2.robjects import numpy2ri, pandas2ri
         from rpy2.robjects.packages import importr
@@ -715,7 +719,7 @@ class Milo:
         sample_adata.var.loc[keep_nhoods, "SpatialFDR"] = adjp
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_nhood_graph(
+    def plot_nhood_graph(  # pragma: no cover # noqa: D417
         self,
         mdata: MuData,
         *,
@@ -730,7 +734,7 @@ class Milo:
         return_fig: bool = False,
         **kwargs,
     ) -> Figure | None:
-        """Visualize DA results on abstracted graph (wrapper around sc.pl.embedding)
+        """Visualize DA results on abstracted graph (wrapper around sc.pl.embedding).
         Args:
             mdata: MuData object
@@ -808,7 +812,7 @@ class Milo:
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_nhood(
+    def plot_nhood(  # pragma: no cover # noqa: D417
         self,
         mdata: MuData,
         ix: int,
@@ -869,7 +873,7 @@ class Milo:
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_da_beeswarm(
+    def plot_da_beeswarm(  # pragma: no cover # noqa: D417
         self,
         mdata: MuData,
         *,
@@ -880,7 +884,7 @@ class Milo:
         palette: str | Sequence[str] | dict[str, str] | None = None,
         return_fig: bool = False,
     ) -> Figure | None:
-        """Plot beeswarm plot of logFC against nhood labels
+        """Plot beeswarm plot of logFC against nhood labels.
         Args:
             mdata: MuData object
@@ -995,7 +999,7 @@ class Milo:
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_nhood_counts_by_cond(
+    def plot_nhood_counts_by_cond(  # pragma: no cover # noqa: D417
         self,
         mdata: MuData,
         test_var: str,

pertpy 0.10.0__py3-none-any.whl → 0.11.1__py3-none-any.whl

pertpy 0.10.0__py3-none-any.whl → 0.11.1__py3-none-any.whl