PyPI - pertpy - Versions diffs - 0.9.5__py3-none-any.whl → 0.11.0__py3-none-any.whl - Mend

pertpy 0.9.5__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

pertpy/__init__.py +5 -1
pertpy/_doc.py +2 -5
pertpy/_types.py +6 -0
pertpy/data/_dataloader.py +68 -24
pertpy/data/_datasets.py +9 -9
pertpy/metadata/__init__.py +2 -1
pertpy/metadata/_cell_line.py +136 -30
pertpy/metadata/_look_up.py +13 -19
pertpy/metadata/_moa.py +1 -1
pertpy/preprocessing/_guide_rna.py +221 -39
pertpy/preprocessing/_guide_rna_mixture.py +177 -0
pertpy/tools/__init__.py +1 -1
pertpy/tools/_augur.py +138 -142
pertpy/tools/_cinemaot.py +75 -117
pertpy/tools/_coda/_base_coda.py +150 -174
pertpy/tools/_coda/_sccoda.py +66 -69
pertpy/tools/_coda/_tasccoda.py +71 -79
pertpy/tools/_dialogue.py +60 -56
pertpy/tools/_differential_gene_expression/_base.py +25 -43
pertpy/tools/_differential_gene_expression/_checks.py +4 -6
pertpy/tools/_differential_gene_expression/_dge_comparison.py +5 -6
pertpy/tools/_differential_gene_expression/_edger.py +6 -10
pertpy/tools/_differential_gene_expression/_pydeseq2.py +1 -1
pertpy/tools/_differential_gene_expression/_simple_tests.py +3 -3
pertpy/tools/_differential_gene_expression/_statsmodels.py +8 -5
pertpy/tools/_distances/_distance_tests.py +1 -2
pertpy/tools/_distances/_distances.py +86 -92
pertpy/tools/_enrichment.py +8 -25
pertpy/tools/_milo.py +23 -27
pertpy/tools/_mixscape.py +261 -175
pertpy/tools/_perturbation_space/_clustering.py +4 -4
pertpy/tools/_perturbation_space/_comparison.py +4 -4
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +83 -32
pertpy/tools/_perturbation_space/_perturbation_space.py +10 -10
pertpy/tools/_perturbation_space/_simple.py +13 -17
pertpy/tools/_scgen/_scgen.py +17 -20
pertpy/tools/_scgen/_scgenvae.py +2 -2
pertpy/tools/_scgen/_utils.py +3 -1
{pertpy-0.9.5.dist-info → pertpy-0.11.0.dist-info}/METADATA +37 -21
pertpy-0.11.0.dist-info/RECORD +58 -0
{pertpy-0.9.5.dist-info → pertpy-0.11.0.dist-info}/licenses/LICENSE +1 -0
pertpy/tools/_kernel_pca.py +0 -50
pertpy-0.9.5.dist-info/RECORD +0 -57
{pertpy-0.9.5.dist-info → pertpy-0.11.0.dist-info}/WHEEL +0 -0

pertpy/tools/_differential_gene_expression/_edger.py CHANGED Viewed

@@ -10,7 +10,7 @@ from ._checks import check_is_integer_matrix
 class EdgeR(LinearModelBase):
-    """Differential expression test using EdgeR"""
+    """Differential expression test using EdgeR."""
     def _check_counts(self):
         check_is_integer_matrix(self.data)
@@ -39,17 +39,13 @@ class EdgeR(LinearModelBase):
             edger = importr("edgeR")
         except ImportError as e:
             raise ImportError(
-                "edgeR requires a valid R installation with the following packages:\n"
-                "edgeR, BiocParallel, RhpcBLASctl"
+                "edgeR requires a valid R installation with the following packages:\nedgeR, BiocParallel, RhpcBLASctl"
             ) from e
         # Convert dataframe
         with localconverter(get_conversion() + numpy2ri.converter):
             expr = self.adata.X if self.layer is None else self.adata.layers[self.layer]
-            if issparse(expr):
-                expr = expr.T.toarray()
-            else:
-                expr = expr.T
+            expr = expr.T.toarray() if issparse(expr) else expr.T
         with localconverter(get_conversion() + pandas2ri.converter):
             expr_r = ro.conversion.py2rpy(pd.DataFrame(expr, index=self.adata.var_names, columns=self.adata.obs_names))
@@ -72,8 +68,8 @@ class EdgeR(LinearModelBase):
         ro.globalenv["fit"] = fit
         self.fit = fit
-    def _test_single_contrast(self, contrast: Sequence[float], **kwargs) -> pd.DataFrame:
-        """Conduct test for each contrast and return a data frame
+    def _test_single_contrast(self, contrast: Sequence[float], **kwargs) -> pd.DataFrame:  # noqa: D417
+        """Conduct test for each contrast and return a data frame.
         Args:
             contrast: numpy array of integars indicating contrast i.e. [-1, 0, 1, 0, 0]
@@ -100,7 +96,7 @@ class EdgeR(LinearModelBase):
             importr("edgeR")
         except ImportError:
             raise ImportError(
-                "edgeR requires a valid R installation with the following packages: " "edgeR, BiocParallel, RhpcBLASctl"
+                "edgeR requires a valid R installation with the following packages: edgeR, BiocParallel, RhpcBLASctl"
             ) from None
         # Convert vector to R, which drops a category like `self.design_matrix` to use the intercept for the left out.

pertpy/tools/_differential_gene_expression/_pydeseq2.py CHANGED Viewed

@@ -16,7 +16,7 @@ from ._checks import check_is_integer_matrix
 class PyDESeq2(LinearModelBase):
-    """Differential expression test using a PyDESeq2"""
+    """Differential expression test using a PyDESeq2."""
     def __init__(
         self, adata: AnnData, design: str | ndarray, *, mask: str | None = None, layer: str | None = None, **kwargs

pertpy/tools/_differential_gene_expression/_simple_tests.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Simple tests such as t-test, wilcoxon"""
+"""Simple tests such as t-test, wilcoxon."""
 import warnings
 from abc import abstractmethod
@@ -10,7 +10,7 @@ import pandas as pd
 import scipy.stats
 import statsmodels
 from anndata import AnnData
-from pandas.core.api import DataFrame as DataFrame
+from pandas.core.api import DataFrame
 from scipy.sparse import diags, issparse
 from tqdm.auto import tqdm
@@ -152,7 +152,7 @@ class WilcoxonTest(SimpleComparisonBase):
 class TTest(SimpleComparisonBase):
-    """Perform a unpaired or paired T-test"""
+    """Perform a unpaired or paired T-test."""
     @staticmethod
     def _test(x0: np.ndarray, x1: np.ndarray, paired: bool, **kwargs) -> float:

pertpy/tools/_differential_gene_expression/_statsmodels.py CHANGED Viewed

@@ -6,14 +6,14 @@ import statsmodels.api as sm
 from tqdm.auto import tqdm
 from ._base import LinearModelBase
-from ._checks import check_is_integer_matrix
+from ._checks import check_is_numeric_matrix
 class Statsmodels(LinearModelBase):
-    """Differential expression test using a statsmodels linear regression"""
+    """Differential expression test using a statsmodels linear regression."""
     def _check_counts(self):
-        check_is_integer_matrix(self.data)
+        check_is_numeric_matrix(self.data)
     def fit(
         self,
@@ -55,7 +55,10 @@ class Statsmodels(LinearModelBase):
                     "t_value": t_test.tvalue.item(),
                     "sd": t_test.sd.item(),
                     "log_fc": t_test.effect.item(),
-                    "adj_p_value": statsmodels.stats.multitest.fdrcorrection(np.array([t_test.pvalue]))[1].item(),
                 }
             )
-        return pd.DataFrame(res).sort_values("p_value")
+        return (
+            pd.DataFrame(res)
+            .sort_values("p_value")
+            .assign(adj_p_value=lambda x: statsmodels.stats.multitest.fdrcorrection(x["p_value"])[1])
+        )

pertpy/tools/_distances/_distance_tests.py CHANGED Viewed

@@ -83,8 +83,7 @@ class DistanceTest:
         contrast: str,
         show_progressbar: bool = True,
     ) -> pd.DataFrame:
-        """Run a permutation test using the specified distance metric, testing
-        all groups of cells against a specified contrast group ("control").
+        """Run a permutation test using the specified distance metric, testing all groups of cells against a specified contrast group ("control").
         Args:
             adata: Annotated data matrix.

pertpy/tools/_distances/_distances.py CHANGED Viewed

@@ -4,9 +4,9 @@ import multiprocessing
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Literal, NamedTuple
-import numba
 import numpy as np
 import pandas as pd
+from numba import jit
 from ott.geometry.geometry import Geometry
 from ott.geometry.pointcloud import PointCloud
 from ott.problems.linear.linear_problem import LinearProblem
@@ -135,9 +135,7 @@ class Distance:
         self.aggregation_func = agg_fct
         if metric == "edistance":
             metric_fct = Edistance()
-        elif metric == "euclidean":
-            metric_fct = EuclideanDistance(self.aggregation_func)
-        elif metric == "root_mean_squared_error":
+        elif metric in ("euclidean", "root_mean_squared_error"):
             metric_fct = EuclideanDistance(self.aggregation_func)
         elif metric == "mse":
             metric_fct = MeanSquaredDistance(self.aggregation_func)
@@ -181,7 +179,7 @@ class Distance:
         if layer_key and obsm_key:
             raise ValueError(
-                "Cannot use 'layer_key' and 'obsm_key' at the same time.\n" "Please provide only one of the two keys."
+                "Cannot use 'layer_key' and 'obsm_key' at the same time.\nPlease provide only one of the two keys."
             )
         if not layer_key and not obsm_key:
             obsm_key = "X_pca"
@@ -201,6 +199,7 @@ class Distance:
         Args:
             X: First vector of shape (n_samples, n_features).
             Y: Second vector of shape (n_samples, n_features).
+            kwargs: Passed to the metric function.
         Returns:
             float: Distance between X and Y.
@@ -239,9 +238,10 @@ class Distance:
             Y: Second vector of shape (n_samples, n_features).
             n_bootstrap: Number of bootstrap samples.
             random_state: Random state for bootstrapping.
+            **kwargs: Passed to the metric function.
         Returns:
-            MeanVar: Mean and variance of distance between X and Y.
+            Mean and variance of distance between X and Y.
         Examples:
             >>> import pertpy as pt
@@ -286,8 +286,8 @@ class Distance:
             kwargs: Additional keyword arguments passed to the metric function.
         Returns:
-            pd.DataFrame: Dataframe with pairwise distances.
-            tuple[pd.DataFrame, pd.DataFrame]: Two Dataframes, one for the mean and one for the variance of pairwise distances.
+            :class:`pandas.DataFrame`: Dataframe with pairwise distances.
+            tuple[:class:`pandas.DataFrame`, :class:`pandas.DataFrame`]: Two Dataframes, one for the mean and one for the variance of pairwise distances.
         Examples:
             >>> import pertpy as pt
@@ -309,7 +309,7 @@ class Distance:
         # able to handle precomputed distances such as the PseudobulkDistance.
         if self.metric_fct.accepts_precomputed:
             # Precompute the pairwise distances if needed
-            if f"{self.obsm_key}_{self.cell_wise_metric}_predistances" not in adata.obsp.keys():
+            if f"{self.obsm_key}_{self.cell_wise_metric}_predistances" not in adata.obsp:
                 self.precompute_distances(adata, n_jobs=n_jobs, **kwargs)
             pwd = adata.obsp[f"{self.obsm_key}_{self.cell_wise_metric}_predistances"]
             for index_x, group_x in enumerate(fct(groups)):
@@ -339,10 +339,7 @@ class Distance:
                         df.loc[group_x, group_y] = df.loc[group_y, group_x] = bootstrap_output.mean
                         df_var.loc[group_x, group_y] = df_var.loc[group_y, group_x] = bootstrap_output.variance
         else:
-            if self.layer_key:
-                embedding = adata.layers[self.layer_key]
-            else:
-                embedding = adata.obsm[self.obsm_key].copy()
+            embedding = adata.layers[self.layer_key] if self.layer_key else adata.obsm[self.obsm_key].copy()
             for index_x, group_x in enumerate(fct(groups)):
                 cells_x = embedding[np.asarray(grouping == group_x)].copy()
                 for group_y in groups[index_x:]:  # type: ignore
@@ -409,8 +406,8 @@ class Distance:
             kwargs: Additional keyword arguments passed to the metric function.
         Returns:
-            pd.DataFrame: Dataframe with distances of groups to selected_group.
-            tuple[pd.DataFrame, pd.DataFrame]: Two Dataframes, one for the mean and one for the variance of distances of groups to selected_group.
+            :class:`pandas.DataFrame`: Dataframe with distances of groups to selected_group.
+            tuple[:class:`pandas.DataFrame`, :class:`pandas.DataFrame`]: Two Dataframes, one for the mean and one for the variance of distances of groups to selected_group.
         Examples:
@@ -446,7 +443,7 @@ class Distance:
         # able to handle precomputed distances such as the PseudobulkDistance.
         if self.metric_fct.accepts_precomputed:
             # Precompute the pairwise distances if needed
-            if f"{self.obsm_key}_{self.cell_wise_metric}_predistances" not in adata.obsp.keys():
+            if f"{self.obsm_key}_{self.cell_wise_metric}_predistances" not in adata.obsp:
                 self.precompute_distances(adata, n_jobs=n_jobs, **kwargs)
             pwd = adata.obsp[f"{self.obsm_key}_{self.cell_wise_metric}_predistances"]
             for group_x in fct(groups):
@@ -473,10 +470,7 @@ class Distance:
                         df.loc[group_x] = bootstrap_output.mean
                         df_var.loc[group_x] = bootstrap_output.variance
         else:
-            if self.layer_key:
-                embedding = adata.layers[self.layer_key]
-            else:
-                embedding = adata.obsm[self.obsm_key].copy()
+            embedding = adata.layers[self.layer_key] if self.layer_key else adata.obsm[self.obsm_key].copy()
             for group_x in fct(groups):
                 cells_x = embedding[np.asarray(grouping == group_x)].copy()
                 group_y = selected_group
@@ -524,10 +518,7 @@ class Distance:
             >>> distance = pt.tools.Distance(metric="edistance")
             >>> distance.precompute_distances(adata)
         """
-        if self.layer_key:
-            cells = adata.layers[self.layer_key]
-        else:
-            cells = adata.obsm[self.obsm_key].copy()
+        cells = adata.layers[self.layer_key] if self.layer_key else adata.obsm[self.obsm_key].copy()
         pwd = pairwise_distances(cells, cells, metric=self.cell_wise_metric, n_jobs=n_jobs)
         adata.obsp[f"{self.obsm_key}_{self.cell_wise_metric}_predistances"] = pwd
@@ -618,6 +609,7 @@ class AbstractDistance(ABC):
         Args:
             X: First vector of shape (n_samples, n_features).
             Y: Second vector of shape (n_samples, n_features).
+            kwargs: Passed to the metrics function.
         Returns:
             float: Distance between X and Y.
@@ -630,8 +622,8 @@ class AbstractDistance(ABC):
         Args:
             P: Pairwise distance matrix of shape (n_samples, n_samples).
-            idx: Boolean array of shape (n_samples,) indicating which
-            samples belong to X (or Y, since each metric is symmetric).
+            idx: Boolean array of shape (n_samples,) indicating which samples belong to X (or Y, since each metric is symmetric).
+            kwargs: Passed to the metrics function.
         Returns:
             float: Distance between X and Y.
@@ -645,12 +637,12 @@ class Edistance(AbstractDistance):
     def __init__(self) -> None:
         super().__init__()
         self.accepts_precomputed = True
-        self.cell_wise_metric = "sqeuclidean"
+        self.cell_wise_metric = "euclidean"
     def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
-        sigma_X = pairwise_distances(X, X, metric="sqeuclidean").mean()
-        sigma_Y = pairwise_distances(Y, Y, metric="sqeuclidean").mean()
-        delta = pairwise_distances(X, Y, metric="sqeuclidean").mean()
+        sigma_X = pairwise_distances(X, X, metric=self.cell_wise_metric, **kwargs).mean()
+        sigma_Y = pairwise_distances(Y, Y, metric=self.cell_wise_metric, **kwargs).mean()
+        delta = pairwise_distances(X, Y, metric=self.cell_wise_metric, **kwargs).mean()
         return 2 * delta - sigma_X - sigma_Y
     def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
@@ -881,7 +873,7 @@ class R2ScoreDistance(AbstractDistance):
 class SymmetricKLDivergence(AbstractDistance):
-    """Average of symmetric KL divergence between gene distributions of two groups
+    """Average of symmetric KL divergence between gene distributions of two groups.
     Assuming a Gaussian distribution for each gene in each group, calculates
     the KL divergence between them and averages over all genes. Repeats this ABBA to get a symmetrized distance.
@@ -908,7 +900,7 @@ class SymmetricKLDivergence(AbstractDistance):
 class TTestDistance(AbstractDistance):
-    """Average of T test statistic between two groups assuming unequal variances"""
+    """Average of T test statistic between two groups assuming unequal variances."""
     def __init__(self) -> None:
         super().__init__()
@@ -932,16 +924,14 @@ class TTestDistance(AbstractDistance):
 class KSTestDistance(AbstractDistance):
-    """Average of two-sided KS test statistic between two groups"""
+    """Average of two-sided KS test statistic between two groups."""
     def __init__(self) -> None:
         super().__init__()
         self.accepts_precomputed = False
     def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
-        stats = []
-        for i in range(X.shape[1]):
-            stats.append(abs(kstest(X[:, i], Y[:, i])[0]))
+        stats = [abs(kstest(X[:, i], Y[:, i])[0]) for i in range(X.shape[1])]
         return sum(stats) / len(stats)
     def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
@@ -949,10 +939,7 @@ class KSTestDistance(AbstractDistance):
 class NBLL(AbstractDistance):
-    """
-    Average of Log likelihood (scalar) of group B cells
-    according to a NB distribution fitted over group A
-    """
+    """Average of Log likelihood (scalar) of group B cells according to a NB distribution fitted over group A."""
     def __init__(self) -> None:
         super().__init__()
@@ -960,15 +947,12 @@ class NBLL(AbstractDistance):
     def __call__(self, X: np.ndarray, Y: np.ndarray, epsilon=1e-8, **kwargs) -> float:
         def _is_count_matrix(matrix, tolerance=1e-6):
-            if matrix.dtype.kind == "i" or np.all(np.abs(matrix - np.round(matrix)) < tolerance):
-                return True
-            else:
-                return False
+            return bool(matrix.dtype.kind == "i" or np.all(np.abs(matrix - np.round(matrix)) < tolerance))
         if not _is_count_matrix(matrix=X) or not _is_count_matrix(matrix=Y):
             raise ValueError("NBLL distance only works for raw counts.")
-        @numba.jit(forceobj=True)
+        @jit(forceobj=True)
         def _compute_nll(y: np.ndarray, nb_params: tuple[float, float], epsilon: float) -> float:
             mu = np.exp(nb_params[0])
             theta = 1 / nb_params[1]
@@ -1117,67 +1101,77 @@ class MeanVarDistributionDistance(AbstractDistance):
         super().__init__()
         self.accepts_precomputed = False
+    @staticmethod
+    def _mean_var(x, log: bool = False):
+        mean = np.mean(x, axis=0)
+        var = np.var(x, axis=0)
+        positive = mean > 0
+        mean = mean[positive]
+        var = var[positive]
+        if log:
+            mean = np.log(mean)
+            var = np.log(var)
+        return mean, var
+    @staticmethod
+    def _prep_kde_data(x, y):
+        return np.concatenate([x.reshape(-1, 1), y.reshape(-1, 1)], axis=1)
+    @staticmethod
+    def _grid_points(d, n_points=100):
+        # Make grid, add 1 bin on lower/upper end to get final n_points
+        d_min = d.min()
+        d_max = d.max()
+        # Compute bin size
+        d_bin = (d_max - d_min) / (n_points - 2)
+        d_min = d_min - d_bin
+        d_max = d_max + d_bin
+        return np.arange(start=d_min + 0.5 * d_bin, stop=d_max, step=d_bin)
+    @staticmethod
+    def _kde_eval_both(x_kde, y_kde, grid):
+        n_points = len(grid)
+        chunk_size = 10000
+        result_x = np.zeros(n_points)
+        result_y = np.zeros(n_points)
+        # Process same chunks for both KDEs
+        for start in range(0, n_points, chunk_size):
+            end = min(start + chunk_size, n_points)
+            chunk = grid[start:end]
+            result_x[start:end] = x_kde.score_samples(chunk)
+            result_y[start:end] = y_kde.score_samples(chunk)
+        return result_x, result_y
     def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
         """Difference of mean-var distributions in 2 matrices.
         Args:
             X: Normalized and log transformed cells x genes count matrix.
             Y: Normalized and log transformed cells x genes count matrix.
+            kwargs: Passed to the metrics function.
         """
+        mean_x, var_x = self._mean_var(X, log=True)
+        mean_y, var_y = self._mean_var(Y, log=True)
-        def _mean_var(x, log: bool = False):
-            mean = np.mean(x, axis=0)
-            var = np.var(x, axis=0)
-            positive = mean > 0
-            mean = mean[positive]
-            var = var[positive]
-            if log:
-                mean = np.log(mean)
-                var = np.log(var)
-            return mean, var
-        def _prep_kde_data(x, y):
-            return np.concatenate([x.reshape(-1, 1), y.reshape(-1, 1)], axis=1)
-        def _grid_points(d, n_points=100):
-            # Make grid, add 1 bin on lower/upper end to get final n_points
-            d_min = d.min()
-            d_max = d.max()
-            # Compute bin size
-            d_bin = (d_max - d_min) / (n_points - 2)
-            d_min = d_min - d_bin
-            d_max = d_max + d_bin
-            return np.arange(start=d_min + 0.5 * d_bin, stop=d_max, step=d_bin)
-        def _parallel_score_samples(kde, samples, thread_count=int(0.875 * multiprocessing.cpu_count())):
-            # the thread_count is determined using the factor 0.875 as recommended here:
-            # https://stackoverflow.com/questions/32625094/scipy-parallel-computing-in-ipython-notebook
-            with multiprocessing.Pool(thread_count) as p:
-                return np.concatenate(p.map(kde.score_samples, np.array_split(samples, thread_count)))
-        def _kde_eval(d, grid):
-            # Kernel choice: Gaussian is too smoothing and cosine or other kernels that do not stretch out
-            # can not be compared well on regions further away from the data as they are -inf
-            kde = KernelDensity(bandwidth="silverman", kernel="exponential").fit(d)
-            return _parallel_score_samples(kde, grid)
-        mean_x, var_x = _mean_var(X, log=True)
-        mean_y, var_y = _mean_var(Y, log=True)
-        x = _prep_kde_data(mean_x, var_x)
-        y = _prep_kde_data(mean_y, var_y)
+        x = self._prep_kde_data(mean_x, var_x)
+        y = self._prep_kde_data(mean_y, var_y)
         # Gridpoints to eval KDE on
-        mean_grid = _grid_points(np.concatenate([mean_x, mean_y]))
-        var_grid = _grid_points(np.concatenate([var_x, var_y]))
+        mean_grid = self._grid_points(np.concatenate([mean_x, mean_y]))
+        var_grid = self._grid_points(np.concatenate([var_x, var_y]))
         grid = np.array(np.meshgrid(mean_grid, var_grid)).T.reshape(-1, 2)
-        kde_x = _kde_eval(x, grid)
-        kde_y = _kde_eval(y, grid)
+        # Fit both KDEs first
+        x_kde = KernelDensity(bandwidth="silverman", kernel="exponential").fit(x)
+        y_kde = KernelDensity(bandwidth="silverman", kernel="exponential").fit(y)
-        kde_diff = ((kde_x - kde_y) ** 2).mean()
+        # Evaluate both KDEs on same grid chunks
+        kde_x, kde_y = self._kde_eval_both(x_kde, y_kde, grid)
-        return kde_diff
+        return ((np.exp(kde_x) - np.exp(kde_y)) ** 2).mean()
     def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
         raise NotImplementedError("MeanVarDistributionDistance cannot be called on a pairwise distance matrix.")

pertpy/tools/_enrichment.py CHANGED Viewed

@@ -25,10 +25,7 @@ def _prepare_targets(
     categories: str | Sequence[str] = None,
 ) -> ChainMap | dict:
     if categories is not None:
-        if isinstance(categories, str):
-            categories = [categories]
-        else:
-            categories = list(categories)
+        categories = [categories] if isinstance(categories, str) else list(categories)
     if targets is None:
         pt_drug = Drug()
@@ -97,10 +94,7 @@ class Enrichment:
         Returns:
             An AnnData object with scores.
         """
-        if layer is not None:
-            mtx = adata.layers[layer]
-        else:
-            mtx = adata.X
+        mtx = adata.layers[layer] if layer is not None else adata.X
         targets = _prepare_targets(targets=targets, nested=nested, categories=categories)  # type: ignore
         full_targets = targets.copy()
@@ -114,10 +108,7 @@ class Enrichment:
         weights = pd.DataFrame(targets, index=adata.var_names)
         weights = weights.loc[:, weights.sum() > 0]
         weights = weights / weights.sum()
-        if issparse(mtx):
-            scores = mtx.dot(weights)
-        else:
-            scores = np.dot(mtx, weights)
+        scores = mtx.dot(weights) if issparse(mtx) else np.dot(mtx, weights)
         if method == "seurat":
             obs_avg = _mean(mtx, names=adata.var_names, axis=0)
@@ -136,10 +127,7 @@ class Enrichment:
             control_gene_weights = pd.DataFrame(control_groups, index=adata.var_names)
             control_gene_weights = control_gene_weights / control_gene_weights.sum()
-            if issparse(mtx):
-                control_profiles = mtx.dot(control_gene_weights)
-            else:
-                control_profiles = np.dot(mtx, control_gene_weights)
+            control_profiles = mtx.dot(control_gene_weights) if issparse(mtx) else np.dot(mtx, control_gene_weights)
             drug_bins = {}
             for drug in weights.columns:
                 bins = np.unique(obs_cut[targets[drug]])
@@ -178,7 +166,7 @@ class Enrichment:
                      Accepts two forms:
                      - A dictionary with the names of the groups as keys, and the entries being the corresponding gene lists.
                      - A dictionary of dictionaries defined like above, with names of gene group categories as keys.
-                       If passing one of those, specify `nested=True`.
+                     If passing one of those, specify `nested=True`.
             nested: Whether `targets` is a dictionary of dictionaries with group categories as keys.
             categories: If `targets=None` or `nested=True`, this argument can be used to subset the gene groups to one or more categories (keys of the original dictionary).
                         In case of the ChEMBL drug targets, these are ATC level 1/level 2 category codes.
@@ -293,7 +281,7 @@ class Enrichment:
         return enrichment
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_dotplot(
+    def plot_dotplot(  # pragma: no cover # noqa: D417
         self,
         adata: AnnData,
         *,
@@ -304,7 +292,6 @@ class Enrichment:
         groupby: str = None,
         key: str = "pertpy_enrichment",
         ax: Axes | None = None,
-        show: bool = True,
         return_fig: bool = False,
         **kwargs,
     ) -> DotPlot | None:
@@ -342,10 +329,7 @@ class Enrichment:
             .. image:: /_static/docstring_previews/enrichment_dotplot.png
         """
         if categories is not None:
-            if isinstance(categories, str):
-                categories = [categories]
-            else:
-                categories = list(categories)
+            categories = [categories] if isinstance(categories, str) else list(categories)
         if targets is None:
             pt_drug = Drug()
@@ -417,10 +401,9 @@ class Enrichment:
             **kwargs,
         )
-        if show:
-            plt.show()
         if return_fig:
             return fig
+        plt.show()
         return None
     def plot_gsea(

pertpy 0.9.5__py3-none-any.whl → 0.11.0__py3-none-any.whl

pertpy 0.9.5__py3-none-any.whl → 0.11.0__py3-none-any.whl