PyPI - pertpy - Versions diffs - 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl - Mend

pertpy 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

pertpy/__init__.py +5 -1
pertpy/_doc.py +1 -3
pertpy/_types.py +6 -0
pertpy/data/_dataloader.py +68 -24
pertpy/data/_datasets.py +9 -9
pertpy/metadata/__init__.py +2 -1
pertpy/metadata/_cell_line.py +133 -25
pertpy/metadata/_look_up.py +13 -19
pertpy/metadata/_moa.py +1 -1
pertpy/preprocessing/_guide_rna.py +138 -44
pertpy/preprocessing/_guide_rna_mixture.py +17 -19
pertpy/tools/__init__.py +1 -1
pertpy/tools/_augur.py +106 -98
pertpy/tools/_cinemaot.py +74 -114
pertpy/tools/_coda/_base_coda.py +129 -145
pertpy/tools/_coda/_sccoda.py +66 -69
pertpy/tools/_coda/_tasccoda.py +71 -79
pertpy/tools/_dialogue.py +48 -40
pertpy/tools/_differential_gene_expression/_base.py +21 -31
pertpy/tools/_differential_gene_expression/_checks.py +4 -6
pertpy/tools/_differential_gene_expression/_dge_comparison.py +5 -6
pertpy/tools/_differential_gene_expression/_edger.py +6 -10
pertpy/tools/_differential_gene_expression/_pydeseq2.py +1 -1
pertpy/tools/_differential_gene_expression/_simple_tests.py +3 -3
pertpy/tools/_differential_gene_expression/_statsmodels.py +8 -5
pertpy/tools/_distances/_distance_tests.py +1 -2
pertpy/tools/_distances/_distances.py +31 -45
pertpy/tools/_enrichment.py +7 -22
pertpy/tools/_milo.py +19 -15
pertpy/tools/_mixscape.py +73 -75
pertpy/tools/_perturbation_space/_clustering.py +4 -4
pertpy/tools/_perturbation_space/_comparison.py +4 -4
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +83 -32
pertpy/tools/_perturbation_space/_perturbation_space.py +10 -10
pertpy/tools/_perturbation_space/_simple.py +12 -14
pertpy/tools/_scgen/_scgen.py +16 -17
pertpy/tools/_scgen/_scgenvae.py +2 -2
pertpy/tools/_scgen/_utils.py +3 -1
{pertpy-0.10.0.dist-info → pertpy-0.11.0.dist-info}/METADATA +36 -20
pertpy-0.11.0.dist-info/RECORD +58 -0
{pertpy-0.10.0.dist-info → pertpy-0.11.0.dist-info}/licenses/LICENSE +1 -0
pertpy/tools/_kernel_pca.py +0 -50
pertpy-0.10.0.dist-info/RECORD +0 -58
{pertpy-0.10.0.dist-info → pertpy-0.11.0.dist-info}/WHEEL +0 -0

pertpy/tools/_dialogue.py CHANGED Viewed

@@ -33,9 +33,17 @@ if TYPE_CHECKING:
 class Dialogue:
-    """Python implementation of DIALOGUE"""
+    """Python implementation of DIALOGUE."""
-    def __init__(self, sample_id: str, celltype_key: str, n_counts_key: str, n_mpcs: int):
+    def __init__(
+        self,
+        sample_id: str,
+        celltype_key: str,
+        n_counts_key: str,
+        n_mpcs: int,
+        feature_space_key: str = "X_pca",
+        n_components: int = 50,
+    ):
         """Constructor for Dialogue.
         Args:
@@ -43,6 +51,8 @@ class Dialogue:
             celltype_key: The key in AnnData.obs which contains the cell type column.
             n_counts_key: The key of the number of counts in Anndata.obs . Also commonly the size factor.
             n_mpcs: Number of PMD components which corresponds to the number of determined MCPs.
+            feature_space_key: The key in adata.obsm for the feature space (e.g., "X_pca", "X_umap").
+            n_components: The number of components of the feature space to use, e.g. PCA components.
         """
         self.sample_id = sample_id
         self.celltype_key = celltype_key
@@ -53,6 +63,8 @@ class Dialogue:
             )
         self.n_counts_key = n_counts_key
         self.n_mcps = n_mpcs
+        self.feature_space_key = feature_space_key
+        self.n_components = n_components
     def _get_pseudobulks(
         self, adata: AnnData, groupby: str, strategy: Literal["median", "mean"] = "median"
@@ -62,6 +74,7 @@ class Dialogue:
         Copied from `https://github.com/schillerlab/sc-toolbox/blob/397e80dc5e8fb8017b75f6c3fa634a1e1213d484/sc_toolbox/tools/__init__.py#L458`
         Args:
+            adata: Annotated data matrix.
             groupby: The key to groupby for pseudobulks
             strategy: The pseudobulking strategy. One of "median" or "mean"
@@ -83,7 +96,9 @@ class Dialogue:
         return pseudobulk
     def _pseudobulk_feature_space(
-        self, adata: AnnData, groupby: str, n_components: int = 50, feature_space_key: str = "X_pca"
+        self,
+        adata: AnnData,
+        groupby: str,
     ) -> pd.DataFrame:
         """Return Cell-averaged components from a passed feature space.
@@ -91,9 +106,8 @@ class Dialogue:
         TODO: DIALOGUE recommends running PCA on each cell type separately before running PMD - this should be implemented as an option here.
         Args:
+            adata: Annotated data matrix.
             groupby: The key to groupby for pseudobulks.
-            n_components: The number of components to use.
-            feature_key: The key in adata.obsm for the feature space (e.g., "X_pca", "X_umap").
         Returns:
             A pseudobulk DataFrame of the averaged components.
@@ -101,7 +115,7 @@ class Dialogue:
         aggr = {}
         for category in adata.obs.loc[:, groupby].cat.categories:
             temp = adata.obs.loc[:, groupby] == category
-            aggr[category] = adata[temp].obsm[feature_space_key][:, :n_components].mean(axis=0)
+            aggr[category] = adata[temp].obsm[self.feature_space_key][:, : self.n_components].mean(axis=0)
         aggr = pd.DataFrame(aggr)
         return aggr
@@ -130,6 +144,7 @@ class Dialogue:
         Args:
             adata: The AnnData object to append mcp scores to.
+            ct_subs: cell type objects.
             mcp_scores: The MCP scores dictionary.
             celltype_key: Key of the cell type column in obs.
@@ -213,7 +228,7 @@ class Dialogue:
         sample_obs: str,
         return_all: bool = False,
     ):
-        """Applies a mixed linear model using the specified formula (MCP scores used for the dependent var) and returns the coefficient and p-value
+        """Applies a mixed linear model using the specified formula (MCP scores used for the dependent var) and returns the coefficient and p-value.
         TODO: reduce runtime? Maybe we can use an approximation or something that isn't statsmodels.
@@ -332,7 +347,7 @@ class Dialogue:
         Args:
             mcp_name: The name of the MCP to model.
-            scores: The MCP scores for a cell type. Number of MCPs x number of features.
+            scores_df: The MCP scores for a cell type. Number of MCPs x number of features.
             ct_data: The AnnData object containing the metadata and labels in obs.
             tme: Transcript mean expression in `x`.
             sig: DataFrame containing a series of up and downregulated MCPs.
@@ -418,11 +433,10 @@ class Dialogue:
         # Finally get corr coeff
         return np.dot(A_mA, B_mB.T) / np.sqrt(np.dot(ssA[:, None], ssB[None]))
+    # TODO: needs check for correctness and variable renaming
+    # TODO: Confirm that this doesn't return duplicate gene names.
     def _get_top_elements(self, m: pd.DataFrame, max_length: int, min_threshold: float):
-        """
-        TODO: needs check for correctness and variable renaming
-        TODO: Confirm that this doesn't return duplicate gene names
+        """Get top elements.
         Args:
             m: Any DataFrame of Gene name as index with variable columns.
@@ -457,12 +471,11 @@ class Dialogue:
         # TODO this whole function should be standalone
         # It will contain the calculation of up/down + calculation (new final mcp scores)
         # Ensure that it'll still fit/work with the hierarchical multilevel_modeling
         """Determine the up and down genes per MCP."""
         # TODO: something is slightly slow here
         cca_sig_results: dict[Any, dict[str, Any]] = {}
         new_mcp_scores: dict[Any, list[Any]] = {}
-        for ct in ct_subs.keys():
+        for ct in ct_subs:
             ct_adata = ct_subs[ct]
             conf_m = ct_adata.obs[n_counts_key].values
@@ -483,9 +496,7 @@ class Dialogue:
             from scipy.stats import spearmanr
             def _pcor_mat(v1, v2, v3, method="spearman"):
-                """
-                MAJOR TODO: I've only used normal correlation instead of partial correlation as we wait on the implementation
-                """
+                """MAJOR TODO: I've only used normal correlation instead of partial correlation as we wait on the implementation."""
                 correlations = []  # R
                 pvals = []  # P
                 for x2 in v2:
@@ -506,7 +517,7 @@ class Dialogue:
                 return np.array(correlations), np.array(pvals)  # pvals_adjusted
             C1, P1 = _pcor_mat(ct_adata[:, top_cor_genes_flattened].X.toarray().T, mcp_scores[ct].T, conf_m)
-            C1[P1 > (0.05 / ct_adata.shape[1])] = 0  # why?
+            C1[(0.05 / ct_adata.shape[1]) < P1] = 0  # why?
             cca_sig_unformatted = self._get_top_elements(  # 3 up, 3 dn, for each mcp
                 pd.DataFrame(C1.T, index=top_cor_genes_flattened), max_length=max_genes, min_threshold=0.05
@@ -514,7 +525,7 @@ class Dialogue:
             # TODO: probably format the up and down within get_top_elements
             cca_sig: dict[str, Any] = defaultdict(dict)
-            for i in range(0, int(len(cca_sig_unformatted) / 2)):
+            for i in range(int(len(cca_sig_unformatted) / 2)):
                 cca_sig[f"MCP{i}"]["up"] = cca_sig_unformatted[i * 2]
                 cca_sig[f"MCP{i}"]["down"] = cca_sig_unformatted[i * 2 + 1]
@@ -523,7 +534,7 @@ class Dialogue:
             # This is basically DIALOGUE 3 now
             pre_r_scores = {
-                ct: ct_subs[ct].obsm["X_pca"][:, :50] @ ws_dict[ct]
+                ct: ct_subs[ct].obsm[self.feature_space_key][:, : self.n_components] @ ws_dict[ct]
                 for i, ct in enumerate(ct_subs.keys())
                 # TODO This is a recalculation and not a new calculation
             }
@@ -591,8 +602,8 @@ class Dialogue:
     def calculate_multifactor_PMD(
         self,
         adata: AnnData,
-        penalties: list[int] = None,
-        ct_order: list[str] = None,
+        penalties: list[int] | None = None,
+        ct_order: list[str] | None = None,
         agg_feature: bool = True,
         solver: Literal["lp", "bs"] = "bs",
         normalize: bool = True,
@@ -603,10 +614,9 @@ class Dialogue:
         Args:
             adata: AnnData object to calculate PMD for.
-            sample_id: Key to use for pseudobulk determination.
             penalties: PMD penalties.
             ct_order: The order of cell types.
-            agg_features: Whether to calculate cell-averaged principal components.
+            agg_feature: Whether to calculate cell-averaged principal components.
             solver: Which solver to use for PMD. Must be one of "lp" (linear programming) or "bs" (binary search).
                     For differences between these to please refer to https://github.com/theislab/sparsecca/blob/main/examples/linear_programming_multicca.ipynb
             normalize: Whether to mimic DIALOGUE as close as possible
@@ -644,8 +654,6 @@ class Dialogue:
                     raise ValueError("Please ensure that every cell type is represented in every sample.") from e
                 else:
                     raise
-        else:
-            penalties = penalties
         if solver == "bs":
             ws, _ = multicca_pmd(mcca_in, penalties, K=self.n_mcps, standardize=True, niter=100, mimic_R=normalize)
@@ -656,8 +664,8 @@ class Dialogue:
         ws_dict = {ct: ws[i] for i, ct in enumerate(ct_order)}
         pre_r_scores = {
-            ct: ct_subs[ct].obsm["X_pca"][:, :50] @ ws[i]
-            for i, ct in enumerate(cell_types)  # TODO change from 50
+            ct: ct_subs[ct].obsm[self.feature_space_key][:, : self.n_components] @ ws[i]
+            for i, ct in enumerate(cell_types)
         }
         # TODO: output format needs some cleanup, even though each MCP score is matched to one cell, it's not at all
@@ -681,17 +689,17 @@ class Dialogue:
         ws_dict: dict,
         confounder: str | None,
         formula: str = None,
-    ):
+    ) -> pd.DataFrame:
         """Runs the multilevel modeling step to match genes to MCPs and generate p-values for MCPs.
         Args:
             ct_subs: The DIALOGUE cell type objects.
             mcp_scores: The determined MCP scores from the PMD step.
+            ws_dict: WS dictionary.
             confounder: Any modeling confounders.
             formula: The hierarchical modeling formula. Defaults to y ~ x + n_counts.
         Returns:
-            A Pandas DataFrame containing:
             - for each mcp: HLM_result_1, HLM_result_2, sig_genes_1, sig_genes_2
             - merged HLM_result_1, HLM_result_2, sig_genes_1, sig_genes_2 of all mcps
@@ -875,15 +883,15 @@ class Dialogue:
             if len(conditions_compare) != 2:
                 raise ValueError("Please specify conditions to compare or supply an object with only 2 conditions")
-        pvals = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(0, n_mcps)])
-        tstats = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(0, n_mcps)])
-        pvals_adj = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(0, n_mcps)])
+        pvals = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(n_mcps)])
+        tstats = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(n_mcps)])
+        pvals_adj = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(n_mcps)])
         response = adata.obs.groupby(sample_label)[condition_label].agg(pd.Series.mode)
         for celltype in adata.obs[celltype_label].unique():
             df = adata.obs[adata.obs[celltype_label] == celltype]
-            for mcpnum in ["mcp_" + str(n) for n in range(0, n_mcps)]:
+            for mcpnum in ["mcp_" + str(n) for n in range(n_mcps)]:
                 mns = df.groupby(sample_label)[mcpnum].mean()
                 mns = pd.concat([mns, response], axis=1)
                 res = stats.ttest_ind(
@@ -893,7 +901,7 @@ class Dialogue:
                 pvals.loc[celltype, mcpnum] = res[1]
                 tstats.loc[celltype, mcpnum] = res[0]
-        for mcpnum in ["mcp_" + str(n) for n in range(0, n_mcps)]:
+        for mcpnum in ["mcp_" + str(n) for n in range(n_mcps)]:
             pvals_adj[mcpnum] = multipletests(pvals[mcpnum], method="fdr_bh")[1]
         return {"pvals": pvals, "tstats": tstats, "pvals_adj": pvals_adj}
@@ -956,7 +964,7 @@ class Dialogue:
         genes_dict_up = {}  # type: ignore
         genes_dict_down = {}  # type: ignore
-        for celltype2 in mcp_dict.keys():
+        for celltype2 in mcp_dict:
             for gene in mcp_dict[celltype2][MCP + ".up"]:
                 if gene in genes_dict_up:
                     genes_dict_up[gene] += 1
@@ -1008,7 +1016,7 @@ class Dialogue:
             >>> genes_results = _get_extrema_MCP_genes_single(ct_subs, mcp="mcp_4", fraction=0.2)
         """
         genes = {}
-        for ct in ct_subs.keys():
+        for ct in ct_subs:
             mini = ct_subs[ct]
             mini.obs["extrema"] = pd.qcut(
                 mini.obs[mcp],
@@ -1056,13 +1064,13 @@ class Dialogue:
         for mcp in mcps:
             rank_dfs[mcp] = {}
             ct_ranked = self._get_extrema_MCP_genes_single(ct_subs, mcp=mcp, fraction=fraction)
-            for celltype in ct_ranked.keys():
+            for celltype in ct_ranked:
                 rank_dfs[mcp][celltype] = sc.get.rank_genes_groups_df(ct_ranked[celltype], group=None)
         return rank_dfs
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_split_violins(
+    def plot_split_violins(  # pragma: no cover # noqa: D417
         self,
         adata: AnnData,
         split_key: str,
@@ -1115,7 +1123,7 @@ class Dialogue:
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_pairplot(
+    def plot_pairplot(  # pragma: no cover # noqa: D417
         self,
         adata: AnnData,
         celltype_key: str,

pertpy/tools/_differential_gene_expression/_base.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import contextlib
 import math
 from abc import ABC, abstractmethod
 from collections.abc import Iterable, Mapping, Sequence
@@ -23,8 +24,7 @@ from pertpy.tools._differential_gene_expression._checks import check_is_numeric_
 class MethodBase(ABC):
     def __init__(self, adata, *, mask=None, layer=None, **kwargs):
-        """
-        Initialize the method.
+        """Initialize the method.
         Args:
             adata: AnnData object, usually pseudobulked.
@@ -62,8 +62,7 @@ class MethodBase(ABC):
         fit_kwargs=MappingProxyType({}),
         test_kwargs=MappingProxyType({}),
     ):
-        """
-        Compare between groups in a specified column.
+        """Compare between groups in a specified column.
         Args:
             adata: AnnData object.
@@ -100,7 +99,7 @@ class MethodBase(ABC):
         ...
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_volcano(
+    def plot_volcano(  # pragma: no cover # noqa: D417
         self,
         data: pd.DataFrame | ad.AnnData,
         *,
@@ -188,8 +187,7 @@ class MethodBase(ABC):
             colors = ["gray", "#D62728", "#1F77B4"]
         def _pval_reciprocal(lfc: float) -> float:
-            """
-            Function for relating -log10(pvalue) and logfoldchange in a reciprocal.
+            """Function for relating -log10(pvalue) and logfoldchange in a reciprocal.
             Used for plotting the S-curve
             """
@@ -197,7 +195,7 @@ class MethodBase(ABC):
         def _map_shape(symbol: str) -> str:
             if shape_dict is not None:
-                for k in shape_dict.keys():
+                for k in shape_dict:
                     if shape_dict[k] is not None and symbol in shape_dict[k]:
                         return k
             return "other"
@@ -211,8 +209,7 @@ class MethodBase(ABC):
             pval_thresh: float = None,
             s_curve: bool = False,
         ) -> str:
-            """
-            Map genes to categorize based on log2fc and pvalue.
+            """Map genes to categorize based on log2fc and pvalue.
             These categories are used for coloring the dots.
             Used when no color_dict is passed, sets up/down/nonsignificant.
@@ -229,14 +226,13 @@ class MethodBase(ABC):
                     return "Down"
                 else:
                     return "not DE"
+            # Standard condition for Up or Down categorization
+            elif log2fc > log2fc_thresh and nlog10 > pval_thresh:
+                return "Up"
+            elif log2fc < -log2fc_thresh and nlog10 > pval_thresh:
+                return "Down"
             else:
-                # Standard condition for Up or Down categorization
-                if log2fc > log2fc_thresh and nlog10 > pval_thresh:
-                    return "Up"
-                elif log2fc < -log2fc_thresh and nlog10 > pval_thresh:
-                    return "Down"
-                else:
-                    return "not DE"
+                return "not DE"
         def _map_genes_categories_highlight(
             row: pd.Series,
@@ -247,8 +243,7 @@ class MethodBase(ABC):
             s_curve: bool = False,
             symbol_col: str = None,
         ) -> str:
-            """
-            Map genes to categorize based on log2fc and pvalue.
+            """Map genes to categorize based on log2fc and pvalue.
             These categories are used for coloring the dots.
             Used when color_dict is passed, sets DE / not DE for background and user supplied highlight genes.
@@ -258,7 +253,7 @@ class MethodBase(ABC):
             symbol = row[symbol_col]
             if color_dict is not None:
-                for k in color_dict.keys():
+                for k in color_dict:
                     if symbol in color_dict[k]:
                         return k
@@ -489,7 +484,7 @@ class MethodBase(ABC):
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_paired(
+    def plot_paired(  # pragma: no cover # noqa: D417
         self,
         adata: ad.AnnData,
         results_df: pd.DataFrame,
@@ -581,14 +576,9 @@ class MethodBase(ABC):
                 adata, target_col=groupby, groups_col=pairedby, layer_key=layer, mode="sum", min_cells=1, min_counts=1
             )
-        if layer is not None:
-            X = adata.layers[layer]
-        else:
-            X = adata.X
-        try:
+        X = adata.layers[layer] if layer is not None else adata.X
+        with contextlib.suppress(AttributeError):
             X = X.toarray()
-        except AttributeError:
-            pass
         groupby_cols = [pairedby, groupby]
         df = adata.obs.loc[:, groupby_cols].join(pd.DataFrame(X, index=adata.obs_names, columns=var_names))
@@ -682,7 +672,7 @@ class MethodBase(ABC):
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_fold_change(
+    def plot_fold_change(  # pragma: no cover # noqa: D417
         self,
         results_df: pd.DataFrame,
         *,
@@ -763,7 +753,7 @@ class MethodBase(ABC):
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_multicomparison_fc(
+    def plot_multicomparison_fc(  # pragma: no cover # noqa: D417
         self,
         results_df: pd.DataFrame,
         *,
@@ -1013,7 +1003,7 @@ class LinearModelBase(MethodBase):
             )
         return self.formulaic_contrasts.cond(**kwargs)
-    def contrast(self, *args, **kwargs):
+    def contrast(self, *args, **kwargs):  # noqa: D417
         """Build a simple contrast for pairwise comparisons.
         Args:

pertpy/tools/_differential_gene_expression/_checks.py CHANGED Viewed

@@ -16,9 +16,8 @@ def check_is_numeric_matrix(array: np.ndarray | spmatrix) -> None:
     if issparse(array):
         if np.any(~np.isfinite(array.data)):
             raise ValueError("Counts cannot contain negative, NaN or Inf values.")
-    else:
-        if np.any(~np.isfinite(array)):
-            raise ValueError("Counts cannot contain negative, NaN or Inf values.")
+    elif np.any(~np.isfinite(array)):
+        raise ValueError("Counts cannot contain negative, NaN or Inf values.")
 def check_is_integer_matrix(array: np.ndarray | spmatrix, tolerance: float = 1e-6) -> None:
@@ -34,8 +33,7 @@ def check_is_integer_matrix(array: np.ndarray | spmatrix, tolerance: float = 1e-
     if issparse(array):
         if not array.data.dtype.kind == "i" and not np.all(np.abs(array.data - np.round(array.data)) < tolerance):
             raise ValueError("Non-zero elements of the matrix must be close to integer values.")
-    else:
-        if not array.dtype.kind == "i" and not np.all(np.abs(array - np.round(array)) < tolerance):
-            raise ValueError("Matrix must be a count matrix.")
+    elif array.dtype.kind != "i" and not np.all(np.abs(array - np.round(array)) < tolerance):
+        raise ValueError("Matrix must be a count matrix.")
     if (array < 0).sum() > 0:
         raise ValueError("Non-zero elements of the matrix must be positive.")

pertpy/tools/_differential_gene_expression/_dge_comparison.py CHANGED Viewed

@@ -36,16 +36,15 @@ class DGEEVAL:
             if not de_key1 or not de_key2:
                 raise ValueError("Both `de_key1` and `de_key2` must be provided together if using `adata`.")
-        else:  # use dfs
-            if de_df1 is None or de_df2 is None:
-                raise ValueError("Both `de_df1` and `de_df2` must be provided together if using DataFrames.")
+        elif de_df1 is None or de_df2 is None:
+            raise ValueError("Both `de_df1` and `de_df2` must be provided together if using DataFrames.")
         if de_key1:
             if not adata:
                 raise ValueError("`adata` should be provided with `de_key1` and `de_key2`. ")
-            assert all(
-                k in adata.uns for k in [de_key1, de_key2]
-            ), "Provided `de_key1` and `de_key2` must exist in `adata.uns`."
+            assert all(k in adata.uns for k in [de_key1, de_key2]), (
+                "Provided `de_key1` and `de_key2` must exist in `adata.uns`."
+            )
             vars = adata.var_names
         if de_df1 is not None:

pertpy/tools/_differential_gene_expression/_edger.py CHANGED Viewed

@@ -10,7 +10,7 @@ from ._checks import check_is_integer_matrix
 class EdgeR(LinearModelBase):
-    """Differential expression test using EdgeR"""
+    """Differential expression test using EdgeR."""
     def _check_counts(self):
         check_is_integer_matrix(self.data)
@@ -39,17 +39,13 @@ class EdgeR(LinearModelBase):
             edger = importr("edgeR")
         except ImportError as e:
             raise ImportError(
-                "edgeR requires a valid R installation with the following packages:\n"
-                "edgeR, BiocParallel, RhpcBLASctl"
+                "edgeR requires a valid R installation with the following packages:\nedgeR, BiocParallel, RhpcBLASctl"
             ) from e
         # Convert dataframe
         with localconverter(get_conversion() + numpy2ri.converter):
             expr = self.adata.X if self.layer is None else self.adata.layers[self.layer]
-            if issparse(expr):
-                expr = expr.T.toarray()
-            else:
-                expr = expr.T
+            expr = expr.T.toarray() if issparse(expr) else expr.T
         with localconverter(get_conversion() + pandas2ri.converter):
             expr_r = ro.conversion.py2rpy(pd.DataFrame(expr, index=self.adata.var_names, columns=self.adata.obs_names))
@@ -72,8 +68,8 @@ class EdgeR(LinearModelBase):
         ro.globalenv["fit"] = fit
         self.fit = fit
-    def _test_single_contrast(self, contrast: Sequence[float], **kwargs) -> pd.DataFrame:
-        """Conduct test for each contrast and return a data frame
+    def _test_single_contrast(self, contrast: Sequence[float], **kwargs) -> pd.DataFrame:  # noqa: D417
+        """Conduct test for each contrast and return a data frame.
         Args:
             contrast: numpy array of integars indicating contrast i.e. [-1, 0, 1, 0, 0]
@@ -100,7 +96,7 @@ class EdgeR(LinearModelBase):
             importr("edgeR")
         except ImportError:
             raise ImportError(
-                "edgeR requires a valid R installation with the following packages: " "edgeR, BiocParallel, RhpcBLASctl"
+                "edgeR requires a valid R installation with the following packages: edgeR, BiocParallel, RhpcBLASctl"
             ) from None
         # Convert vector to R, which drops a category like `self.design_matrix` to use the intercept for the left out.

pertpy/tools/_differential_gene_expression/_pydeseq2.py CHANGED Viewed

@@ -16,7 +16,7 @@ from ._checks import check_is_integer_matrix
 class PyDESeq2(LinearModelBase):
-    """Differential expression test using a PyDESeq2"""
+    """Differential expression test using a PyDESeq2."""
     def __init__(
         self, adata: AnnData, design: str | ndarray, *, mask: str | None = None, layer: str | None = None, **kwargs

pertpy/tools/_differential_gene_expression/_simple_tests.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Simple tests such as t-test, wilcoxon"""
+"""Simple tests such as t-test, wilcoxon."""
 import warnings
 from abc import abstractmethod
@@ -10,7 +10,7 @@ import pandas as pd
 import scipy.stats
 import statsmodels
 from anndata import AnnData
-from pandas.core.api import DataFrame as DataFrame
+from pandas.core.api import DataFrame
 from scipy.sparse import diags, issparse
 from tqdm.auto import tqdm
@@ -152,7 +152,7 @@ class WilcoxonTest(SimpleComparisonBase):
 class TTest(SimpleComparisonBase):
-    """Perform a unpaired or paired T-test"""
+    """Perform a unpaired or paired T-test."""
     @staticmethod
     def _test(x0: np.ndarray, x1: np.ndarray, paired: bool, **kwargs) -> float:

pertpy/tools/_differential_gene_expression/_statsmodels.py CHANGED Viewed

@@ -6,14 +6,14 @@ import statsmodels.api as sm
 from tqdm.auto import tqdm
 from ._base import LinearModelBase
-from ._checks import check_is_integer_matrix
+from ._checks import check_is_numeric_matrix
 class Statsmodels(LinearModelBase):
-    """Differential expression test using a statsmodels linear regression"""
+    """Differential expression test using a statsmodels linear regression."""
     def _check_counts(self):
-        check_is_integer_matrix(self.data)
+        check_is_numeric_matrix(self.data)
     def fit(
         self,
@@ -55,7 +55,10 @@ class Statsmodels(LinearModelBase):
                     "t_value": t_test.tvalue.item(),
                     "sd": t_test.sd.item(),
                     "log_fc": t_test.effect.item(),
-                    "adj_p_value": statsmodels.stats.multitest.fdrcorrection(np.array([t_test.pvalue]))[1].item(),
                 }
             )
-        return pd.DataFrame(res).sort_values("p_value")
+        return (
+            pd.DataFrame(res)
+            .sort_values("p_value")
+            .assign(adj_p_value=lambda x: statsmodels.stats.multitest.fdrcorrection(x["p_value"])[1])
+        )

pertpy/tools/_distances/_distance_tests.py CHANGED Viewed

@@ -83,8 +83,7 @@ class DistanceTest:
         contrast: str,
         show_progressbar: bool = True,
     ) -> pd.DataFrame:
-        """Run a permutation test using the specified distance metric, testing
-        all groups of cells against a specified contrast group ("control").
+        """Run a permutation test using the specified distance metric, testing all groups of cells against a specified contrast group ("control").
         Args:
             adata: Annotated data matrix.

pertpy 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

pertpy 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl