PyPI - pertpy - Versions diffs - 0.9.5__py3-none-any.whl → 0.11.0__py3-none-any.whl - Mend

pertpy-0.9.5-py3-none-any.whl → pertpy-0.11.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

pertpy/__init__.py +5 -1
pertpy/_doc.py +2 -5
pertpy/_types.py +6 -0
pertpy/data/_dataloader.py +68 -24
pertpy/data/_datasets.py +9 -9
pertpy/metadata/__init__.py +2 -1
pertpy/metadata/_cell_line.py +136 -30
pertpy/metadata/_look_up.py +13 -19
pertpy/metadata/_moa.py +1 -1
pertpy/preprocessing/_guide_rna.py +221 -39
pertpy/preprocessing/_guide_rna_mixture.py +177 -0
pertpy/tools/__init__.py +1 -1
pertpy/tools/_augur.py +138 -142
pertpy/tools/_cinemaot.py +75 -117
pertpy/tools/_coda/_base_coda.py +150 -174
pertpy/tools/_coda/_sccoda.py +66 -69
pertpy/tools/_coda/_tasccoda.py +71 -79
pertpy/tools/_dialogue.py +60 -56
pertpy/tools/_differential_gene_expression/_base.py +25 -43
pertpy/tools/_differential_gene_expression/_checks.py +4 -6
pertpy/tools/_differential_gene_expression/_dge_comparison.py +5 -6
pertpy/tools/_differential_gene_expression/_edger.py +6 -10
pertpy/tools/_differential_gene_expression/_pydeseq2.py +1 -1
pertpy/tools/_differential_gene_expression/_simple_tests.py +3 -3
pertpy/tools/_differential_gene_expression/_statsmodels.py +8 -5
pertpy/tools/_distances/_distance_tests.py +1 -2
pertpy/tools/_distances/_distances.py +86 -92
pertpy/tools/_enrichment.py +8 -25
pertpy/tools/_milo.py +23 -27
pertpy/tools/_mixscape.py +261 -175
pertpy/tools/_perturbation_space/_clustering.py +4 -4
pertpy/tools/_perturbation_space/_comparison.py +4 -4
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +83 -32
pertpy/tools/_perturbation_space/_perturbation_space.py +10 -10
pertpy/tools/_perturbation_space/_simple.py +13 -17
pertpy/tools/_scgen/_scgen.py +17 -20
pertpy/tools/_scgen/_scgenvae.py +2 -2
pertpy/tools/_scgen/_utils.py +3 -1
{pertpy-0.9.5.dist-info → pertpy-0.11.0.dist-info}/METADATA +37 -21
pertpy-0.11.0.dist-info/RECORD +58 -0
{pertpy-0.9.5.dist-info → pertpy-0.11.0.dist-info}/licenses/LICENSE +1 -0
pertpy/tools/_kernel_pca.py +0 -50
pertpy-0.9.5.dist-info/RECORD +0 -57
{pertpy-0.9.5.dist-info → pertpy-0.11.0.dist-info}/WHEEL +0 -0

pertpy/tools/_dialogue.py CHANGED Viewed

@@ -33,9 +33,17 @@ if TYPE_CHECKING:
 class Dialogue:
-    """Python implementation of DIALOGUE"""
+    """Python implementation of DIALOGUE."""
-    def __init__(self, sample_id: str, celltype_key: str, n_counts_key: str, n_mpcs: int):
+    def __init__(
+        self,
+        sample_id: str,
+        celltype_key: str,
+        n_counts_key: str,
+        n_mpcs: int,
+        feature_space_key: str = "X_pca",
+        n_components: int = 50,
+    ):
         """Constructor for Dialogue.
         Args:
@@ -43,6 +51,8 @@ class Dialogue:
             celltype_key: The key in AnnData.obs which contains the cell type column.
             n_counts_key: The key of the number of counts in Anndata.obs . Also commonly the size factor.
             n_mpcs: Number of PMD components which corresponds to the number of determined MCPs.
+            feature_space_key: The key in adata.obsm for the feature space (e.g., "X_pca", "X_umap").
+            n_components: The number of components of the feature space to use, e.g. PCA components.
         """
         self.sample_id = sample_id
         self.celltype_key = celltype_key
@@ -53,6 +63,8 @@ class Dialogue:
             )
         self.n_counts_key = n_counts_key
         self.n_mcps = n_mpcs
+        self.feature_space_key = feature_space_key
+        self.n_components = n_components
     def _get_pseudobulks(
         self, adata: AnnData, groupby: str, strategy: Literal["median", "mean"] = "median"
@@ -62,6 +74,7 @@ class Dialogue:
         Copied from `https://github.com/schillerlab/sc-toolbox/blob/397e80dc5e8fb8017b75f6c3fa634a1e1213d484/sc_toolbox/tools/__init__.py#L458`
         Args:
+            adata: Annotated data matrix.
             groupby: The key to groupby for pseudobulks
             strategy: The pseudobulking strategy. One of "median" or "mean"
@@ -82,27 +95,28 @@ class Dialogue:
         return pseudobulk
-    def _pseudobulk_pca(self, adata: AnnData, groupby: str, n_components: int = 50) -> pd.DataFrame:
-        """Return cell-averaged PCA components.
+    def _pseudobulk_feature_space(
+        self,
+        adata: AnnData,
+        groupby: str,
+    ) -> pd.DataFrame:
+        """Return Cell-averaged components from a passed feature space.
         TODO: consider merging with `get_pseudobulks`
         TODO: DIALOGUE recommends running PCA on each cell type separately before running PMD - this should be implemented as an option here.
         Args:
-            groupby: The key to groupby for pseudobulks
-            n_components: The number of PCA components
+            adata: Annotated data matrix.
+            groupby: The key to groupby for pseudobulks.
         Returns:
-            A pseudobulk of PCA components.
+            A pseudobulk DataFrame of the averaged components.
         """
         aggr = {}
         for category in adata.obs.loc[:, groupby].cat.categories:
             temp = adata.obs.loc[:, groupby] == category
-            aggr[category] = adata[temp].obsm["X_pca"][:, :n_components].mean(axis=0)
+            aggr[category] = adata[temp].obsm[self.feature_space_key][:, : self.n_components].mean(axis=0)
         aggr = pd.DataFrame(aggr)
         return aggr
     def _scale_data(self, pseudobulks: pd.DataFrame, normalize: bool = True) -> np.ndarray:
@@ -130,6 +144,7 @@ class Dialogue:
         Args:
             adata: The AnnData object to append mcp scores to.
+            ct_subs: cell type objects.
             mcp_scores: The MCP scores dictionary.
             celltype_key: Key of the cell type column in obs.
@@ -213,7 +228,7 @@ class Dialogue:
         sample_obs: str,
         return_all: bool = False,
     ):
-        """Applies a mixed linear model using the specified formula (MCP scores used for the dependent var) and returns the coefficient and p-value
+        """Applies a mixed linear model using the specified formula (MCP scores used for the dependent var) and returns the coefficient and p-value.
         TODO: reduce runtime? Maybe we can use an approximation or something that isn't statsmodels.
@@ -332,7 +347,7 @@ class Dialogue:
         Args:
             mcp_name: The name of the MCP to model.
-            scores: The MCP scores for a cell type. Number of MCPs x number of features.
+            scores_df: The MCP scores for a cell type. Number of MCPs x number of features.
             ct_data: The AnnData object containing the metadata and labels in obs.
             tme: Transcript mean expression in `x`.
             sig: DataFrame containing a series of up and downregulated MCPs.
@@ -418,11 +433,10 @@ class Dialogue:
         # Finally get corr coeff
         return np.dot(A_mA, B_mB.T) / np.sqrt(np.dot(ssA[:, None], ssB[None]))
+    # TODO: needs check for correctness and variable renaming
+    # TODO: Confirm that this doesn't return duplicate gene names.
     def _get_top_elements(self, m: pd.DataFrame, max_length: int, min_threshold: float):
-        """
-        TODO: needs check for correctness and variable renaming
-        TODO: Confirm that this doesn't return duplicate gene names
+        """Get top elements.
         Args:
             m: Any DataFrame of Gene name as index with variable columns.
@@ -457,12 +471,11 @@ class Dialogue:
         # TODO this whole function should be standalone
         # It will contain the calculation of up/down + calculation (new final mcp scores)
         # Ensure that it'll still fit/work with the hierarchical multilevel_modeling
         """Determine the up and down genes per MCP."""
         # TODO: something is slightly slow here
         cca_sig_results: dict[Any, dict[str, Any]] = {}
         new_mcp_scores: dict[Any, list[Any]] = {}
-        for ct in ct_subs.keys():
+        for ct in ct_subs:
             ct_adata = ct_subs[ct]
             conf_m = ct_adata.obs[n_counts_key].values
@@ -483,9 +496,7 @@ class Dialogue:
             from scipy.stats import spearmanr
             def _pcor_mat(v1, v2, v3, method="spearman"):
-                """
-                MAJOR TODO: I've only used normal correlation instead of partial correlation as we wait on the implementation
-                """
+                """MAJOR TODO: I've only used normal correlation instead of partial correlation as we wait on the implementation."""
                 correlations = []  # R
                 pvals = []  # P
                 for x2 in v2:
@@ -506,7 +517,7 @@ class Dialogue:
                 return np.array(correlations), np.array(pvals)  # pvals_adjusted
             C1, P1 = _pcor_mat(ct_adata[:, top_cor_genes_flattened].X.toarray().T, mcp_scores[ct].T, conf_m)
-            C1[P1 > (0.05 / ct_adata.shape[1])] = 0  # why?
+            C1[(0.05 / ct_adata.shape[1]) < P1] = 0  # why?
             cca_sig_unformatted = self._get_top_elements(  # 3 up, 3 dn, for each mcp
                 pd.DataFrame(C1.T, index=top_cor_genes_flattened), max_length=max_genes, min_threshold=0.05
@@ -514,7 +525,7 @@ class Dialogue:
             # TODO: probably format the up and down within get_top_elements
             cca_sig: dict[str, Any] = defaultdict(dict)
-            for i in range(0, int(len(cca_sig_unformatted) / 2)):
+            for i in range(int(len(cca_sig_unformatted) / 2)):
                 cca_sig[f"MCP{i}"]["up"] = cca_sig_unformatted[i * 2]
                 cca_sig[f"MCP{i}"]["down"] = cca_sig_unformatted[i * 2 + 1]
@@ -523,7 +534,7 @@ class Dialogue:
             # This is basically DIALOGUE 3 now
             pre_r_scores = {
-                ct: ct_subs[ct].obsm["X_pca"][:, :50] @ ws_dict[ct]
+                ct: ct_subs[ct].obsm[self.feature_space_key][:, : self.n_components] @ ws_dict[ct]
                 for i, ct in enumerate(ct_subs.keys())
                 # TODO This is a recalculation and not a new calculation
             }
@@ -558,7 +569,7 @@ class Dialogue:
         self,
         adata: AnnData,
         ct_order: list[str],
-        agg_pca: bool = True,
+        agg_feature: bool = True,
         normalize: bool = True,
     ) -> tuple[list, dict]:
         """Separates cell into AnnDatas by celltype_key and creates the multifactor PMD input.
@@ -568,14 +579,14 @@ class Dialogue:
         Args:
             adata: AnnData object generate celltype objects for
             ct_order: The order of cell types
-            agg_pca: Whether to aggregate pseudobulks with PCA or not.
+            agg_feature: Whether to aggregate pseudobulks with some embeddings or not.
             normalize: Whether to mimic DIALOGUE behavior or not.
         Returns:
             A celltype_label:array dictionary.
         """
         ct_subs = {ct: adata[adata.obs[self.celltype_key] == ct].copy() for ct in ct_order}
-        fn = self._pseudobulk_pca if agg_pca else self._get_pseudobulks
+        fn = self._pseudobulk_feature_space if agg_feature else self._get_pseudobulks
         ct_aggr = {ct: fn(ad, self.sample_id) for ct, ad in ct_subs.items()}  # type: ignore
         # TODO: implement check (as in https://github.com/livnatje/DIALOGUE/blob/55da9be0a9bf2fcd360d9e11f63e30d041ec4318/R/DIALOGUE.main.R#L114-L119)
@@ -591,9 +602,9 @@ class Dialogue:
     def calculate_multifactor_PMD(
         self,
         adata: AnnData,
-        penalties: list[int] = None,
-        ct_order: list[str] = None,
-        agg_pca: bool = True,
+        penalties: list[int] | None = None,
+        ct_order: list[str] | None = None,
+        agg_feature: bool = True,
         solver: Literal["lp", "bs"] = "bs",
         normalize: bool = True,
     ) -> tuple[AnnData, dict[str, np.ndarray], dict[Any, Any], dict[Any, Any]]:
@@ -603,10 +614,9 @@ class Dialogue:
         Args:
             adata: AnnData object to calculate PMD for.
-            sample_id: Key to use for pseudobulk determination.
             penalties: PMD penalties.
             ct_order: The order of cell types.
-            agg_pca: Whether to calculate cell-averaged PCA components.
+            agg_feature: Whether to calculate cell-averaged principal components.
             solver: Which solver to use for PMD. Must be one of "lp" (linear programming) or "bs" (binary search).
                     For differences between these to please refer to https://github.com/theislab/sparsecca/blob/main/examples/linear_programming_multicca.ipynb
             normalize: Whether to mimic DIALOGUE as close as possible
@@ -631,7 +641,7 @@ class Dialogue:
         else:
             ct_order = cell_types = adata.obs[self.celltype_key].astype("category").cat.categories
-        mcca_in, ct_subs = self._load(adata, ct_order=cell_types, agg_pca=agg_pca, normalize=normalize)
+        mcca_in, ct_subs = self._load(adata, ct_order=cell_types, agg_feature=agg_feature, normalize=normalize)
         n_samples = mcca_in[0].shape[1]
         if penalties is None:
@@ -644,8 +654,6 @@ class Dialogue:
                     raise ValueError("Please ensure that every cell type is represented in every sample.") from e
                 else:
                     raise
-        else:
-            penalties = penalties
         if solver == "bs":
             ws, _ = multicca_pmd(mcca_in, penalties, K=self.n_mcps, standardize=True, niter=100, mimic_R=normalize)
@@ -656,8 +664,8 @@ class Dialogue:
         ws_dict = {ct: ws[i] for i, ct in enumerate(ct_order)}
         pre_r_scores = {
-            ct: ct_subs[ct].obsm["X_pca"][:, :50] @ ws[i]
-            for i, ct in enumerate(cell_types)  # TODO change from 50
+            ct: ct_subs[ct].obsm[self.feature_space_key][:, : self.n_components] @ ws[i]
+            for i, ct in enumerate(cell_types)
         }
         # TODO: output format needs some cleanup, even though each MCP score is matched to one cell, it's not at all
@@ -681,17 +689,17 @@ class Dialogue:
         ws_dict: dict,
         confounder: str | None,
         formula: str = None,
-    ):
+    ) -> pd.DataFrame:
         """Runs the multilevel modeling step to match genes to MCPs and generate p-values for MCPs.
         Args:
             ct_subs: The DIALOGUE cell type objects.
             mcp_scores: The determined MCP scores from the PMD step.
+            ws_dict: WS dictionary.
             confounder: Any modeling confounders.
             formula: The hierarchical modeling formula. Defaults to y ~ x + n_counts.
         Returns:
-            A Pandas DataFrame containing:
             - for each mcp: HLM_result_1, HLM_result_2, sig_genes_1, sig_genes_2
             - merged HLM_result_1, HLM_result_2, sig_genes_1, sig_genes_2 of all mcps
@@ -875,15 +883,15 @@ class Dialogue:
             if len(conditions_compare) != 2:
                 raise ValueError("Please specify conditions to compare or supply an object with only 2 conditions")
-        pvals = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(0, n_mcps)])
-        tstats = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(0, n_mcps)])
-        pvals_adj = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(0, n_mcps)])
+        pvals = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(n_mcps)])
+        tstats = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(n_mcps)])
+        pvals_adj = pd.DataFrame(1, adata.obs[celltype_label].unique(), ["mcp_" + str(n) for n in range(n_mcps)])
         response = adata.obs.groupby(sample_label)[condition_label].agg(pd.Series.mode)
         for celltype in adata.obs[celltype_label].unique():
             df = adata.obs[adata.obs[celltype_label] == celltype]
-            for mcpnum in ["mcp_" + str(n) for n in range(0, n_mcps)]:
+            for mcpnum in ["mcp_" + str(n) for n in range(n_mcps)]:
                 mns = df.groupby(sample_label)[mcpnum].mean()
                 mns = pd.concat([mns, response], axis=1)
                 res = stats.ttest_ind(
@@ -893,7 +901,7 @@ class Dialogue:
                 pvals.loc[celltype, mcpnum] = res[1]
                 tstats.loc[celltype, mcpnum] = res[0]
-        for mcpnum in ["mcp_" + str(n) for n in range(0, n_mcps)]:
+        for mcpnum in ["mcp_" + str(n) for n in range(n_mcps)]:
             pvals_adj[mcpnum] = multipletests(pvals[mcpnum], method="fdr_bh")[1]
         return {"pvals": pvals, "tstats": tstats, "pvals_adj": pvals_adj}
@@ -956,7 +964,7 @@ class Dialogue:
         genes_dict_up = {}  # type: ignore
         genes_dict_down = {}  # type: ignore
-        for celltype2 in mcp_dict.keys():
+        for celltype2 in mcp_dict:
             for gene in mcp_dict[celltype2][MCP + ".up"]:
                 if gene in genes_dict_up:
                     genes_dict_up[gene] += 1
@@ -1008,7 +1016,7 @@ class Dialogue:
             >>> genes_results = _get_extrema_MCP_genes_single(ct_subs, mcp="mcp_4", fraction=0.2)
         """
         genes = {}
-        for ct in ct_subs.keys():
+        for ct in ct_subs:
             mini = ct_subs[ct]
             mini.obs["extrema"] = pd.qcut(
                 mini.obs[mcp],
@@ -1056,13 +1064,13 @@ class Dialogue:
         for mcp in mcps:
             rank_dfs[mcp] = {}
             ct_ranked = self._get_extrema_MCP_genes_single(ct_subs, mcp=mcp, fraction=fraction)
-            for celltype in ct_ranked.keys():
+            for celltype in ct_ranked:
                 rank_dfs[mcp][celltype] = sc.get.rank_genes_groups_df(ct_ranked[celltype], group=None)
         return rank_dfs
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_split_violins(
+    def plot_split_violins(  # pragma: no cover # noqa: D417
         self,
         adata: AnnData,
         split_key: str,
@@ -1070,7 +1078,6 @@ class Dialogue:
         *,
         split_which: tuple[str, str] = None,
         mcp: str = "mcp_0",
-        show: bool = True,
         return_fig: bool = False,
     ) -> Figure | None:
         """Plots split violin plots for a given MCP and split variable.
@@ -1110,14 +1117,13 @@ class Dialogue:
         ax = sns.violinplot(data=df, x=celltype_key, y=mcp, hue=split_key, split=True)
         ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
+        plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_pairplot(
+    def plot_pairplot(  # pragma: no cover # noqa: D417
         self,
         adata: AnnData,
         celltype_key: str,
@@ -1125,7 +1131,6 @@ class Dialogue:
         sample_id: str,
         *,
         mcp: str = "mcp_0",
-        show: bool = True,
         return_fig: bool = False,
     ) -> Figure | None:
         """Generate a pairplot visualization for multi-cell perturbation (MCP) data.
@@ -1167,8 +1172,7 @@ class Dialogue:
         mcp_pivot = pd.concat([mcp_pivot, aggstats[color]], axis=1)
         sns.pairplot(mcp_pivot, hue=color, corner=True)
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
+        plt.show()
         return None

pertpy/tools/_differential_gene_expression/_base.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import contextlib
 import math
 from abc import ABC, abstractmethod
 from collections.abc import Iterable, Mapping, Sequence
@@ -23,8 +24,7 @@ from pertpy.tools._differential_gene_expression._checks import check_is_numeric_
 class MethodBase(ABC):
     def __init__(self, adata, *, mask=None, layer=None, **kwargs):
-        """
-        Initialize the method.
+        """Initialize the method.
         Args:
             adata: AnnData object, usually pseudobulked.
@@ -62,8 +62,7 @@ class MethodBase(ABC):
         fit_kwargs=MappingProxyType({}),
         test_kwargs=MappingProxyType({}),
     ):
-        """
-        Compare between groups in a specified column.
+        """Compare between groups in a specified column.
         Args:
             adata: AnnData object.
@@ -100,7 +99,7 @@ class MethodBase(ABC):
         ...
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_volcano(
+    def plot_volcano(  # pragma: no cover # noqa: D417
         self,
         data: pd.DataFrame | ad.AnnData,
         *,
@@ -125,7 +124,6 @@ class MethodBase(ABC):
         shape_order: list[str] | None = None,
         x_label: str | None = None,
         y_label: str | None = None,
-        show: bool = True,
         return_fig: bool = False,
         **kwargs: int,
     ) -> Figure | None:
@@ -189,8 +187,7 @@ class MethodBase(ABC):
             colors = ["gray", "#D62728", "#1F77B4"]
         def _pval_reciprocal(lfc: float) -> float:
-            """
-            Function for relating -log10(pvalue) and logfoldchange in a reciprocal.
+            """Function for relating -log10(pvalue) and logfoldchange in a reciprocal.
             Used for plotting the S-curve
             """
@@ -198,7 +195,7 @@ class MethodBase(ABC):
         def _map_shape(symbol: str) -> str:
             if shape_dict is not None:
-                for k in shape_dict.keys():
+                for k in shape_dict:
                     if shape_dict[k] is not None and symbol in shape_dict[k]:
                         return k
             return "other"
@@ -212,8 +209,7 @@ class MethodBase(ABC):
             pval_thresh: float = None,
             s_curve: bool = False,
         ) -> str:
-            """
-            Map genes to categorize based on log2fc and pvalue.
+            """Map genes to categorize based on log2fc and pvalue.
             These categories are used for coloring the dots.
             Used when no color_dict is passed, sets up/down/nonsignificant.
@@ -230,14 +226,13 @@ class MethodBase(ABC):
                     return "Down"
                 else:
                     return "not DE"
+            # Standard condition for Up or Down categorization
+            elif log2fc > log2fc_thresh and nlog10 > pval_thresh:
+                return "Up"
+            elif log2fc < -log2fc_thresh and nlog10 > pval_thresh:
+                return "Down"
             else:
-                # Standard condition for Up or Down categorization
-                if log2fc > log2fc_thresh and nlog10 > pval_thresh:
-                    return "Up"
-                elif log2fc < -log2fc_thresh and nlog10 > pval_thresh:
-                    return "Down"
-                else:
-                    return "not DE"
+                return "not DE"
         def _map_genes_categories_highlight(
             row: pd.Series,
@@ -248,8 +243,7 @@ class MethodBase(ABC):
             s_curve: bool = False,
             symbol_col: str = None,
         ) -> str:
-            """
-            Map genes to categorize based on log2fc and pvalue.
+            """Map genes to categorize based on log2fc and pvalue.
             These categories are used for coloring the dots.
             Used when color_dict is passed, sets DE / not DE for background and user supplied highlight genes.
@@ -259,7 +253,7 @@ class MethodBase(ABC):
             symbol = row[symbol_col]
             if color_dict is not None:
-                for k in color_dict.keys():
+                for k in color_dict:
                     if symbol in color_dict[k]:
                         return k
@@ -484,14 +478,13 @@ class MethodBase(ABC):
         plt.legend(loc=1, bbox_to_anchor=legend_pos, frameon=False)
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
+        plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_paired(
+    def plot_paired(  # pragma: no cover # noqa: D417
         self,
         adata: ad.AnnData,
         results_df: pd.DataFrame,
@@ -511,7 +504,6 @@ class MethodBase(ABC):
         pvalue_template=lambda x: f"p={x:.2e}",
         boxplot_properties=None,
         palette=None,
-        show: bool = True,
         return_fig: bool = False,
     ) -> Figure | None:
         """Creates a pairwise expression plot from a Pandas DataFrame or Anndata.
@@ -584,14 +576,9 @@ class MethodBase(ABC):
                 adata, target_col=groupby, groups_col=pairedby, layer_key=layer, mode="sum", min_cells=1, min_counts=1
             )
-        if layer is not None:
-            X = adata.layers[layer]
-        else:
-            X = adata.X
-        try:
+        X = adata.layers[layer] if layer is not None else adata.X
+        with contextlib.suppress(AttributeError):
             X = X.toarray()
-        except AttributeError:
-            pass
         groupby_cols = [pairedby, groupby]
         df = adata.obs.loc[:, groupby_cols].join(pd.DataFrame(X, index=adata.obs_names, columns=var_names))
@@ -679,14 +666,13 @@ class MethodBase(ABC):
             )
         plt.tight_layout()
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
+        plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_fold_change(
+    def plot_fold_change(  # pragma: no cover # noqa: D417
         self,
         results_df: pd.DataFrame,
         *,
@@ -696,7 +682,6 @@ class MethodBase(ABC):
         symbol_col: str = "variable",
         y_label: str = "Log2 fold change",
         figsize: tuple[int, int] = (10, 5),
-        show: bool = True,
         return_fig: bool = False,
         **barplot_kwargs,
     ) -> Figure | None:
@@ -762,14 +747,13 @@ class MethodBase(ABC):
         plt.xlabel("")
         plt.ylabel(y_label)
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
+        plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
-    def plot_multicomparison_fc(
+    def plot_multicomparison_fc(  # pragma: no cover # noqa: D417
         self,
         results_df: pd.DataFrame,
         *,
@@ -782,7 +766,6 @@ class MethodBase(ABC):
         figsize: tuple[int, int] = (10, 2),
         x_label: str = "Contrast",
         y_label: str = "Gene",
-        show: bool = True,
         return_fig: bool = False,
         **heatmap_kwargs,
     ) -> Figure | None:
@@ -880,10 +863,9 @@ class MethodBase(ABC):
         plt.xlabel(x_label)
         plt.ylabel(y_label)
-        if show:
-            plt.show()
         if return_fig:
             return plt.gcf()
+        plt.show()
         return None
@@ -1021,7 +1003,7 @@ class LinearModelBase(MethodBase):
             )
         return self.formulaic_contrasts.cond(**kwargs)
-    def contrast(self, *args, **kwargs):
+    def contrast(self, *args, **kwargs):  # noqa: D417
         """Build a simple contrast for pairwise comparisons.
         Args:

pertpy/tools/_differential_gene_expression/_checks.py CHANGED Viewed

@@ -16,9 +16,8 @@ def check_is_numeric_matrix(array: np.ndarray | spmatrix) -> None:
     if issparse(array):
         if np.any(~np.isfinite(array.data)):
             raise ValueError("Counts cannot contain negative, NaN or Inf values.")
-    else:
-        if np.any(~np.isfinite(array)):
-            raise ValueError("Counts cannot contain negative, NaN or Inf values.")
+    elif np.any(~np.isfinite(array)):
+        raise ValueError("Counts cannot contain negative, NaN or Inf values.")
 def check_is_integer_matrix(array: np.ndarray | spmatrix, tolerance: float = 1e-6) -> None:
@@ -34,8 +33,7 @@ def check_is_integer_matrix(array: np.ndarray | spmatrix, tolerance: float = 1e-
     if issparse(array):
         if not array.data.dtype.kind == "i" and not np.all(np.abs(array.data - np.round(array.data)) < tolerance):
             raise ValueError("Non-zero elements of the matrix must be close to integer values.")
-    else:
-        if not array.dtype.kind == "i" and not np.all(np.abs(array - np.round(array)) < tolerance):
-            raise ValueError("Matrix must be a count matrix.")
+    elif array.dtype.kind != "i" and not np.all(np.abs(array - np.round(array)) < tolerance):
+        raise ValueError("Matrix must be a count matrix.")
     if (array < 0).sum() > 0:
         raise ValueError("Non-zero elements of the matrix must be positive.")

pertpy/tools/_differential_gene_expression/_dge_comparison.py CHANGED Viewed

@@ -36,16 +36,15 @@ class DGEEVAL:
             if not de_key1 or not de_key2:
                 raise ValueError("Both `de_key1` and `de_key2` must be provided together if using `adata`.")
-        else:  # use dfs
-            if de_df1 is None or de_df2 is None:
-                raise ValueError("Both `de_df1` and `de_df2` must be provided together if using DataFrames.")
+        elif de_df1 is None or de_df2 is None:
+            raise ValueError("Both `de_df1` and `de_df2` must be provided together if using DataFrames.")
         if de_key1:
             if not adata:
                 raise ValueError("`adata` should be provided with `de_key1` and `de_key2`. ")
-            assert all(
-                k in adata.uns for k in [de_key1, de_key2]
-            ), "Provided `de_key1` and `de_key2` must exist in `adata.uns`."
+            assert all(k in adata.uns for k in [de_key1, de_key2]), (
+                "Provided `de_key1` and `de_key2` must exist in `adata.uns`."
+            )
             vars = adata.var_names
         if de_df1 is not None:

pertpy 0.9.5__py3-none-any.whl → 0.11.0__py3-none-any.whl

pertpy-0.9.5-py3-none-any.whl → pertpy-0.11.0-py3-none-any.whl