PyPI - pertpy - Versions diffs - 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

pertpy 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56)

pertpy/__init__.py +2 -1
pertpy/data/__init__.py +61 -0
pertpy/data/_dataloader.py +27 -23
pertpy/data/_datasets.py +58 -0
pertpy/metadata/__init__.py +2 -0
pertpy/metadata/_cell_line.py +39 -70
pertpy/metadata/_compound.py +3 -4
pertpy/metadata/_drug.py +2 -6
pertpy/metadata/_look_up.py +38 -51
pertpy/metadata/_metadata.py +7 -10
pertpy/metadata/_moa.py +2 -6
pertpy/plot/__init__.py +0 -5
pertpy/preprocessing/__init__.py +2 -0
pertpy/preprocessing/_guide_rna.py +2 -3
pertpy/tools/__init__.py +42 -4
pertpy/tools/_augur.py +14 -15
pertpy/tools/_cinemaot.py +2 -2
pertpy/tools/_coda/_base_coda.py +118 -142
pertpy/tools/_coda/_sccoda.py +16 -15
pertpy/tools/_coda/_tasccoda.py +21 -22
pertpy/tools/_dialogue.py +18 -23
pertpy/tools/_differential_gene_expression/__init__.py +20 -0
pertpy/tools/_differential_gene_expression/_base.py +657 -0
pertpy/tools/_differential_gene_expression/_checks.py +41 -0
pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
pertpy/tools/_differential_gene_expression/_edger.py +125 -0
pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
pertpy/tools/_distances/_distance_tests.py +21 -16
pertpy/tools/_distances/_distances.py +406 -70
pertpy/tools/_enrichment.py +10 -15
pertpy/tools/_kernel_pca.py +1 -1
pertpy/tools/_milo.py +76 -53
pertpy/tools/_mixscape.py +15 -11
pertpy/tools/_perturbation_space/_clustering.py +5 -2
pertpy/tools/_perturbation_space/_comparison.py +112 -0
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +20 -22
pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
pertpy/tools/_perturbation_space/_simple.py +3 -3
pertpy/tools/_scgen/__init__.py +1 -1
pertpy/tools/_scgen/_base_components.py +2 -3
pertpy/tools/_scgen/_scgen.py +33 -28
pertpy/tools/_scgen/_utils.py +2 -2
{pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +22 -13
pertpy-0.8.0.dist-info/RECORD +57 -0
{pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
pertpy/plot/_augur.py +0 -171
pertpy/plot/_coda.py +0 -601
pertpy/plot/_guide_rna.py +0 -64
pertpy/plot/_milopy.py +0 -209
pertpy/plot/_mixscape.py +0 -355
pertpy/tools/_differential_gene_expression.py +0 -325
pertpy-0.7.0.dist-info/RECORD +0 -53
{pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0

pertpy/tools/_enrichment.py CHANGED Viewed

@@ -82,18 +82,15 @@ class Enrichment:
                      - A dictionary of dictionaries with group categories as keys. Use `nested=True` in this case.
                      If not provided, ChEMBL-derived drug target sets are used.
             nested: Indicates if `targets` is a dictionary of dictionaries with group categories as keys.
-                    Defaults to False.
             categories: To subset the gene groups to specific categories, especially when `targets=None` or `nested=True`.
                         For ChEMBL drug targets, these are ATC level 1/level 2 category codes.
             method: Method for scoring gene groups. `"mean"` calculates the mean over all genes,
                     while `"seurat"` uses a background profile subtraction approach.
-                    Defaults to 'mean'.
-            layer: Specifies which `.layers` of AnnData to use for expression values. Defaults to `.X` if None.
+            layer: Specifies which `.layers` of AnnData to use for expression values.
             n_bins: The number of expression bins for the `'seurat'` method.
             ctrl_size: The number of genes to randomly sample from each expression bin for the `"seurat"` method.
             key_added: Prefix key that adds the results to `uns`.
                        Note that the actual values are `key_added_score`, `key_added_variables`, `key_added_genes`, `key_added_all_genes`.
-                       Defaults to `pertpy_enrichment`.
         Returns:
             An AnnData object with scores.
@@ -259,16 +256,15 @@ class Enrichment:
                    in the original expression space.
             targets: The gene groups to evaluate, either as a dictionary with names of the
                      groups as keys and gene lists as values, or a dictionary of dictionaries
-                     with names of gene group categories as keys. Defaults to None, in which
+                     with names of gene group categories as keys.
                      case it uses `d2c.score()` output or loads ChEMBL-derived drug target sets.
             nested: Indicates if `targets` is a dictionary of dictionaries with group
-                    categories as keys. Defaults to False.
+                    categories as keys.
             categories: Used to subset the gene groups to one or more categories,
-                        applicable if `targets=None` or `nested=True`. Defaults to None.
+                        applicable if `targets=None` or `nested=True`.
             absolute: If True, passes the absolute values of scores to GSEA, improving
-                      statistical power. Defaults to False.
+                      statistical power.
             key_added: Prefix key that adds the results to `uns`.
-                       Defaults to `pertpy_enrichment_gsea`.
         Returns:
             A dictionary with clusters as keys and data frames of test results sorted on
@@ -317,13 +313,12 @@ class Enrichment:
             targets: Gene groups to evaluate, which can be targets of known drugs, GO terms, pathway memberships, etc.
                      Accepts a dictionary of dictionaries with group categories as keys.
                      If not provided, ChEMBL-derived or dgbidb drug target sets are used, given by `source`.
-            source: Source of drug target sets when `targets=None`, `chembl`, `dgidb` or `pharmgkb`. Defaults to `chembl`.
+            source: Source of drug target sets when `targets=None`, `chembl`, `dgidb` or `pharmgkb`.
             categories: To subset the gene groups to specific categories, especially when `targets=None`.
                             For ChEMBL drug targets, these are ATC level 1/level 2 category codes.
-            category_name: The name of category used to generate a nested drug target set when `targets=None` and `source=dgidb|pharmgkb`. Defaults to `interaction_type`.
+            category_name: The name of category used to generate a nested drug target set when `targets=None` and `source=dgidb|pharmgkb`.
             groupby: dotplot groupby such as clusters or cell types.
             key: Prefix key of enrichment results in `uns`.
-                 Defaults to `pertpy_enrichment`.
             kwargs: Passed to scanpy dotplot.
         Returns:
@@ -436,9 +431,9 @@ class Enrichment:
         Args:
             adata: AnnData object to plot.
             enrichment: Cluster names as keys, blitzgsea's ``gsea()`` output as values.
-            n: How many top scores to show for each group. Defaults to 10.
-            key: GSEA results key in `uns`. Defaults to "pertpy_enrichment_gsea".
-            interactive_plot: Whether to plot interactively or not. Defaults to False.
+            n: How many top scores to show for each group.
+            key: GSEA results key in `uns`.
+            interactive_plot: Whether to plot interactively or not.
         Examples:
             >>> import pertpy as pt

pertpy/tools/_kernel_pca.py CHANGED Viewed

@@ -31,7 +31,7 @@ def kernel_pca(
     Returns:
         If `copy=True`, returns the copy of `adata` with kernel pca in `.obsm["X_kpca"]`.
-        Otherwise writes kernel pca directly to `.obsm["X_kpca"]` of the provided `adata`.
+        Otherwise, writes kernel pca directly to `.obsm["X_kpca"]` of the provided `adata`.
         If `return_transformer=True`, returns also the fitted `KernelPCA` transformer.
     """
     if copy:

pertpy/tools/_milo.py CHANGED Viewed

@@ -11,22 +11,16 @@ import pandas as pd
 import scanpy as sc
 import seaborn as sns
 from anndata import AnnData
+from lamin_utils import logger
 from mudata import MuData
-from rich import print
 if TYPE_CHECKING:
     from collections.abc import Sequence
     from matplotlib.axes import Axes
     from matplotlib.colors import Colormap
+    from matplotlib.figure import Figure
-try:
-    from rpy2.robjects import conversion, numpy2ri, pandas2ri
-    from rpy2.robjects.packages import STAP, PackageNotInstalledError, importr
-except ModuleNotFoundError:
-    print(
-        "[bold yellow]ryp2 is not installed. Install with [green]pip install rpy2 [yellow]to run tools with R support."
-    )
 from scipy.sparse import csr_matrix
 from sklearn.metrics.pairwise import euclidean_distances
@@ -35,7 +29,16 @@ class Milo:
     """Python implementation of Milo."""
     def __init__(self):
-        pass
+        try:
+            from rpy2.robjects import conversion, numpy2ri, pandas2ri
+            from rpy2.robjects.packages import STAP, PackageNotInstalledError, importr
+        except ModuleNotFoundError:
+            raise ImportError("milo requires rpy2 to be installed.") from None
+        try:
+            importr("edgeR")
+        except ImportError as e:
+            raise ImportError("milo requires a valid R installation with edger installed:\n") from e
     def load(
         self,
@@ -48,7 +51,7 @@ class Milo:
             input: AnnData
             feature_key: Key to store the cell-level AnnData object in the MuData object
         Returns:
-            MuData: MuData object with original AnnData. Defaults to`mudata[feature_key]`.
+            MuData: MuData object with original AnnData.
         Examples:
             >>> import pertpy as pt
@@ -80,11 +83,10 @@ class Milo:
             neighbors_key: The key in `adata.obsp` or `mdata[feature_key].obsp` to use as KNN graph.
                            If not specified, `make_nhoods` looks .obsp[‘connectivities’] for connectivities (default storage places for `scanpy.pp.neighbors`).
                            If specified, it looks at .obsp[.uns[neighbors_key][‘connectivities_key’]] for connectivities.
-                           Defaults to None.
-            feature_key: If input data is MuData, specify key to cell-level AnnData object. Defaults to 'rna'.
-            prop: Fraction of cells to sample for neighbourhood index search. Defaults to 0.1.
-            seed: Random seed for cell sampling. Defaults to 0.
-            copy: Determines whether a copy of the `adata` is returned. Defaults to False.
+            feature_key: If input data is MuData, specify key to cell-level AnnData object.
+            prop: Fraction of cells to sample for neighbourhood index search.
+            seed: Random seed for cell sampling.
+            copy: Determines whether a copy of the `adata` is returned.
         Returns:
             If `copy=True`, returns the copy of `adata` with the result in `.obs`, `.obsm`, and `.uns`.
@@ -128,7 +130,7 @@ class Milo:
             try:
                 knn_graph = adata.obsp["connectivities"].copy()
             except KeyError:
-                print('No "connectivities" slot in adata.obsp -- please run scanpy.pp.neighbors(adata) first')
+                logger.error('No "connectivities" slot in adata.obsp -- please run scanpy.pp.neighbors(adata) first')
                 raise
         else:
             try:
@@ -183,6 +185,7 @@ class Milo:
         dist_mat = knn_dists[nhood_ixs, :]
         k_distances = dist_mat.max(1).toarray().ravel()
         adata.obs["nhood_kth_distance"] = 0
+        adata.obs["nhood_kth_distance"] = adata.obs["nhood_kth_distance"].astype(float)
         adata.obs.loc[adata.obs["nhood_ixs_refined"] == 1, "nhood_kth_distance"] = k_distances
         if copy:
@@ -199,7 +202,7 @@ class Milo:
         Args:
             data: AnnData object with neighbourhoods defined in `obsm['nhoods']` or MuData object with a modality with neighbourhoods defined in `obsm['nhoods']`
             sample_col: Column in adata.obs that contains sample information
-            feature_key: If input data is MuData, specify key to cell-level AnnData object. Defaults to 'rna'.
+            feature_key: If input data is MuData, specify key to cell-level AnnData object.
         Returns:
             MuData object storing the original (i.e. rna) AnnData in `mudata[feature_key]`
@@ -230,7 +233,7 @@ class Milo:
             try:
                 nhoods = adata.obsm["nhoods"]
             except KeyError:
-                print('Cannot find "nhoods" slot in adata.obsm -- please run milopy.make_nhoods(adata)')
+                logger.error('Cannot find "nhoods" slot in adata.obsm -- please run milopy.make_nhoods(adata)')
                 raise
         # Make nhood abundance matrix
         sample_dummies = pd.get_dummies(adata.obs[sample_col])
@@ -238,7 +241,7 @@ class Milo:
         sample_dummies = csr_matrix(sample_dummies.values)
         nhood_count_mat = nhoods.T.dot(sample_dummies)
         sample_obs = pd.DataFrame(index=all_samples)
-        sample_adata = AnnData(X=nhood_count_mat.T, obs=sample_obs, dtype=np.float32)
+        sample_adata = AnnData(X=nhood_count_mat.T, obs=sample_obs)
         sample_adata.uns["sample_col"] = sample_col
         # Save nhood index info
         sample_adata.var["index_cell"] = adata.obs_names[adata.obs["nhood_ixs_refined"] == 1]
@@ -270,10 +273,10 @@ class Milo:
             design: Formula for the test, following glm syntax from R (e.g. '~ condition').
                     Terms should be columns in `milo_mdata[feature_key].obs`.
             model_contrasts: A string vector that defines the contrasts used to perform DA testing, following glm syntax from R (e.g. "conditionDisease - conditionControl").
-                             If no contrast is specified (default), then the last categorical level in condition of interest is used as the test group. Defaults to None.
-            subset_samples: subset of samples (obs in `milo_mdata['milo']`) to use for the test. Defaults to None.
-            add_intercept: whether to include an intercept in the model. If False, this is equivalent to adding + 0 in the design formula. When model_contrasts is specified, this is set to False by default. Defaults to True.
-            feature_key: If input data is MuData, specify key to cell-level AnnData object. Defaults to 'rna'.
+                             If no contrast is specified (default), then the last categorical level in condition of interest is used as the test group.
+            subset_samples: subset of samples (obs in `milo_mdata['milo']`) to use for the test.
+            add_intercept: whether to include an intercept in the model. If False, this is equivalent to adding + 0 in the design formula. When model_contrasts is specified, this is set to False by default.
+            feature_key: If input data is MuData, specify key to cell-level AnnData object.
             solver: The solver to fit the model to. One of "edger" (requires R, rpy2 and edgeR to be installed) or "batchglm"
         Returns:
@@ -297,8 +300,8 @@ class Milo:
         try:
             sample_adata = mdata["milo"]
         except KeyError:
-            print(
-                "[bold red]milo_mdata should be a MuData object with two slots:"
+            logger.error(
+                "milo_mdata should be a MuData object with two slots:"
                 " feature_key and 'milo' - please run milopy.count_nhoods() first"
             )
             raise
@@ -312,7 +315,7 @@ class Milo:
             sample_obs = adata.obs[covariates + [sample_col]].drop_duplicates()
         except KeyError:
             missing_cov = [x for x in covariates if x not in sample_adata.obs.columns]
-            print("Covariates {c} are not columns in adata.obs".format(c=" ".join(missing_cov)))
+            logger.warning("Covariates {c} are not columns in adata.obs".format(c=" ".join(missing_cov)))
             raise
         sample_obs = sample_obs[covariates + [sample_col]]
         sample_obs.index = sample_obs[sample_col].astype("str")
@@ -320,7 +323,7 @@ class Milo:
         try:
             assert sample_obs.loc[sample_adata.obs_names].shape[0] == len(sample_adata.obs_names)
         except AssertionError:
-            print(
+            logger.warning(
                 f"Values in mdata[{feature_key}].obs[{covariates}] cannot be unambiguously assigned to each sample"
                 f" -- each sample value should match a single covariate value"
             )
@@ -332,7 +335,9 @@ class Milo:
             design_df = sample_adata.obs[covariates]
         except KeyError:
             missing_cov = [x for x in covariates if x not in sample_adata.obs.columns]
-            print('Covariates {c} are not columns in adata.uns["sample_adata"].obs'.format(c=" ".join(missing_cov)))
+            logger.error(
+                'Covariates {c} are not columns in adata.uns["sample_adata"].obs'.format(c=" ".join(missing_cov))
+            )
             raise
         # Get count matrix
         count_mat = sample_adata.X.T.toarray()
@@ -376,6 +381,8 @@ class Milo:
                     return(colnames(m))
                 }
                 """
+                from rpy2.robjects.packages import STAP
                 get_model_cols = STAP(r_str, "get_model_cols")
                 model_mat_cols = get_model_cols.get_model_cols(design_df, design)
                 model_df = pd.DataFrame(model)
@@ -383,13 +390,16 @@ class Milo:
                 try:
                     mod_contrast = limma.makeContrasts(contrasts=model_contrasts, levels=model_df)
                 except ValueError:
-                    print("Model contrasts must be in the form 'A-B' or 'A+B'")
+                    logger.error("Model contrasts must be in the form 'A-B' or 'A+B'")
                     raise
                 res = base.as_data_frame(
                     edgeR.topTags(edgeR.glmQLFTest(fit, contrast=mod_contrast), sort_by="none", n=np.inf)
                 )
             else:
                 res = base.as_data_frame(edgeR.topTags(edgeR.glmQLFTest(fit, coef=n_coef), sort_by="none", n=np.inf))
+            from rpy2.robjects import conversion
             res = conversion.rpy2py(res)
             if not isinstance(res, pd.DataFrame):
                 res = pd.DataFrame(res)
@@ -414,7 +424,7 @@ class Milo:
         Args:
             mdata: MuData object
             anno_col: Column in adata.obs containing the cell annotations to use for nhood labelling
-            feature_key: If input data is MuData, specify key to cell-level AnnData object. Defaults to 'rna'.
+            feature_key: If input data is MuData, specify key to cell-level AnnData object.
         Returns:
             None. Adds in place:
@@ -437,7 +447,7 @@ class Milo:
         try:
             sample_adata = mdata["milo"]
         except KeyError:
-            print(
+            logger.error(
                 "milo_mdata should be a MuData object with two slots: feature_key and 'milo' - please run milopy.count_nhoods(adata) first"
             )
             raise
@@ -468,7 +478,7 @@ class Milo:
         Args:
             mdata: MuData object
             anno_col: Column in adata.obs containing the cell annotations to use for nhood labelling
-            feature_key: If input data is MuData, specify key to cell-level AnnData object. Defaults to 'rna'.
+            feature_key: If input data is MuData, specify key to cell-level AnnData object.
         Returns:
             None. Adds in place:
@@ -509,7 +519,7 @@ class Milo:
         Args:
             mdata: MuData object
             new_covariates: columns in `milo_mdata[feature_key].obs` to add to `milo_mdata['milo'].obs`.
-            feature_key: If input data is MuData, specify key to cell-level AnnData object. Defaults to 'rna'.
+            feature_key: If input data is MuData, specify key to cell-level AnnData object.
         Returns:
             None, adds columns to `milo_mdata['milo']` in place
@@ -528,7 +538,7 @@ class Milo:
         try:
             sample_adata = mdata["milo"]
         except KeyError:
-            print(
+            logger.error(
                 "milo_mdata should be a MuData object with two slots: feature_key and 'milo' - please run milopy.count_nhoods(adata) first"
             )
             raise
@@ -542,14 +552,14 @@ class Milo:
             sample_obs = adata.obs[covariates + [sample_col]].drop_duplicates()
         except KeyError:
             missing_cov = [covar for covar in covariates if covar not in sample_adata.obs.columns]
-            print("Covariates {c} are not columns in adata.obs".format(c=" ".join(missing_cov)))
+            logger.error("Covariates {c} are not columns in adata.obs".format(c=" ".join(missing_cov)))
             raise
         sample_obs = sample_obs[covariates + [sample_col]].astype("str")
         sample_obs.index = sample_obs[sample_col]
         try:
             assert sample_obs.loc[sample_adata.obs_names].shape[0] == len(sample_adata.obs_names)
         except ValueError:
-            print(
+            logger.error(
                 "Covariates cannot be unambiguously assigned to each sample -- each sample value should match a single covariate value"
             )
             raise
@@ -560,8 +570,8 @@ class Milo:
         Args:
             mdata: MuData object
-            basis: Name of the obsm basis to use for layout of neighbourhoods (key in `adata.obsm`). Defaults to "X_umap".
-            feature_key: If input data is MuData, specify key to cell-level AnnData object. Defaults to 'rna'.
+            basis: Name of the obsm basis to use for layout of neighbourhoods (key in `adata.obsm`).
+            feature_key: If input data is MuData, specify key to cell-level AnnData object.
         Returns:
             - `milo_mdata['milo'].varp['nhood_connectivities']`: graph of overlap between neighbourhoods (i.e. no of shared cells)
@@ -593,13 +603,13 @@ class Milo:
             "distances_key": "",
         }
-    def add_nhood_expression(self, mdata: MuData, layer: str | None = None, feature_key: str | None = "rna"):
+    def add_nhood_expression(self, mdata: MuData, layer: str | None = None, feature_key: str | None = "rna") -> None:
         """Calculates the mean expression in neighbourhoods of each feature.
         Args:
             mdata: MuData object
-            layer: If provided, use `milo_mdata[feature_key][layer]` as expression matrix instead of `milo_mdata[feature_key].X`. Defaults to None.
-            feature_key: If input data is MuData, specify key to cell-level AnnData object. Defaults to 'rna'.
+            layer: If provided, use `milo_mdata[feature_key][layer]` as expression matrix instead of `milo_mdata[feature_key].X`.
+            feature_key: If input data is MuData, specify key to cell-level AnnData object.
         Returns:
             Updates adata in place to store the matrix of average expression in each neighbourhood in `milo_mdata['milo'].varm['expr']`
@@ -618,7 +628,7 @@ class Milo:
         try:
             sample_adata = mdata["milo"]
         except KeyError:
-            print(
+            logger.error(
                 "milo_mdata should be a MuData object with two slots:"
                 " feature_key and 'milo' - please run milopy.count_nhoods(adata) first"
             )
@@ -642,6 +652,9 @@ class Milo:
         self,
     ):
         """Set up rpy2 to run edgeR"""
+        from rpy2.robjects import numpy2ri, pandas2ri
+        from rpy2.robjects.packages import importr
         numpy2ri.activate()
         pandas2ri.activate()
         edgeR = self._try_import_bioc_library("edgeR")
@@ -660,11 +673,13 @@ class Milo:
         Args:
             name (str): R packages name
         """
+        from rpy2.robjects.packages import PackageNotInstalledError, importr
         try:
             _r_lib = importr(name)
             return _r_lib
         except PackageNotInstalledError:
-            print(f"Install Bioconductor library `{name!r}` first as `BiocManager::install({name!r}).`")
+            logger.error(f"Install Bioconductor library `{name!r}` first as `BiocManager::install({name!r}).`")
             raise
     def _graph_spatial_fdr(
@@ -678,7 +693,7 @@ class Milo:
         Args:
             sample_adata: Sample-level AnnData.
-            neighbors_key: The key in `adata.obsp` to use as KNN graph. Defaults to None.
+            neighbors_key: The key in `adata.obsp` to use as KNN graph.
         """
         # use 1/connectivity as the weighting for the weighted BH adjustment from Cydar
         w = 1 / sample_adata.var["kth_distance"]
@@ -718,10 +733,10 @@ class Milo:
         Args:
             mdata: MuData object
             alpha: Significance threshold. (default: 0.1)
-            min_logFC: Minimum absolute log-Fold Change to show results. If is 0, show all significant neighbourhoods. Defaults to 0.
+            min_logFC: Minimum absolute log-Fold Change to show results. If is 0, show all significant neighbourhoods.
             min_size: Minimum size of nodes in visualization. (default: 10)
-            plot_edges: If edges for neighbourhood overlaps whould be plotted. Defaults to False.
-            title: Plot title. Defaults to "DA log-Fold Change".
+            plot_edges: If edges for neighbourhood overlaps whould be plotted.
+            title: Plot title.
             show: Show the plot, do not return axis.
             save: If `True` or a `str`, save the figure. A string is appended to the default filename.
                   Infer the filetype if ending on {`'.pdf'`, `'.png'`, `'.svg'`}.
@@ -807,7 +822,7 @@ class Milo:
         Args:
             mdata: MuData object with feature_key slot, storing neighbourhood assignments in `mdata[feature_key].obsm['nhoods']`
             ix: index of neighbourhood to visualize
-            basis: Embedding to use for visualization. Defaults to "X_umap".
+            basis: Embedding to use for visualization.
             show: Show the plot, do not return axis.
             save: If True or a str, save the figure. A string is appended to the default filename. Infer the filetype if ending on {'.pdf', '.png', '.svg'}.
             **kwargs: Additional arguments to `scanpy.pl.embedding`.
@@ -853,14 +868,14 @@ class Milo:
         return_fig: bool | None = None,
         save: bool | str | None = None,
         show: bool | None = None,
-    ) -> None:
+    ) -> Figure | Axes | None:
         """Plot beeswarm plot of logFC against nhood labels
         Args:
             mdata: MuData object
             anno_col: Column in adata.uns['nhood_adata'].obs to use as annotation. (default: 'nhood_annotation'.)
             alpha: Significance threshold. (default: 0.1)
-            subset_nhoods: List of nhoods to plot. If None, plot all nhoods. Defaults to None.
+            subset_nhoods: List of nhoods to plot. If None, plot all nhoods.
             palette: Name of Seaborn color palette for violinplots.
                      Defaults to pre-defined category colors for violinplots.
@@ -960,13 +975,17 @@ class Milo:
         if save:
             plt.savefig(save, bbox_inches="tight")
+            return None
         if show:
             plt.show()
+            return None
         if return_fig:
             return plt.gcf()
         if (not show and not save) or (show is None and save is None):
             return plt.gca()
+        return None
     def plot_nhood_counts_by_cond(
         self,
         mdata: MuData,
@@ -976,14 +995,14 @@ class Milo:
         return_fig: bool | None = None,
         save: bool | str | None = None,
         show: bool | None = None,
-    ) -> None:
+    ) -> Figure | Axes | None:
         """Plot boxplot of cell numbers vs condition of interest.
         Args:
             mdata: MuData object storing cell level and nhood level information
             test_var: Name of column in adata.obs storing condition of interest (y-axis for boxplot)
-            subset_nhoods: List of obs_names for neighbourhoods to include in plot. If None, plot all nhoods. Defaults to None.
-            log_counts: Whether to plot log1p of cell counts. Defaults to False.
+            subset_nhoods: List of obs_names for neighbourhoods to include in plot. If None, plot all nhoods.
+            log_counts: Whether to plot log1p of cell counts.
         """
         try:
             nhood_adata = mdata["milo"].T.copy()
@@ -1014,9 +1033,13 @@ class Milo:
         if save:
             plt.savefig(save, bbox_inches="tight")
+            return None
         if show:
             plt.show()
+            return None
         if return_fig:
             return plt.gcf()
         if not (show or save):
             return plt.gca()
+        return None

pertpy/tools/_mixscape.py CHANGED Viewed

@@ -178,7 +178,7 @@ class Mixscape:
             split_by: Provide the column `.obs` if multiple biological replicates exist to calculate
                     the perturbation signature for every replicate separately.
             pval_cutoff: P-value cut-off for selection of significantly DE genes.
-            perturbation_type: specify type of CRISPR perturbation expected for labeling mixscape classifications. Defaults to KO.
+            perturbation_type: specify type of CRISPR perturbation expected for labeling mixscape classifications.
             copy: Determines whether a copy of the `adata` is returned.
         Returns:
@@ -227,7 +227,7 @@ class Mixscape:
                 X = adata_comp.layers["X_pert"]
             except KeyError:
                 raise KeyError(
-                    "No 'X_pert' found in .layers! Please run pert_sign first to calculate perturbation signature!"
+                    "No 'X_pert' found in .layers! Please run perturbation_signature first to calculate perturbation signature!"
                 ) from None
         # initialize return variables
         adata.obs[f"{new_class_name}_p_{perturbation_type.lower()}"] = 0
@@ -315,7 +315,9 @@ class Mixscape:
                     )
                 adata.obs[f"{new_class_name}_global"] = [a.split(" ")[-1] for a in adata.obs[new_class_name]]
-                adata.obs.loc[orig_guide_cells_index, f"{new_class_name}_p_{perturbation_type.lower()}"] = post_prob
+                adata.obs.loc[orig_guide_cells_index, f"{new_class_name}_p_{perturbation_type.lower()}"] = np.round(
+                    post_prob
+                ).astype("int64")
         adata.uns["mixscape"] = gv_list
         if copy:
@@ -344,15 +346,13 @@ class Mixscape:
             control: Control category from the `pert_key` column.
             mixscape_class_global: The column of `.obs` with mixscape global classification result (perturbed, NP or NT).
             layer: Key from `adata.layers` whose value will be used to perform tests on.
-            control: Control category from the `pert_key` column. Defaults to 'NT'.
-            n_comps: Number of principal components to use. Defaults to 10.
+            control: Control category from the `pert_key` column.
+            n_comps: Number of principal components to use.
             min_de_genes: Required number of genes that are differentially expressed for method to separate perturbed and non-perturbed cells.
             logfc_threshold: Limit testing to genes which show, on average, at least X-fold difference (log-scale) between the two groups of cells.
-                             Defaults to 0.25.
             split_by: Provide the column `.obs` if multiple biological replicates exist to calculate
             pval_cutoff: P-value cut-off for selection of significantly DE genes.
             perturbation_type: Specify type of CRISPR perturbation expected for labeling mixscape classifications.
-                               Defaults to KO.
             copy: Determines whether a copy of the `adata` is returned.
         Returns:
@@ -461,7 +461,13 @@ class Mixscape:
             adata_split = adata[split_mask].copy()
             # find top DE genes between cells with targeting and non-targeting gRNAs
             sc.tl.rank_genes_groups(
-                adata_split, layer=layer, groupby=labels, groups=genes, reference=control, method="t-test"
+                adata_split,
+                layer=layer,
+                groupby=labels,
+                groups=genes,
+                reference=control,
+                method="t-test",
+                use_raw=False,
             )
             # get DE genes for each gene
             for gene in genes:
@@ -704,7 +710,6 @@ class Mixscape:
             before_mixscape: Option to split densities based on mixscape classification (default) or original target gene classification.
                              Default is set to NULL and plots cells by original class ID.
             perturbation_type: Specify type of CRISPR perturbation expected for labeling mixscape classifications.
-                               Defaults to `KO`.
         Examples:
             Visualizing the perturbation scores for the cells in a dataset:
@@ -881,7 +886,7 @@ class Mixscape:
             keys: Keys for accessing variables of `.var_names` or fields of `.obs`. Default is 'mixscape_class_p_ko'.
             groupby: The key of the observation grouping to consider. Default is 'mixscape_class'.
             log: Plot on logarithmic axis.
-            use_raw: Whether to use `raw` attribute of `adata`. Defaults to `True` if `.raw` is present.
+            use_raw: Whether to use `raw` attribute of `adata`.
             stripplot: Add a stripplot on top of the violin plot.
             order: Order in which to show the categories.
             xlabel: Label of the x-axis. Defaults to `groupby` if `rotation` is `None`, otherwise, no label is shown.
@@ -1075,7 +1080,6 @@ class Mixscape:
             mixscape_class: The column of `.obs` with the mixscape classification result.
             mixscape_class_global: The column of `.obs` with mixscape global classification result (perturbed, NP or NT).
             perturbation_type: Specify type of CRISPR perturbation expected for labeling mixscape classifications.
-                               Defaults to 'KO'.
             lda_key: If not specified, lda looks .uns["mixscape_lda"] for the LDA results.
             n_components: The number of dimensions of the embedding.
             show: Show the plot, do not return axis.

pertpy/tools/_perturbation_space/_clustering.py CHANGED Viewed

@@ -7,6 +7,8 @@ from sklearn.metrics import pairwise_distances
 from pertpy.tools._perturbation_space._perturbation_space import PerturbationSpace
 if TYPE_CHECKING:
+    from collections.abc import Iterable
     from anndata import AnnData
@@ -14,6 +16,7 @@ class ClusteringSpace(PerturbationSpace):
     """Applies various clustering techniques to an embedding."""
     def __init__(self):
+        super().__init__()
         self.X = None
     def evaluate_clustering(
@@ -21,7 +24,7 @@ class ClusteringSpace(PerturbationSpace):
         adata: AnnData,
         true_label_col: str,
         cluster_col: str,
-        metrics: list[str] = None,
+        metrics: Iterable[str] = None,
         **kwargs,
     ):
         """Evaluation of previously computed clustering against ground truth labels.
@@ -30,7 +33,7 @@ class ClusteringSpace(PerturbationSpace):
             adata: AnnData object that contains the clustered data and the cluster labels.
             true_label_col: ground truth labels.
             cluster_col: cluster computed labels.
-            metrics: Metrics to compute. Defaults to ['nmi', 'ari', 'asw'].
+            metrics: Metrics to compute. If `None` it defaults to ["nmi", "ari", "asw"].
             **kwargs: Additional arguments to pass to the metrics. For nmi, average_method can be passed.
                 For asw, metric, distances, sample_size, and random_state can be passed.

pertpy 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

pertpy 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl