PyPI - pertpy - Versions diffs - 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

pertpy 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

pertpy/__init__.py +2 -1
pertpy/data/__init__.py +61 -0
pertpy/data/_dataloader.py +27 -23
pertpy/data/_datasets.py +58 -0
pertpy/metadata/__init__.py +2 -0
pertpy/metadata/_cell_line.py +39 -70
pertpy/metadata/_compound.py +3 -4
pertpy/metadata/_drug.py +2 -6
pertpy/metadata/_look_up.py +38 -51
pertpy/metadata/_metadata.py +7 -10
pertpy/metadata/_moa.py +2 -6
pertpy/plot/__init__.py +0 -5
pertpy/preprocessing/__init__.py +2 -0
pertpy/preprocessing/_guide_rna.py +2 -3
pertpy/tools/__init__.py +42 -4
pertpy/tools/_augur.py +14 -15
pertpy/tools/_cinemaot.py +2 -2
pertpy/tools/_coda/_base_coda.py +118 -142
pertpy/tools/_coda/_sccoda.py +16 -15
pertpy/tools/_coda/_tasccoda.py +21 -22
pertpy/tools/_dialogue.py +18 -23
pertpy/tools/_differential_gene_expression/__init__.py +20 -0
pertpy/tools/_differential_gene_expression/_base.py +657 -0
pertpy/tools/_differential_gene_expression/_checks.py +41 -0
pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
pertpy/tools/_differential_gene_expression/_edger.py +125 -0
pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
pertpy/tools/_distances/_distance_tests.py +21 -16
pertpy/tools/_distances/_distances.py +406 -70
pertpy/tools/_enrichment.py +10 -15
pertpy/tools/_kernel_pca.py +1 -1
pertpy/tools/_milo.py +76 -53
pertpy/tools/_mixscape.py +15 -11
pertpy/tools/_perturbation_space/_clustering.py +5 -2
pertpy/tools/_perturbation_space/_comparison.py +112 -0
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +20 -22
pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
pertpy/tools/_perturbation_space/_simple.py +3 -3
pertpy/tools/_scgen/__init__.py +1 -1
pertpy/tools/_scgen/_base_components.py +2 -3
pertpy/tools/_scgen/_scgen.py +33 -28
pertpy/tools/_scgen/_utils.py +2 -2
{pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +22 -13
pertpy-0.8.0.dist-info/RECORD +57 -0
{pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
pertpy/plot/_augur.py +0 -171
pertpy/plot/_coda.py +0 -601
pertpy/plot/_guide_rna.py +0 -64
pertpy/plot/_milopy.py +0 -209
pertpy/plot/_mixscape.py +0 -355
pertpy/tools/_differential_gene_expression.py +0 -325
pertpy-0.7.0.dist-info/RECORD +0 -53
{pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0

pertpy/tools/_coda/_tasccoda.py CHANGED Viewed

@@ -10,9 +10,9 @@ import numpyro.distributions as npd
 import toytree as tt
 from anndata import AnnData
 from jax import config, random
+from lamin_utils import logger
 from mudata import MuData
 from numpyro.infer import Predictive
-from rich import print
 from pertpy.tools._coda._base_coda import (
     CompositionalModel2,
@@ -85,18 +85,18 @@ class Tasccoda(CompositionalModel2):
         Args:
             adata: AnnData object.
             type: Specify the input adata type, which could be either a cell-level AnnData or an aggregated sample-level AnnData.
-            cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types. Defaults to None.
-            sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample. Defaults to None.
-            covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored. Defaults to None.
-            covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored. Defaults to None.
-            covariate_df: If type is "cell_level", specify dataFrame with covariates. Defaults to None.
-            dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object. Defaults to None.
-            levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level. Defaults to None.
-            levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level. Defaults to None.
-            add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}. Defaults to False.
-            key_added: If not specified, the tree is stored in .uns[‘tree’]. If `data` is AnnData, save tree in `data`. If `data` is MuData, save tree in data[modality_2]. Defaults to "tree".
-            modality_key_1: Key to the cell-level AnnData in the MuData object. Defaults to "rna".
-            modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object. Defaults to "coda".
+            cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types.
+            sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample.
+            covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored.
+            covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored.
+            covariate_df: If type is "cell_level", specify dataFrame with covariates.
+            dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object.
+            levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
+            levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
+            add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}.
+            key_added: If not specified, the tree is stored in .uns[‘tree’]. If `data` is AnnData, save tree in `data`. If `data` is MuData, save tree in data[modality_2].
+            modality_key_1: Key to the cell-level AnnData in the MuData object.
+            modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
         Returns:
             MuData: MuData object with cell-level AnnData (`mudata[modality_key_1]`) and aggregated sample-level AnnData (`mudata[modality_key_2]`).
@@ -155,14 +155,13 @@ class Tasccoda(CompositionalModel2):
                      To set a different level as the base category for a categorical covariate, use "C(<CovariateName>, Treatment('<ReferenceLevelName>'))"
             reference_cell_type: Column name that sets the reference cell type.
                                  If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samlpes will be chosen.
-                                 Defaults to "automatic".
             automatic_reference_absence_threshold: If using reference_cell_type = "automatic",
                                                    determine the maximum fraction of zero entries for a cell type
-                                                   to be considered as a possible reference cell type. Defaults to 0.05.
+                                                   to be considered as a possible reference cell type.
             tree_key: Key in `adata.uns` that contains the tree structure
             pen_args: Dictionary with penalty arguments. With `reg="scaled_3"`, the parameters phi (aggregation bias), lambda_1, lambda_0 can be set here.
                 See the tascCODA paper for an explanation of these parameters. Default: lambda_0 = 50, lambda_1 = 5, phi = 0.
-            modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object. Defaults to "coda".
+            modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object.
         Returns:
             Return an AnnData (if input data is an AnnData object) or return a MuData (if input data is a MuData object)
@@ -208,7 +207,7 @@ class Tasccoda(CompositionalModel2):
             ) from None
         # toytree tree - only for legacy reasons, can be removed in the final version
-        if isinstance(adata.uns[tree_key], tt.tree):
+        if isinstance(adata.uns[tree_key], tt.core.ToyTree):
             # Collapse singularities in the tree
             phy_tree = collapse_singularities(adata.uns[tree_key])
@@ -477,10 +476,10 @@ class Tasccoda(CompositionalModel2):
         Args:
             data: AnnData object or MuData object.
-            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
-            rng_key: The rng state used for the prior simulation. If None, a random state will be selected. Defaults to None.
-            num_prior_samples: Number of prior samples calculated. Defaults to 500.
-            use_posterior_predictive: If True, the posterior predictive will be calculated. Defaults to True.
+            modality_key: If data is a MuData object, specify which modality to use.
+            rng_key: The rng state used for the prior simulation. If None, a random state will be selected.
+            num_prior_samples: Number of prior samples calculated.
+            use_posterior_predictive: If True, the posterior predictive will be calculated.
         Returns:
             arviz.InferenceData: arviz_data
@@ -504,7 +503,7 @@ class Tasccoda(CompositionalModel2):
             try:
                 sample_adata = data[modality_key]
             except IndexError:
-                print("When data is a MuData object, modality_key must be specified!")
+                logger.error("When data is a MuData object, modality_key must be specified!")
                 raise
         if isinstance(data, AnnData):
             sample_adata = data

pertpy/tools/_dialogue.py CHANGED Viewed

@@ -13,8 +13,8 @@ import seaborn as sns
 import statsmodels.formula.api as smf
 import statsmodels.stats.multitest as ssm
 from anndata import AnnData
+from lamin_utils import logger
 from pandas import DataFrame
-from rich import print
 from rich.console import Group
 from rich.live import Live
 from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn
@@ -291,7 +291,7 @@ class Dialogue:
             mcp_name: Name of mcp which was used for calculation of column value.
             max_length: Value needed to later decide at what index the threshold value should be extracted from column.
             min_threshold: Minimal threshold to select final scores by if it is smaller than calculated threshold.
-            index: Column index to use eto calculate the significant genes. Defaults to `z_score`.
+            index: Column index to use eto calculate the significant genes.
         Returns:
             According to the values in a df column (default: zscore) the significant up and downregulated gene names
@@ -377,12 +377,6 @@ class Dialogue:
             `argmin|Ax - y|`
-        Args:
-            A_orig:
-            y_orig:
-            feature_ranks:
-            n_iter: Passed to scipy.optimize.nnls. Defaults to 1000.
         Returns:
             Returns the aggregated coefficients from nnls.
         """
@@ -572,8 +566,8 @@ class Dialogue:
         Args:
             adata: AnnData object generate celltype objects for
             ct_order: The order of cell types
-            agg_pca: Whether to aggregate pseudobulks with PCA or not. Defaults to True.
-            normalize: Whether to mimic DIALOGUE behavior or not. Defaults to True.
+            agg_pca: Whether to aggregate pseudobulks with PCA or not.
+            normalize: Whether to mimic DIALOGUE behavior or not.
         Returns:
             A celltype_label:array dictionary.
@@ -613,7 +607,6 @@ class Dialogue:
             agg_pca: Whether to calculate cell-averaged PCA components.
             solver: Which solver to use for PMD. Must be one of "lp" (linear programming) or "bs" (binary search).
                     For differences between these to please refer to https://github.com/theislab/sparsecca/blob/main/examples/linear_programming_multicca.ipynb
-                    Defaults to 'bs'.
             normalize: Whether to mimic DIALOGUE as close as possible
         Returns:
@@ -640,9 +633,15 @@ class Dialogue:
         n_samples = mcca_in[0].shape[1]
         if penalties is None:
-            penalties = multicca_permute(
-                mcca_in, penalties=np.sqrt(n_samples) / 2, nperms=10, niter=50, standardize=True
-            )["bestpenalties"]
+            try:
+                penalties = multicca_permute(
+                    mcca_in, penalties=np.sqrt(n_samples) / 2, nperms=10, niter=50, standardize=True
+                )["bestpenalties"]
+            except ValueError as e:
+                if "matmul: input operand 1 has a mismatch in its core dimension" in str(e):
+                    raise ValueError("Please ensure that every cell type is represented in every sample.") from e
+                else:
+                    raise
         else:
             penalties = penalties
@@ -756,10 +755,10 @@ class Dialogue:
                         mcps.append(mcp)
                 if len(mcps) == 0:
-                    print(f"[bold red]No shared MCPs between {cell_type_1} and {cell_type_2}.")
+                    logger.warning(f"No shared MCPs between {cell_type_1} and {cell_type_2}.")
                     continue
-                print(f"[bold blue]{len(mcps)} MCPs identified for {cell_type_1} and {cell_type_2}.")
+                logger.info(f"{len(mcps)} MCPs identified for {cell_type_1} and {cell_type_2}.")
                 new_mcp_scores: dict[Any, list[Any]]
                 cca_sig, new_mcp_scores = self._calculate_cca_sig(
@@ -912,9 +911,7 @@ class Dialogue:
             results: dl.MultilevelModeling result object.
             MCP: MCP key of the result object.
             threshold: Number between [0,1]. The fraction of cell types compared against which must have the associated MCP gene.
-                        Defaults to 0.70.
             focal_celltypes: None (compare against all cell types) or a list of other cell types which you want to compare against.
-                             Defaults to None.
         Returns:
             Dict with keys 'up_genes' and 'down_genes' and values of lists of genes
@@ -993,10 +990,8 @@ class Dialogue:
         Args:
             ct_subs: Dialogue output ct_subs dictionary
             mcp: The name of the marker gene expression column.
-                 Defaults to "mcp_0".
             fraction: Fraction of extreme cells to consider for gene ranking.
                       Should be between 0 and 1.
-                      Defaults to 0.1.
         Returns:
             Dictionary where keys are subpopulation names and values are Anndata
@@ -1035,7 +1030,7 @@ class Dialogue:
         Args:
             ct_subs: Dialogue output ct_subs dictionary
             fraction: Fraction of extreme cells to consider for gene ranking.
-                      Should be between 0 and 1. Defaults to 0.1.
+                      Should be between 0 and 1.
         Returns:
             Nested dictionary where keys of the first level are MCPs (of the form "mcp_0" etc)
@@ -1085,7 +1080,7 @@ class Dialogue:
             split_key: Variable in adata.obs used to split the data.
             celltype_key: Key for cell type annotations.
             split_which: Which values of split_key to plot. Required if more than 2 values in split_key.
-            mcp: Key for MCP data. Defaults to "mcp_0".
+            mcp: Key for MCP data.
         Returns:
             A :class:`~matplotlib.axes.Axes` object
@@ -1144,7 +1139,7 @@ class Dialogue:
             celltype_key: Key in `adata.obs` containing cell type annotations.
             color: Key in `adata.obs` for color annotations. This parameter is used as the hue
             sample_id: Key in `adata.obs` for the sample annotations.
-            mcp: Key in `adata.obs` for MCP feature values. Defaults to `"mcp_0"`.
+            mcp: Key in `adata.obs` for MCP feature values.
         Returns:
             Seaborn Pairgrid object.

pertpy/tools/_differential_gene_expression/__init__.py ADDED Viewed

@@ -0,0 +1,20 @@
+from ._base import ContrastType, LinearModelBase, MethodBase
+from ._dge_comparison import DGEEVAL
+from ._edger import EdgeR
+from ._pydeseq2 import PyDESeq2
+from ._simple_tests import SimpleComparisonBase, TTest, WilcoxonTest
+from ._statsmodels import Statsmodels
+__all__ = [
+    "MethodBase",
+    "LinearModelBase",
+    "EdgeR",
+    "PyDESeq2",
+    "Statsmodels",
+    "SimpleComparisonBase",
+    "WilcoxonTest",
+    "TTest",
+    "ContrastType",
+]
+AVAILABLE_METHODS = [Statsmodels, EdgeR, PyDESeq2, WilcoxonTest, TTest]

pertpy 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

pertpy 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl