pertpy 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pertpy/__init__.py +2 -1
- pertpy/data/__init__.py +61 -0
- pertpy/data/_dataloader.py +27 -23
- pertpy/data/_datasets.py +58 -0
- pertpy/metadata/__init__.py +2 -0
- pertpy/metadata/_cell_line.py +39 -70
- pertpy/metadata/_compound.py +3 -4
- pertpy/metadata/_drug.py +2 -6
- pertpy/metadata/_look_up.py +38 -51
- pertpy/metadata/_metadata.py +7 -10
- pertpy/metadata/_moa.py +2 -6
- pertpy/plot/__init__.py +0 -5
- pertpy/preprocessing/__init__.py +2 -0
- pertpy/preprocessing/_guide_rna.py +2 -3
- pertpy/tools/__init__.py +42 -4
- pertpy/tools/_augur.py +14 -15
- pertpy/tools/_cinemaot.py +2 -2
- pertpy/tools/_coda/_base_coda.py +118 -142
- pertpy/tools/_coda/_sccoda.py +16 -15
- pertpy/tools/_coda/_tasccoda.py +21 -22
- pertpy/tools/_dialogue.py +18 -23
- pertpy/tools/_differential_gene_expression/__init__.py +20 -0
- pertpy/tools/_differential_gene_expression/_base.py +657 -0
- pertpy/tools/_differential_gene_expression/_checks.py +41 -0
- pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
- pertpy/tools/_differential_gene_expression/_edger.py +125 -0
- pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
- pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
- pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
- pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
- pertpy/tools/_distances/_distance_tests.py +21 -16
- pertpy/tools/_distances/_distances.py +406 -70
- pertpy/tools/_enrichment.py +10 -15
- pertpy/tools/_kernel_pca.py +1 -1
- pertpy/tools/_milo.py +76 -53
- pertpy/tools/_mixscape.py +15 -11
- pertpy/tools/_perturbation_space/_clustering.py +5 -2
- pertpy/tools/_perturbation_space/_comparison.py +112 -0
- pertpy/tools/_perturbation_space/_discriminator_classifiers.py +20 -22
- pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
- pertpy/tools/_perturbation_space/_simple.py +3 -3
- pertpy/tools/_scgen/__init__.py +1 -1
- pertpy/tools/_scgen/_base_components.py +2 -3
- pertpy/tools/_scgen/_scgen.py +33 -28
- pertpy/tools/_scgen/_utils.py +2 -2
- {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +22 -13
- pertpy-0.8.0.dist-info/RECORD +57 -0
- {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
- pertpy/plot/_augur.py +0 -171
- pertpy/plot/_coda.py +0 -601
- pertpy/plot/_guide_rna.py +0 -64
- pertpy/plot/_milopy.py +0 -209
- pertpy/plot/_mixscape.py +0 -355
- pertpy/tools/_differential_gene_expression.py +0 -325
- pertpy-0.7.0.dist-info/RECORD +0 -53
- {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0
pertpy/tools/_coda/_tasccoda.py
CHANGED
@@ -10,9 +10,9 @@ import numpyro.distributions as npd
|
|
10
10
|
import toytree as tt
|
11
11
|
from anndata import AnnData
|
12
12
|
from jax import config, random
|
13
|
+
from lamin_utils import logger
|
13
14
|
from mudata import MuData
|
14
15
|
from numpyro.infer import Predictive
|
15
|
-
from rich import print
|
16
16
|
|
17
17
|
from pertpy.tools._coda._base_coda import (
|
18
18
|
CompositionalModel2,
|
@@ -85,18 +85,18 @@ class Tasccoda(CompositionalModel2):
|
|
85
85
|
Args:
|
86
86
|
adata: AnnData object.
|
87
87
|
type: Specify the input adata type, which could be either a cell-level AnnData or an aggregated sample-level AnnData.
|
88
|
-
cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types.
|
89
|
-
sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample.
|
90
|
-
covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored.
|
91
|
-
covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored.
|
92
|
-
covariate_df: If type is "cell_level", specify dataFrame with covariates.
|
93
|
-
dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object.
|
94
|
-
levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
|
95
|
-
levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
|
96
|
-
add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}.
|
97
|
-
key_added: If not specified, the tree is stored in .uns[‘tree’]. If `data` is AnnData, save tree in `data`. If `data` is MuData, save tree in data[modality_2].
|
98
|
-
modality_key_1: Key to the cell-level AnnData in the MuData object.
|
99
|
-
modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
|
88
|
+
cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types.
|
89
|
+
sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample.
|
90
|
+
covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored.
|
91
|
+
covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored.
|
92
|
+
covariate_df: If type is "cell_level", specify dataFrame with covariates.
|
93
|
+
dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object.
|
94
|
+
levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
|
95
|
+
levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
|
96
|
+
add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}.
|
97
|
+
key_added: If not specified, the tree is stored in .uns['tree']. If `data` is AnnData, save tree in `data`. If `data` is MuData, save tree in data[modality_2].
|
98
|
+
modality_key_1: Key to the cell-level AnnData in the MuData object.
|
99
|
+
modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
|
100
100
|
|
101
101
|
Returns:
|
102
102
|
MuData: MuData object with cell-level AnnData (`mudata[modality_key_1]`) and aggregated sample-level AnnData (`mudata[modality_key_2]`).
|
@@ -155,14 +155,13 @@ class Tasccoda(CompositionalModel2):
|
|
155
155
|
To set a different level as the base category for a categorical covariate, use "C(<CovariateName>, Treatment('<ReferenceLevelName>'))"
|
156
156
|
reference_cell_type: Column name that sets the reference cell type.
|
157
157
|
If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen.
|
158
|
-
Defaults to "automatic".
|
159
158
|
automatic_reference_absence_threshold: If using reference_cell_type = "automatic",
|
160
159
|
determine the maximum fraction of zero entries for a cell type
|
161
|
-
to be considered as a possible reference cell type.
|
160
|
+
to be considered as a possible reference cell type.
|
162
161
|
tree_key: Key in `adata.uns` that contains the tree structure
|
163
162
|
pen_args: Dictionary with penalty arguments. With `reg="scaled_3"`, the parameters phi (aggregation bias), lambda_1, lambda_0 can be set here.
|
164
163
|
See the tascCODA paper for an explanation of these parameters. Default: lambda_0 = 50, lambda_1 = 5, phi = 0.
|
165
|
-
modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object.
|
164
|
+
modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object.
|
166
165
|
|
167
166
|
Returns:
|
168
167
|
Return an AnnData (if input data is an AnnData object) or return a MuData (if input data is a MuData object)
|
@@ -208,7 +207,7 @@ class Tasccoda(CompositionalModel2):
|
|
208
207
|
) from None
|
209
208
|
|
210
209
|
# toytree tree - only for legacy reasons, can be removed in the final version
|
211
|
-
if isinstance(adata.uns[tree_key], tt.
|
210
|
+
if isinstance(adata.uns[tree_key], tt.core.ToyTree):
|
212
211
|
# Collapse singularities in the tree
|
213
212
|
phy_tree = collapse_singularities(adata.uns[tree_key])
|
214
213
|
|
@@ -477,10 +476,10 @@ class Tasccoda(CompositionalModel2):
|
|
477
476
|
|
478
477
|
Args:
|
479
478
|
data: AnnData object or MuData object.
|
480
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
481
|
-
rng_key: The rng state used for the prior simulation. If None, a random state will be selected.
|
482
|
-
num_prior_samples: Number of prior samples calculated.
|
483
|
-
use_posterior_predictive: If True, the posterior predictive will be calculated.
|
479
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
480
|
+
rng_key: The rng state used for the prior simulation. If None, a random state will be selected.
|
481
|
+
num_prior_samples: Number of prior samples calculated.
|
482
|
+
use_posterior_predictive: If True, the posterior predictive will be calculated.
|
484
483
|
|
485
484
|
Returns:
|
486
485
|
arviz.InferenceData: arviz_data
|
@@ -504,7 +503,7 @@ class Tasccoda(CompositionalModel2):
|
|
504
503
|
try:
|
505
504
|
sample_adata = data[modality_key]
|
506
505
|
except IndexError:
|
507
|
-
|
506
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
508
507
|
raise
|
509
508
|
if isinstance(data, AnnData):
|
510
509
|
sample_adata = data
|
pertpy/tools/_dialogue.py
CHANGED
@@ -13,8 +13,8 @@ import seaborn as sns
|
|
13
13
|
import statsmodels.formula.api as smf
|
14
14
|
import statsmodels.stats.multitest as ssm
|
15
15
|
from anndata import AnnData
|
16
|
+
from lamin_utils import logger
|
16
17
|
from pandas import DataFrame
|
17
|
-
from rich import print
|
18
18
|
from rich.console import Group
|
19
19
|
from rich.live import Live
|
20
20
|
from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn
|
@@ -291,7 +291,7 @@ class Dialogue:
|
|
291
291
|
mcp_name: Name of mcp which was used for calculation of column value.
|
292
292
|
max_length: Value needed to later decide at what index the threshold value should be extracted from column.
|
293
293
|
min_threshold: Minimal threshold to select final scores by if it is smaller than calculated threshold.
|
294
|
-
index: Column index to use eto calculate the significant genes.
|
294
|
+
index: Column index to use to calculate the significant genes.
|
295
295
|
|
296
296
|
Returns:
|
297
297
|
According to the values in a df column (default: zscore) the significant up and downregulated gene names
|
@@ -377,12 +377,6 @@ class Dialogue:
|
|
377
377
|
|
378
378
|
`argmin|Ax - y|`
|
379
379
|
|
380
|
-
Args:
|
381
|
-
A_orig:
|
382
|
-
y_orig:
|
383
|
-
feature_ranks:
|
384
|
-
n_iter: Passed to scipy.optimize.nnls. Defaults to 1000.
|
385
|
-
|
386
380
|
Returns:
|
387
381
|
Returns the aggregated coefficients from nnls.
|
388
382
|
"""
|
@@ -572,8 +566,8 @@ class Dialogue:
|
|
572
566
|
Args:
|
573
567
|
adata: AnnData object to generate celltype objects for
|
574
568
|
ct_order: The order of cell types
|
575
|
-
agg_pca: Whether to aggregate pseudobulks with PCA or not.
|
576
|
-
normalize: Whether to mimic DIALOGUE behavior or not.
|
569
|
+
agg_pca: Whether to aggregate pseudobulks with PCA or not.
|
570
|
+
normalize: Whether to mimic DIALOGUE behavior or not.
|
577
571
|
|
578
572
|
Returns:
|
579
573
|
A celltype_label:array dictionary.
|
@@ -613,7 +607,6 @@ class Dialogue:
|
|
613
607
|
agg_pca: Whether to calculate cell-averaged PCA components.
|
614
608
|
solver: Which solver to use for PMD. Must be one of "lp" (linear programming) or "bs" (binary search).
|
615
609
|
For differences between these two, please refer to https://github.com/theislab/sparsecca/blob/main/examples/linear_programming_multicca.ipynb
|
616
|
-
Defaults to 'bs'.
|
617
610
|
normalize: Whether to mimic DIALOGUE as close as possible
|
618
611
|
|
619
612
|
Returns:
|
@@ -640,9 +633,15 @@ class Dialogue:
|
|
640
633
|
|
641
634
|
n_samples = mcca_in[0].shape[1]
|
642
635
|
if penalties is None:
|
643
|
-
|
644
|
-
|
645
|
-
|
636
|
+
try:
|
637
|
+
penalties = multicca_permute(
|
638
|
+
mcca_in, penalties=np.sqrt(n_samples) / 2, nperms=10, niter=50, standardize=True
|
639
|
+
)["bestpenalties"]
|
640
|
+
except ValueError as e:
|
641
|
+
if "matmul: input operand 1 has a mismatch in its core dimension" in str(e):
|
642
|
+
raise ValueError("Please ensure that every cell type is represented in every sample.") from e
|
643
|
+
else:
|
644
|
+
raise
|
646
645
|
else:
|
647
646
|
penalties = penalties
|
648
647
|
|
@@ -756,10 +755,10 @@ class Dialogue:
|
|
756
755
|
mcps.append(mcp)
|
757
756
|
|
758
757
|
if len(mcps) == 0:
|
759
|
-
|
758
|
+
logger.warning(f"No shared MCPs between {cell_type_1} and {cell_type_2}.")
|
760
759
|
continue
|
761
760
|
|
762
|
-
|
761
|
+
logger.info(f"{len(mcps)} MCPs identified for {cell_type_1} and {cell_type_2}.")
|
763
762
|
|
764
763
|
new_mcp_scores: dict[Any, list[Any]]
|
765
764
|
cca_sig, new_mcp_scores = self._calculate_cca_sig(
|
@@ -912,9 +911,7 @@ class Dialogue:
|
|
912
911
|
results: dl.MultilevelModeling result object.
|
913
912
|
MCP: MCP key of the result object.
|
914
913
|
threshold: Number between [0,1]. The fraction of cell types compared against which must have the associated MCP gene.
|
915
|
-
Defaults to 0.70.
|
916
914
|
focal_celltypes: None (compare against all cell types) or a list of other cell types which you want to compare against.
|
917
|
-
Defaults to None.
|
918
915
|
|
919
916
|
Returns:
|
920
917
|
Dict with keys 'up_genes' and 'down_genes' and values of lists of genes
|
@@ -993,10 +990,8 @@ class Dialogue:
|
|
993
990
|
Args:
|
994
991
|
ct_subs: Dialogue output ct_subs dictionary
|
995
992
|
mcp: The name of the marker gene expression column.
|
996
|
-
Defaults to "mcp_0".
|
997
993
|
fraction: Fraction of extreme cells to consider for gene ranking.
|
998
994
|
Should be between 0 and 1.
|
999
|
-
Defaults to 0.1.
|
1000
995
|
|
1001
996
|
Returns:
|
1002
997
|
Dictionary where keys are subpopulation names and values are Anndata
|
@@ -1035,7 +1030,7 @@ class Dialogue:
|
|
1035
1030
|
Args:
|
1036
1031
|
ct_subs: Dialogue output ct_subs dictionary
|
1037
1032
|
fraction: Fraction of extreme cells to consider for gene ranking.
|
1038
|
-
Should be between 0 and 1.
|
1033
|
+
Should be between 0 and 1.
|
1039
1034
|
|
1040
1035
|
Returns:
|
1041
1036
|
Nested dictionary where keys of the first level are MCPs (of the form "mcp_0" etc)
|
@@ -1085,7 +1080,7 @@ class Dialogue:
|
|
1085
1080
|
split_key: Variable in adata.obs used to split the data.
|
1086
1081
|
celltype_key: Key for cell type annotations.
|
1087
1082
|
split_which: Which values of split_key to plot. Required if more than 2 values in split_key.
|
1088
|
-
mcp: Key for MCP data.
|
1083
|
+
mcp: Key for MCP data.
|
1089
1084
|
|
1090
1085
|
Returns:
|
1091
1086
|
A :class:`~matplotlib.axes.Axes` object
|
@@ -1144,7 +1139,7 @@ class Dialogue:
|
|
1144
1139
|
celltype_key: Key in `adata.obs` containing cell type annotations.
|
1145
1140
|
color: Key in `adata.obs` for color annotations. This parameter is used as the hue
|
1146
1141
|
sample_id: Key in `adata.obs` for the sample annotations.
|
1147
|
-
mcp: Key in `adata.obs` for MCP feature values.
|
1142
|
+
mcp: Key in `adata.obs` for MCP feature values.
|
1148
1143
|
|
1149
1144
|
Returns:
|
1150
1145
|
Seaborn Pairgrid object.
|
@@ -0,0 +1,20 @@
|
|
1
|
+
from ._base import ContrastType, LinearModelBase, MethodBase
|
2
|
+
from ._dge_comparison import DGEEVAL
|
3
|
+
from ._edger import EdgeR
|
4
|
+
from ._pydeseq2 import PyDESeq2
|
5
|
+
from ._simple_tests import SimpleComparisonBase, TTest, WilcoxonTest
|
6
|
+
from ._statsmodels import Statsmodels
|
7
|
+
|
8
|
+
__all__ = [
|
9
|
+
"MethodBase",
|
10
|
+
"LinearModelBase",
|
11
|
+
"EdgeR",
|
12
|
+
"PyDESeq2",
|
13
|
+
"Statsmodels",
|
14
|
+
"SimpleComparisonBase",
|
15
|
+
"WilcoxonTest",
|
16
|
+
"TTest",
|
17
|
+
"ContrastType",
|
18
|
+
]
|
19
|
+
|
20
|
+
AVAILABLE_METHODS = [Statsmodels, EdgeR, PyDESeq2, WilcoxonTest, TTest]
|