pertpy 0.7.0__py3-none-any.whl → 0.9.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- pertpy/__init__.py +2 -1
- pertpy/data/__init__.py +61 -0
- pertpy/data/_dataloader.py +27 -23
- pertpy/data/_datasets.py +58 -0
- pertpy/metadata/__init__.py +2 -0
- pertpy/metadata/_cell_line.py +39 -70
- pertpy/metadata/_compound.py +3 -4
- pertpy/metadata/_drug.py +2 -6
- pertpy/metadata/_look_up.py +38 -51
- pertpy/metadata/_metadata.py +7 -10
- pertpy/metadata/_moa.py +2 -6
- pertpy/plot/__init__.py +0 -5
- pertpy/preprocessing/__init__.py +2 -0
- pertpy/preprocessing/_guide_rna.py +6 -7
- pertpy/tools/__init__.py +67 -6
- pertpy/tools/_augur.py +14 -15
- pertpy/tools/_cinemaot.py +2 -2
- pertpy/tools/_coda/_base_coda.py +118 -142
- pertpy/tools/_coda/_sccoda.py +16 -15
- pertpy/tools/_coda/_tasccoda.py +21 -22
- pertpy/tools/_dialogue.py +18 -23
- pertpy/tools/_differential_gene_expression/__init__.py +20 -0
- pertpy/tools/_differential_gene_expression/_base.py +657 -0
- pertpy/tools/_differential_gene_expression/_checks.py +41 -0
- pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
- pertpy/tools/_differential_gene_expression/_edger.py +125 -0
- pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
- pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
- pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
- pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
- pertpy/tools/_distances/_distance_tests.py +21 -16
- pertpy/tools/_distances/_distances.py +406 -70
- pertpy/tools/_enrichment.py +10 -15
- pertpy/tools/_kernel_pca.py +1 -1
- pertpy/tools/_milo.py +77 -54
- pertpy/tools/_mixscape.py +15 -11
- pertpy/tools/_perturbation_space/_clustering.py +5 -2
- pertpy/tools/_perturbation_space/_comparison.py +112 -0
- pertpy/tools/_perturbation_space/_discriminator_classifiers.py +21 -23
- pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
- pertpy/tools/_perturbation_space/_simple.py +3 -3
- pertpy/tools/_scgen/__init__.py +1 -1
- pertpy/tools/_scgen/_base_components.py +2 -3
- pertpy/tools/_scgen/_scgen.py +33 -28
- pertpy/tools/_scgen/_utils.py +2 -2
- {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/METADATA +32 -14
- pertpy-0.9.1.dist-info/RECORD +57 -0
- {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/WHEEL +1 -1
- pertpy/plot/_augur.py +0 -171
- pertpy/plot/_coda.py +0 -601
- pertpy/plot/_guide_rna.py +0 -64
- pertpy/plot/_milopy.py +0 -209
- pertpy/plot/_mixscape.py +0 -355
- pertpy/tools/_differential_gene_expression.py +0 -325
- pertpy-0.7.0.dist-info/RECORD +0 -53
- {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/licenses/LICENSE +0 -0
pertpy/tools/_coda/_tasccoda.py
CHANGED
@@ -10,9 +10,9 @@ import numpyro.distributions as npd
|
|
10
10
|
import toytree as tt
|
11
11
|
from anndata import AnnData
|
12
12
|
from jax import config, random
|
13
|
+
from lamin_utils import logger
|
13
14
|
from mudata import MuData
|
14
15
|
from numpyro.infer import Predictive
|
15
|
-
from rich import print
|
16
16
|
|
17
17
|
from pertpy.tools._coda._base_coda import (
|
18
18
|
CompositionalModel2,
|
@@ -85,18 +85,18 @@ class Tasccoda(CompositionalModel2):
|
|
85
85
|
Args:
|
86
86
|
adata: AnnData object.
|
87
87
|
type: Specify the input adata type, which could be either a cell-level AnnData or an aggregated sample-level AnnData.
|
88
|
-
cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types.
|
89
|
-
sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample.
|
90
|
-
covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored.
|
91
|
-
covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored.
|
92
|
-
covariate_df: If type is "cell_level", specify dataFrame with covariates.
|
93
|
-
dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object.
|
94
|
-
levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
|
95
|
-
levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
|
96
|
-
add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}.
|
97
|
-
key_added: If not specified, the tree is stored in .uns[‘tree’]. If `data` is AnnData, save tree in `data`. If `data` is MuData, save tree in data[modality_2].
|
98
|
-
modality_key_1: Key to the cell-level AnnData in the MuData object.
|
99
|
-
modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
|
88
|
+
cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types.
|
89
|
+
sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample.
|
90
|
+
covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored.
|
91
|
+
covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored.
|
92
|
+
covariate_df: If type is "cell_level", specify dataFrame with covariates.
|
93
|
+
dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object.
|
94
|
+
levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
|
95
|
+
levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
|
96
|
+
add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}.
|
97
|
+
key_added: If not specified, the tree is stored in .uns[‘tree’]. If `data` is AnnData, save tree in `data`. If `data` is MuData, save tree in data[modality_2].
|
98
|
+
modality_key_1: Key to the cell-level AnnData in the MuData object.
|
99
|
+
modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
|
100
100
|
|
101
101
|
Returns:
|
102
102
|
MuData: MuData object with cell-level AnnData (`mudata[modality_key_1]`) and aggregated sample-level AnnData (`mudata[modality_key_2]`).
|
@@ -155,14 +155,13 @@ class Tasccoda(CompositionalModel2):
|
|
155
155
|
To set a different level as the base category for a categorical covariate, use "C(<CovariateName>, Treatment('<ReferenceLevelName>'))"
|
156
156
|
reference_cell_type: Column name that sets the reference cell type.
|
157
157
|
If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen.
|
158
|
-
Defaults to "automatic".
|
159
158
|
automatic_reference_absence_threshold: If using reference_cell_type = "automatic",
|
160
159
|
determine the maximum fraction of zero entries for a cell type
|
161
|
-
to be considered as a possible reference cell type.
|
160
|
+
to be considered as a possible reference cell type.
|
162
161
|
tree_key: Key in `adata.uns` that contains the tree structure
|
163
162
|
pen_args: Dictionary with penalty arguments. With `reg="scaled_3"`, the parameters phi (aggregation bias), lambda_1, lambda_0 can be set here.
|
164
163
|
See the tascCODA paper for an explanation of these parameters. Default: lambda_0 = 50, lambda_1 = 5, phi = 0.
|
165
|
-
modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object.
|
164
|
+
modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object.
|
166
165
|
|
167
166
|
Returns:
|
168
167
|
Return an AnnData (if input data is an AnnData object) or return a MuData (if input data is a MuData object)
|
@@ -208,7 +207,7 @@ class Tasccoda(CompositionalModel2):
|
|
208
207
|
) from None
|
209
208
|
|
210
209
|
# toytree tree - only for legacy reasons, can be removed in the final version
|
211
|
-
if isinstance(adata.uns[tree_key], tt.
|
210
|
+
if isinstance(adata.uns[tree_key], tt.core.ToyTree):
|
212
211
|
# Collapse singularities in the tree
|
213
212
|
phy_tree = collapse_singularities(adata.uns[tree_key])
|
214
213
|
|
@@ -477,10 +476,10 @@ class Tasccoda(CompositionalModel2):
|
|
477
476
|
|
478
477
|
Args:
|
479
478
|
data: AnnData object or MuData object.
|
480
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
481
|
-
rng_key: The rng state used for the prior simulation. If None, a random state will be selected.
|
482
|
-
num_prior_samples: Number of prior samples calculated.
|
483
|
-
use_posterior_predictive: If True, the posterior predictive will be calculated.
|
479
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
480
|
+
rng_key: The rng state used for the prior simulation. If None, a random state will be selected.
|
481
|
+
num_prior_samples: Number of prior samples calculated.
|
482
|
+
use_posterior_predictive: If True, the posterior predictive will be calculated.
|
484
483
|
|
485
484
|
Returns:
|
486
485
|
arviz.InferenceData: arviz_data
|
@@ -504,7 +503,7 @@ class Tasccoda(CompositionalModel2):
|
|
504
503
|
try:
|
505
504
|
sample_adata = data[modality_key]
|
506
505
|
except IndexError:
|
507
|
-
|
506
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
508
507
|
raise
|
509
508
|
if isinstance(data, AnnData):
|
510
509
|
sample_adata = data
|
pertpy/tools/_dialogue.py
CHANGED
@@ -13,8 +13,8 @@ import seaborn as sns
|
|
13
13
|
import statsmodels.formula.api as smf
|
14
14
|
import statsmodels.stats.multitest as ssm
|
15
15
|
from anndata import AnnData
|
16
|
+
from lamin_utils import logger
|
16
17
|
from pandas import DataFrame
|
17
|
-
from rich import print
|
18
18
|
from rich.console import Group
|
19
19
|
from rich.live import Live
|
20
20
|
from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn
|
@@ -291,7 +291,7 @@ class Dialogue:
|
|
291
291
|
mcp_name: Name of mcp which was used for calculation of column value.
|
292
292
|
max_length: Value needed to later decide at what index the threshold value should be extracted from column.
|
293
293
|
min_threshold: Minimal threshold to select final scores by if it is smaller than calculated threshold.
|
294
|
-
index: Column index to use to calculate the significant genes.
|
294
|
+
index: Column index to use to calculate the significant genes.
|
295
295
|
|
296
296
|
Returns:
|
297
297
|
According to the values in a df column (default: zscore) the significant up and downregulated gene names
|
@@ -377,12 +377,6 @@ class Dialogue:
|
|
377
377
|
|
378
378
|
`argmin|Ax - y|`
|
379
379
|
|
380
|
-
Args:
|
381
|
-
A_orig:
|
382
|
-
y_orig:
|
383
|
-
feature_ranks:
|
384
|
-
n_iter: Passed to scipy.optimize.nnls. Defaults to 1000.
|
385
|
-
|
386
380
|
Returns:
|
387
381
|
Returns the aggregated coefficients from nnls.
|
388
382
|
"""
|
@@ -572,8 +566,8 @@ class Dialogue:
|
|
572
566
|
Args:
|
573
567
|
adata: AnnData object to generate celltype objects for
|
574
568
|
ct_order: The order of cell types
|
575
|
-
agg_pca: Whether to aggregate pseudobulks with PCA or not.
|
576
|
-
normalize: Whether to mimic DIALOGUE behavior or not.
|
569
|
+
agg_pca: Whether to aggregate pseudobulks with PCA or not.
|
570
|
+
normalize: Whether to mimic DIALOGUE behavior or not.
|
577
571
|
|
578
572
|
Returns:
|
579
573
|
A celltype_label:array dictionary.
|
@@ -613,7 +607,6 @@ class Dialogue:
|
|
613
607
|
agg_pca: Whether to calculate cell-averaged PCA components.
|
614
608
|
solver: Which solver to use for PMD. Must be one of "lp" (linear programming) or "bs" (binary search).
|
615
609
|
For differences between these two, please refer to https://github.com/theislab/sparsecca/blob/main/examples/linear_programming_multicca.ipynb
|
616
|
-
Defaults to 'bs'.
|
617
610
|
normalize: Whether to mimic DIALOGUE as close as possible
|
618
611
|
|
619
612
|
Returns:
|
@@ -640,9 +633,15 @@ class Dialogue:
|
|
640
633
|
|
641
634
|
n_samples = mcca_in[0].shape[1]
|
642
635
|
if penalties is None:
|
643
|
-
|
644
|
-
|
645
|
-
|
636
|
+
try:
|
637
|
+
penalties = multicca_permute(
|
638
|
+
mcca_in, penalties=np.sqrt(n_samples) / 2, nperms=10, niter=50, standardize=True
|
639
|
+
)["bestpenalties"]
|
640
|
+
except ValueError as e:
|
641
|
+
if "matmul: input operand 1 has a mismatch in its core dimension" in str(e):
|
642
|
+
raise ValueError("Please ensure that every cell type is represented in every sample.") from e
|
643
|
+
else:
|
644
|
+
raise
|
646
645
|
else:
|
647
646
|
penalties = penalties
|
648
647
|
|
@@ -756,10 +755,10 @@ class Dialogue:
|
|
756
755
|
mcps.append(mcp)
|
757
756
|
|
758
757
|
if len(mcps) == 0:
|
759
|
-
|
758
|
+
logger.warning(f"No shared MCPs between {cell_type_1} and {cell_type_2}.")
|
760
759
|
continue
|
761
760
|
|
762
|
-
|
761
|
+
logger.info(f"{len(mcps)} MCPs identified for {cell_type_1} and {cell_type_2}.")
|
763
762
|
|
764
763
|
new_mcp_scores: dict[Any, list[Any]]
|
765
764
|
cca_sig, new_mcp_scores = self._calculate_cca_sig(
|
@@ -912,9 +911,7 @@ class Dialogue:
|
|
912
911
|
results: dl.MultilevelModeling result object.
|
913
912
|
MCP: MCP key of the result object.
|
914
913
|
threshold: Number between [0,1]. The fraction of cell types compared against which must have the associated MCP gene.
|
915
|
-
Defaults to 0.70.
|
916
914
|
focal_celltypes: None (compare against all cell types) or a list of other cell types which you want to compare against.
|
917
|
-
Defaults to None.
|
918
915
|
|
919
916
|
Returns:
|
920
917
|
Dict with keys 'up_genes' and 'down_genes' and values of lists of genes
|
@@ -993,10 +990,8 @@ class Dialogue:
|
|
993
990
|
Args:
|
994
991
|
ct_subs: Dialogue output ct_subs dictionary
|
995
992
|
mcp: The name of the marker gene expression column.
|
996
|
-
Defaults to "mcp_0".
|
997
993
|
fraction: Fraction of extreme cells to consider for gene ranking.
|
998
994
|
Should be between 0 and 1.
|
999
|
-
Defaults to 0.1.
|
1000
995
|
|
1001
996
|
Returns:
|
1002
997
|
Dictionary where keys are subpopulation names and values are Anndata
|
@@ -1035,7 +1030,7 @@ class Dialogue:
|
|
1035
1030
|
Args:
|
1036
1031
|
ct_subs: Dialogue output ct_subs dictionary
|
1037
1032
|
fraction: Fraction of extreme cells to consider for gene ranking.
|
1038
|
-
Should be between 0 and 1.
|
1033
|
+
Should be between 0 and 1.
|
1039
1034
|
|
1040
1035
|
Returns:
|
1041
1036
|
Nested dictionary where keys of the first level are MCPs (of the form "mcp_0" etc)
|
@@ -1085,7 +1080,7 @@ class Dialogue:
|
|
1085
1080
|
split_key: Variable in adata.obs used to split the data.
|
1086
1081
|
celltype_key: Key for cell type annotations.
|
1087
1082
|
split_which: Which values of split_key to plot. Required if more than 2 values in split_key.
|
1088
|
-
mcp: Key for MCP data.
|
1083
|
+
mcp: Key for MCP data.
|
1089
1084
|
|
1090
1085
|
Returns:
|
1091
1086
|
A :class:`~matplotlib.axes.Axes` object
|
@@ -1144,7 +1139,7 @@ class Dialogue:
|
|
1144
1139
|
celltype_key: Key in `adata.obs` containing cell type annotations.
|
1145
1140
|
color: Key in `adata.obs` for color annotations. This parameter is used as the hue
|
1146
1141
|
sample_id: Key in `adata.obs` for the sample annotations.
|
1147
|
-
mcp: Key in `adata.obs` for MCP feature values.
|
1142
|
+
mcp: Key in `adata.obs` for MCP feature values.
|
1148
1143
|
|
1149
1144
|
Returns:
|
1150
1145
|
Seaborn Pairgrid object.
|
@@ -0,0 +1,20 @@
|
|
1
|
+
from ._base import ContrastType, LinearModelBase, MethodBase
|
2
|
+
from ._dge_comparison import DGEEVAL
|
3
|
+
from ._edger import EdgeR
|
4
|
+
from ._pydeseq2 import PyDESeq2
|
5
|
+
from ._simple_tests import SimpleComparisonBase, TTest, WilcoxonTest
|
6
|
+
from ._statsmodels import Statsmodels
|
7
|
+
|
8
|
+
__all__ = [
|
9
|
+
"MethodBase",
|
10
|
+
"LinearModelBase",
|
11
|
+
"EdgeR",
|
12
|
+
"PyDESeq2",
|
13
|
+
"Statsmodels",
|
14
|
+
"SimpleComparisonBase",
|
15
|
+
"WilcoxonTest",
|
16
|
+
"TTest",
|
17
|
+
"ContrastType",
|
18
|
+
]
|
19
|
+
|
20
|
+
AVAILABLE_METHODS = [Statsmodels, EdgeR, PyDESeq2, WilcoxonTest, TTest]
|