pertpy 0.7.0__py3-none-any.whl → 0.9.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- pertpy/__init__.py +2 -1
- pertpy/data/__init__.py +61 -0
- pertpy/data/_dataloader.py +27 -23
- pertpy/data/_datasets.py +58 -0
- pertpy/metadata/__init__.py +2 -0
- pertpy/metadata/_cell_line.py +39 -70
- pertpy/metadata/_compound.py +3 -4
- pertpy/metadata/_drug.py +2 -6
- pertpy/metadata/_look_up.py +38 -51
- pertpy/metadata/_metadata.py +7 -10
- pertpy/metadata/_moa.py +2 -6
- pertpy/plot/__init__.py +0 -5
- pertpy/preprocessing/__init__.py +2 -0
- pertpy/preprocessing/_guide_rna.py +6 -7
- pertpy/tools/__init__.py +67 -6
- pertpy/tools/_augur.py +14 -15
- pertpy/tools/_cinemaot.py +2 -2
- pertpy/tools/_coda/_base_coda.py +118 -142
- pertpy/tools/_coda/_sccoda.py +16 -15
- pertpy/tools/_coda/_tasccoda.py +21 -22
- pertpy/tools/_dialogue.py +18 -23
- pertpy/tools/_differential_gene_expression/__init__.py +20 -0
- pertpy/tools/_differential_gene_expression/_base.py +657 -0
- pertpy/tools/_differential_gene_expression/_checks.py +41 -0
- pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
- pertpy/tools/_differential_gene_expression/_edger.py +125 -0
- pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
- pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
- pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
- pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
- pertpy/tools/_distances/_distance_tests.py +21 -16
- pertpy/tools/_distances/_distances.py +406 -70
- pertpy/tools/_enrichment.py +10 -15
- pertpy/tools/_kernel_pca.py +1 -1
- pertpy/tools/_milo.py +77 -54
- pertpy/tools/_mixscape.py +15 -11
- pertpy/tools/_perturbation_space/_clustering.py +5 -2
- pertpy/tools/_perturbation_space/_comparison.py +112 -0
- pertpy/tools/_perturbation_space/_discriminator_classifiers.py +21 -23
- pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
- pertpy/tools/_perturbation_space/_simple.py +3 -3
- pertpy/tools/_scgen/__init__.py +1 -1
- pertpy/tools/_scgen/_base_components.py +2 -3
- pertpy/tools/_scgen/_scgen.py +33 -28
- pertpy/tools/_scgen/_utils.py +2 -2
- {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/METADATA +32 -14
- pertpy-0.9.1.dist-info/RECORD +57 -0
- {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/WHEEL +1 -1
- pertpy/plot/_augur.py +0 -171
- pertpy/plot/_coda.py +0 -601
- pertpy/plot/_guide_rna.py +0 -64
- pertpy/plot/_milopy.py +0 -209
- pertpy/plot/_mixscape.py +0 -355
- pertpy/tools/_differential_gene_expression.py +0 -325
- pertpy-0.7.0.dist-info/RECORD +0 -53
- {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,72 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import pandas as pd
|
3
|
+
import scanpy as sc
|
4
|
+
import statsmodels
|
5
|
+
import statsmodels.api as sm
|
6
|
+
from tqdm.auto import tqdm
|
7
|
+
|
8
|
+
from ._base import LinearModelBase
|
9
|
+
from ._checks import check_is_integer_matrix
|
10
|
+
|
11
|
+
|
12
|
+
class Statsmodels(LinearModelBase):
    """Differential expression test using a statsmodels linear regression."""

    def _check_counts(self):
        # Raw (integer) counts are required for count-based GLM families;
        # delegate the actual validation to the shared helper.
        check_is_integer_matrix(self.data)

    def fit(
        self,
        regression_model: type[sm.OLS] | type[sm.GLM] = sm.OLS,
        **kwargs,
    ) -> None:
        """Fit the specified regression model, one model per gene.

        Fitted models are stored in ``self.models`` in the same order as
        ``self.adata.var_names``.

        Args:
            regression_model: A statsmodels regression model class, either OLS or GLM.
            **kwargs: Additional arguments for fitting the specific method. In particular, this
                is where you can specify the family for GLM.

        Examples:
            >>> import statsmodels.api as sm
            >>> import pertpy as pt
            >>> model = pt.tl.Statsmodels(adata, design="~condition")
            >>> model.fit(sm.GLM, family=sm.families.NegativeBinomial(link=sm.families.links.Log()))
            >>> results = model.test_contrasts(np.array([0, 1]))
        """
        self.models = []
        for var in tqdm(self.adata.var_names):
            mod = regression_model(
                # endog: the expression vector of this gene (optionally from a layer)
                sc.get.obs_df(self.adata, keys=[var], layer=self.layer)[var],
                # exog: the design matrix built by LinearModelBase
                self.design,
                **kwargs,
            )
            mod = mod.fit()
            self.models.append(mod)

    def _test_single_contrast(self, contrast, **kwargs) -> pd.DataFrame:
        """Test a single contrast vector against every fitted per-gene model.

        Args:
            contrast: Contrast vector (length = number of design-matrix columns)
                passed to statsmodels' ``t_test``.
            **kwargs: Ignored; accepted for interface compatibility.

        Returns:
            DataFrame with one row per gene and columns
            ``variable``, ``p_value``, ``t_value``, ``sd``, ``log_fc``,
            ``adj_p_value``, sorted by ``p_value``.
        """
        res = []
        # NOTE: strict=False keeps the original silent-truncation behavior if
        # var_names and models ever diverge in length — TODO consider strict=True.
        for var, mod in zip(tqdm(self.adata.var_names), self.models, strict=False):
            t_test = mod.t_test(contrast)
            res.append(
                {
                    "variable": var,
                    "p_value": t_test.pvalue,
                    "t_value": t_test.tvalue.item(),
                    "sd": t_test.sd.item(),
                    "log_fc": t_test.effect.item(),
                }
            )
        if not res:
            # Empty gene set: return an empty frame with the documented schema
            # (the original raised KeyError on sort_values here).
            return pd.DataFrame(columns=["variable", "p_value", "t_value", "sd", "log_fc", "adj_p_value"])
        results = pd.DataFrame(res).sort_values("p_value")
        # BUG FIX: the original applied fdrcorrection to each p-value in
        # isolation (np.array([t_test.pvalue])); BH correction of a single
        # p-value is the identity, so adj_p_value always equaled p_value.
        # Correct across the full vector of per-gene p-values instead.
        results["adj_p_value"] = statsmodels.stats.multitest.fdrcorrection(results["p_value"].to_numpy())[1]
        return results

    def contrast(self, column: str, baseline: str, group_to_compare: str) -> np.ndarray:
        """Build a simple contrast for pairwise comparisons.

        This is equivalent to

        ```
        model.cond(<column> = baseline) - model.cond(<column> = group_to_compare)
        ```
        """
        return self.cond(**{column: baseline}) - self.cond(**{column: group_to_compare})
|
@@ -23,20 +23,20 @@ class DistanceTest:
|
|
23
23
|
|
24
24
|
Args:
|
25
25
|
metric: Distance metric to use between groups of cells.
|
26
|
-
n_perms: Number of permutations to run.
|
26
|
+
n_perms: Number of permutations to run.
|
27
27
|
layer_key: Name of the counts layer containing raw counts to calculate distances for.
|
28
28
|
Mutually exclusive with 'obsm_key'.
|
29
|
-
|
29
|
+
If equal to `None` the parameter is ignored.
|
30
30
|
obsm_key: Name of embedding in adata.obsm to use.
|
31
31
|
Mutually exclusive with 'counts_layer_key'.
|
32
32
|
Defaults to None, but is set to "X_pca" if not set explicitly internally.
|
33
|
-
alpha: Significance level.
|
34
|
-
correction: Multiple testing correction method.
|
35
|
-
cell_wise_metric: Metric to use between single cells.
|
33
|
+
alpha: Significance level.
|
34
|
+
correction: Multiple testing correction method.
|
35
|
+
cell_wise_metric: Metric to use between single cells.
|
36
36
|
|
37
37
|
Examples:
|
38
38
|
>>> import pertpy as pt
|
39
|
-
>>> adata = pt.dt.
|
39
|
+
>>> adata = pt.dt.distance_example()
|
40
40
|
>>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
|
41
41
|
>>> tab = distance_test(adata, groupby="perturbation", contrast="control")
|
42
42
|
"""
|
@@ -66,11 +66,14 @@ class DistanceTest:
|
|
66
66
|
self.alpha = alpha
|
67
67
|
self.correction = correction
|
68
68
|
self.cell_wise_metric = (
|
69
|
-
cell_wise_metric if cell_wise_metric else Distance(self.metric, self.obsm_key).cell_wise_metric
|
69
|
+
cell_wise_metric if cell_wise_metric else Distance(self.metric, obsm_key=self.obsm_key).cell_wise_metric
|
70
70
|
)
|
71
71
|
|
72
72
|
self.distance = Distance(
|
73
|
-
self.metric,
|
73
|
+
self.metric,
|
74
|
+
layer_key=self.layer_key,
|
75
|
+
obsm_key=self.obsm_key,
|
76
|
+
cell_wise_metric=self.cell_wise_metric,
|
74
77
|
)
|
75
78
|
|
76
79
|
def __call__(
|
@@ -87,7 +90,7 @@ class DistanceTest:
|
|
87
90
|
adata: Annotated data matrix.
|
88
91
|
groupby: Key in adata.obs for grouping cells.
|
89
92
|
contrast: Name of the contrast group.
|
90
|
-
show_progressbar: Whether to print progress.
|
93
|
+
show_progressbar: Whether to print progress.
|
91
94
|
|
92
95
|
Returns:
|
93
96
|
pandas.DataFrame: Results of the permutation test, with columns:
|
@@ -99,7 +102,7 @@ class DistanceTest:
|
|
99
102
|
|
100
103
|
Examples:
|
101
104
|
>>> import pertpy as pt
|
102
|
-
>>> adata = pt.dt.
|
105
|
+
>>> adata = pt.dt.distance_example()
|
103
106
|
>>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
|
104
107
|
>>> tab = distance_test(adata, groupby="perturbation", contrast="control")
|
105
108
|
"""
|
@@ -121,7 +124,7 @@ class DistanceTest:
|
|
121
124
|
adata: Annotated data matrix.
|
122
125
|
groupby: Key in adata.obs for grouping cells.
|
123
126
|
contrast: Name of the contrast group.
|
124
|
-
show_progressbar: Whether to print progress.
|
127
|
+
show_progressbar: Whether to print progress.
|
125
128
|
|
126
129
|
Returns:
|
127
130
|
pandas.DataFrame: Results of the permutation test, with columns:
|
@@ -133,7 +136,7 @@ class DistanceTest:
|
|
133
136
|
|
134
137
|
Examples:
|
135
138
|
>>> import pertpy as pt
|
136
|
-
>>> adata = pt.dt.
|
139
|
+
>>> adata = pt.dt.distance_example()
|
137
140
|
>>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
|
138
141
|
>>> test_results = distance_test.test_xy(adata, groupby="perturbation", contrast="control")
|
139
142
|
"""
|
@@ -176,7 +179,8 @@ class DistanceTest:
|
|
176
179
|
# Evaluate the test
|
177
180
|
# count times shuffling resulted in larger distance
|
178
181
|
comparison_results = np.array(
|
179
|
-
pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0,
|
182
|
+
pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0,
|
183
|
+
dtype=int,
|
180
184
|
)
|
181
185
|
n_failures = pd.Series(np.clip(np.sum(comparison_results, axis=1), 1, np.inf), index=df.index)
|
182
186
|
pvalues = n_failures / self.n_perms
|
@@ -213,7 +217,7 @@ class DistanceTest:
|
|
213
217
|
groupby: Key in adata.obs for grouping cells.
|
214
218
|
contrast: Name of the contrast group.
|
215
219
|
cell_wise_metric: Metric to use for pairwise distances.
|
216
|
-
verbose: Whether to print progress.
|
220
|
+
verbose: Whether to print progress.
|
217
221
|
|
218
222
|
Returns:
|
219
223
|
pandas.DataFrame: Results of the permutation test, with columns:
|
@@ -225,7 +229,7 @@ class DistanceTest:
|
|
225
229
|
|
226
230
|
Examples:
|
227
231
|
>>> import pertpy as pt
|
228
|
-
>>> adata = pt.dt.
|
232
|
+
>>> adata = pt.dt.distance_example()
|
229
233
|
>>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
|
230
234
|
>>> test_results = distance_test.test_precomputed(adata, groupby="perturbation", contrast="control")
|
231
235
|
"""
|
@@ -284,7 +288,8 @@ class DistanceTest:
|
|
284
288
|
# Evaluate the test
|
285
289
|
# count times shuffling resulted in larger distance
|
286
290
|
comparison_results = np.array(
|
287
|
-
pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0,
|
291
|
+
pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0,
|
292
|
+
dtype=int,
|
288
293
|
)
|
289
294
|
n_failures = pd.Series(np.clip(np.sum(comparison_results, axis=1), 1, np.inf), index=df.index)
|
290
295
|
pvalues = n_failures / self.n_perms
|