pertpy 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pertpy/__init__.py +2 -1
- pertpy/data/__init__.py +61 -0
- pertpy/data/_dataloader.py +27 -23
- pertpy/data/_datasets.py +58 -0
- pertpy/metadata/__init__.py +2 -0
- pertpy/metadata/_cell_line.py +39 -70
- pertpy/metadata/_compound.py +3 -4
- pertpy/metadata/_drug.py +2 -6
- pertpy/metadata/_look_up.py +38 -51
- pertpy/metadata/_metadata.py +7 -10
- pertpy/metadata/_moa.py +2 -6
- pertpy/plot/__init__.py +0 -5
- pertpy/preprocessing/__init__.py +2 -0
- pertpy/preprocessing/_guide_rna.py +2 -3
- pertpy/tools/__init__.py +42 -4
- pertpy/tools/_augur.py +14 -15
- pertpy/tools/_cinemaot.py +2 -2
- pertpy/tools/_coda/_base_coda.py +118 -142
- pertpy/tools/_coda/_sccoda.py +16 -15
- pertpy/tools/_coda/_tasccoda.py +21 -22
- pertpy/tools/_dialogue.py +18 -23
- pertpy/tools/_differential_gene_expression/__init__.py +20 -0
- pertpy/tools/_differential_gene_expression/_base.py +657 -0
- pertpy/tools/_differential_gene_expression/_checks.py +41 -0
- pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
- pertpy/tools/_differential_gene_expression/_edger.py +125 -0
- pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
- pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
- pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
- pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
- pertpy/tools/_distances/_distance_tests.py +21 -16
- pertpy/tools/_distances/_distances.py +406 -70
- pertpy/tools/_enrichment.py +10 -15
- pertpy/tools/_kernel_pca.py +1 -1
- pertpy/tools/_milo.py +76 -53
- pertpy/tools/_mixscape.py +15 -11
- pertpy/tools/_perturbation_space/_clustering.py +5 -2
- pertpy/tools/_perturbation_space/_comparison.py +112 -0
- pertpy/tools/_perturbation_space/_discriminator_classifiers.py +20 -22
- pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
- pertpy/tools/_perturbation_space/_simple.py +3 -3
- pertpy/tools/_scgen/__init__.py +1 -1
- pertpy/tools/_scgen/_base_components.py +2 -3
- pertpy/tools/_scgen/_scgen.py +33 -28
- pertpy/tools/_scgen/_utils.py +2 -2
- {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +22 -13
- pertpy-0.8.0.dist-info/RECORD +57 -0
- {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
- pertpy/plot/_augur.py +0 -171
- pertpy/plot/_coda.py +0 -601
- pertpy/plot/_guide_rna.py +0 -64
- pertpy/plot/_milopy.py +0 -209
- pertpy/plot/_mixscape.py +0 -355
- pertpy/tools/_differential_gene_expression.py +0 -325
- pertpy-0.7.0.dist-info/RECORD +0 -53
- {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,72 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import pandas as pd
|
3
|
+
import scanpy as sc
|
4
|
+
import statsmodels
|
5
|
+
import statsmodels.api as sm
|
6
|
+
from tqdm.auto import tqdm
|
7
|
+
|
8
|
+
from ._base import LinearModelBase
|
9
|
+
from ._checks import check_is_integer_matrix
|
10
|
+
|
11
|
+
|
12
|
+
class Statsmodels(LinearModelBase):
|
13
|
+
"""Differential expression test using a statsmodels linear regression"""
|
14
|
+
|
15
|
+
def _check_counts(self):
|
16
|
+
check_is_integer_matrix(self.data)
|
17
|
+
|
18
|
+
def fit(
|
19
|
+
self,
|
20
|
+
regression_model: type[sm.OLS] | type[sm.GLM] = sm.OLS,
|
21
|
+
**kwargs,
|
22
|
+
) -> None:
|
23
|
+
"""Fit the specified regression model.
|
24
|
+
|
25
|
+
Args:
|
26
|
+
regression_model: A statsmodels regression model class, either OLS or GLM.
|
27
|
+
**kwargs: Additional arguments for fitting the specific method. In particular, this
|
28
|
+
is where you can specify the family for GLM.
|
29
|
+
|
30
|
+
Examples:
|
31
|
+
>>> import statsmodels.api as sm
|
32
|
+
>>> import pertpy as pt
|
33
|
+
>>> model = pt.tl.Statsmodels(adata, design="~condition")
|
34
|
+
>>> model.fit(sm.GLM, family=sm.families.NegativeBinomial(link=sm.families.links.Log()))
|
35
|
+
>>> results = model.test_contrasts(np.array([0, 1]))
|
36
|
+
"""
|
37
|
+
self.models = []
|
38
|
+
for var in tqdm(self.adata.var_names):
|
39
|
+
mod = regression_model(
|
40
|
+
sc.get.obs_df(self.adata, keys=[var], layer=self.layer)[var],
|
41
|
+
self.design,
|
42
|
+
**kwargs,
|
43
|
+
)
|
44
|
+
mod = mod.fit()
|
45
|
+
self.models.append(mod)
|
46
|
+
|
47
|
+
def _test_single_contrast(self, contrast, **kwargs) -> pd.DataFrame:
|
48
|
+
res = []
|
49
|
+
for var, mod in zip(tqdm(self.adata.var_names), self.models, strict=False):
|
50
|
+
t_test = mod.t_test(contrast)
|
51
|
+
res.append(
|
52
|
+
{
|
53
|
+
"variable": var,
|
54
|
+
"p_value": t_test.pvalue,
|
55
|
+
"t_value": t_test.tvalue.item(),
|
56
|
+
"sd": t_test.sd.item(),
|
57
|
+
"log_fc": t_test.effect.item(),
|
58
|
+
"adj_p_value": statsmodels.stats.multitest.fdrcorrection(np.array([t_test.pvalue]))[1].item(),
|
59
|
+
}
|
60
|
+
)
|
61
|
+
return pd.DataFrame(res).sort_values("p_value")
|
62
|
+
|
63
|
+
def contrast(self, column: str, baseline: str, group_to_compare: str) -> np.ndarray:
|
64
|
+
"""Build a simple contrast for pairwise comparisons.
|
65
|
+
|
66
|
+
This is equivalent to
|
67
|
+
|
68
|
+
```
|
69
|
+
model.cond(<column> = baseline) - model.cond(<column> = group_to_compare)
|
70
|
+
```
|
71
|
+
"""
|
72
|
+
return self.cond(**{column: baseline}) - self.cond(**{column: group_to_compare})
|
@@ -23,20 +23,20 @@ class DistanceTest:
|
|
23
23
|
|
24
24
|
Args:
|
25
25
|
metric: Distance metric to use between groups of cells.
|
26
|
-
n_perms: Number of permutations to run.
|
26
|
+
n_perms: Number of permutations to run.
|
27
27
|
layer_key: Name of the counts layer containing raw counts to calculate distances for.
|
28
28
|
Mutually exclusive with 'obsm_key'.
|
29
|
-
|
29
|
+
If equal to `None` the parameter is ignored.
|
30
30
|
obsm_key: Name of embedding in adata.obsm to use.
|
31
31
|
Mutually exclusive with 'counts_layer_key'.
|
32
32
|
Defaults to None, but is set to "X_pca" if not set explicitly internally.
|
33
|
-
alpha: Significance level.
|
34
|
-
correction: Multiple testing correction method.
|
35
|
-
cell_wise_metric: Metric to use between single cells.
|
33
|
+
alpha: Significance level.
|
34
|
+
correction: Multiple testing correction method.
|
35
|
+
cell_wise_metric: Metric to use between single cells.
|
36
36
|
|
37
37
|
Examples:
|
38
38
|
>>> import pertpy as pt
|
39
|
-
>>> adata = pt.dt.
|
39
|
+
>>> adata = pt.dt.distance_example()
|
40
40
|
>>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
|
41
41
|
>>> tab = distance_test(adata, groupby="perturbation", contrast="control")
|
42
42
|
"""
|
@@ -66,11 +66,14 @@ class DistanceTest:
|
|
66
66
|
self.alpha = alpha
|
67
67
|
self.correction = correction
|
68
68
|
self.cell_wise_metric = (
|
69
|
-
cell_wise_metric if cell_wise_metric else Distance(self.metric, self.obsm_key).cell_wise_metric
|
69
|
+
cell_wise_metric if cell_wise_metric else Distance(self.metric, obsm_key=self.obsm_key).cell_wise_metric
|
70
70
|
)
|
71
71
|
|
72
72
|
self.distance = Distance(
|
73
|
-
self.metric,
|
73
|
+
self.metric,
|
74
|
+
layer_key=self.layer_key,
|
75
|
+
obsm_key=self.obsm_key,
|
76
|
+
cell_wise_metric=self.cell_wise_metric,
|
74
77
|
)
|
75
78
|
|
76
79
|
def __call__(
|
@@ -87,7 +90,7 @@ class DistanceTest:
|
|
87
90
|
adata: Annotated data matrix.
|
88
91
|
groupby: Key in adata.obs for grouping cells.
|
89
92
|
contrast: Name of the contrast group.
|
90
|
-
show_progressbar: Whether to print progress.
|
93
|
+
show_progressbar: Whether to print progress.
|
91
94
|
|
92
95
|
Returns:
|
93
96
|
pandas.DataFrame: Results of the permutation test, with columns:
|
@@ -99,7 +102,7 @@ class DistanceTest:
|
|
99
102
|
|
100
103
|
Examples:
|
101
104
|
>>> import pertpy as pt
|
102
|
-
>>> adata = pt.dt.
|
105
|
+
>>> adata = pt.dt.distance_example()
|
103
106
|
>>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
|
104
107
|
>>> tab = distance_test(adata, groupby="perturbation", contrast="control")
|
105
108
|
"""
|
@@ -121,7 +124,7 @@ class DistanceTest:
|
|
121
124
|
adata: Annotated data matrix.
|
122
125
|
groupby: Key in adata.obs for grouping cells.
|
123
126
|
contrast: Name of the contrast group.
|
124
|
-
show_progressbar: Whether to print progress.
|
127
|
+
show_progressbar: Whether to print progress.
|
125
128
|
|
126
129
|
Returns:
|
127
130
|
pandas.DataFrame: Results of the permutation test, with columns:
|
@@ -133,7 +136,7 @@ class DistanceTest:
|
|
133
136
|
|
134
137
|
Examples:
|
135
138
|
>>> import pertpy as pt
|
136
|
-
>>> adata = pt.dt.
|
139
|
+
>>> adata = pt.dt.distance_example()
|
137
140
|
>>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
|
138
141
|
>>> test_results = distance_test.test_xy(adata, groupby="perturbation", contrast="control")
|
139
142
|
"""
|
@@ -176,7 +179,8 @@ class DistanceTest:
|
|
176
179
|
# Evaluate the test
|
177
180
|
# count times shuffling resulted in larger distance
|
178
181
|
comparison_results = np.array(
|
179
|
-
pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0,
|
182
|
+
pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0,
|
183
|
+
dtype=int,
|
180
184
|
)
|
181
185
|
n_failures = pd.Series(np.clip(np.sum(comparison_results, axis=1), 1, np.inf), index=df.index)
|
182
186
|
pvalues = n_failures / self.n_perms
|
@@ -213,7 +217,7 @@ class DistanceTest:
|
|
213
217
|
groupby: Key in adata.obs for grouping cells.
|
214
218
|
contrast: Name of the contrast group.
|
215
219
|
cell_wise_metric: Metric to use for pairwise distances.
|
216
|
-
verbose: Whether to print progress.
|
220
|
+
verbose: Whether to print progress.
|
217
221
|
|
218
222
|
Returns:
|
219
223
|
pandas.DataFrame: Results of the permutation test, with columns:
|
@@ -225,7 +229,7 @@ class DistanceTest:
|
|
225
229
|
|
226
230
|
Examples:
|
227
231
|
>>> import pertpy as pt
|
228
|
-
>>> adata = pt.dt.
|
232
|
+
>>> adata = pt.dt.distance_example()
|
229
233
|
>>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
|
230
234
|
>>> test_results = distance_test.test_precomputed(adata, groupby="perturbation", contrast="control")
|
231
235
|
"""
|
@@ -284,7 +288,8 @@ class DistanceTest:
|
|
284
288
|
# Evaluate the test
|
285
289
|
# count times shuffling resulted in larger distance
|
286
290
|
comparison_results = np.array(
|
287
|
-
pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0,
|
291
|
+
pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0,
|
292
|
+
dtype=int,
|
288
293
|
)
|
289
294
|
n_failures = pd.Series(np.clip(np.sum(comparison_results, axis=1), 1, np.inf), index=df.index)
|
290
295
|
pvalues = n_failures / self.n_perms
|