pertpy-0.7.0-py3-none-any.whl → pertpy-0.8.0-py3-none-any.whl

Files changed (56)
  1. pertpy/__init__.py +2 -1
  2. pertpy/data/__init__.py +61 -0
  3. pertpy/data/_dataloader.py +27 -23
  4. pertpy/data/_datasets.py +58 -0
  5. pertpy/metadata/__init__.py +2 -0
  6. pertpy/metadata/_cell_line.py +39 -70
  7. pertpy/metadata/_compound.py +3 -4
  8. pertpy/metadata/_drug.py +2 -6
  9. pertpy/metadata/_look_up.py +38 -51
  10. pertpy/metadata/_metadata.py +7 -10
  11. pertpy/metadata/_moa.py +2 -6
  12. pertpy/plot/__init__.py +0 -5
  13. pertpy/preprocessing/__init__.py +2 -0
  14. pertpy/preprocessing/_guide_rna.py +2 -3
  15. pertpy/tools/__init__.py +42 -4
  16. pertpy/tools/_augur.py +14 -15
  17. pertpy/tools/_cinemaot.py +2 -2
  18. pertpy/tools/_coda/_base_coda.py +118 -142
  19. pertpy/tools/_coda/_sccoda.py +16 -15
  20. pertpy/tools/_coda/_tasccoda.py +21 -22
  21. pertpy/tools/_dialogue.py +18 -23
  22. pertpy/tools/_differential_gene_expression/__init__.py +20 -0
  23. pertpy/tools/_differential_gene_expression/_base.py +657 -0
  24. pertpy/tools/_differential_gene_expression/_checks.py +41 -0
  25. pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
  26. pertpy/tools/_differential_gene_expression/_edger.py +125 -0
  27. pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
  28. pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
  29. pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
  30. pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
  31. pertpy/tools/_distances/_distance_tests.py +21 -16
  32. pertpy/tools/_distances/_distances.py +406 -70
  33. pertpy/tools/_enrichment.py +10 -15
  34. pertpy/tools/_kernel_pca.py +1 -1
  35. pertpy/tools/_milo.py +76 -53
  36. pertpy/tools/_mixscape.py +15 -11
  37. pertpy/tools/_perturbation_space/_clustering.py +5 -2
  38. pertpy/tools/_perturbation_space/_comparison.py +112 -0
  39. pertpy/tools/_perturbation_space/_discriminator_classifiers.py +20 -22
  40. pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
  41. pertpy/tools/_perturbation_space/_simple.py +3 -3
  42. pertpy/tools/_scgen/__init__.py +1 -1
  43. pertpy/tools/_scgen/_base_components.py +2 -3
  44. pertpy/tools/_scgen/_scgen.py +33 -28
  45. pertpy/tools/_scgen/_utils.py +2 -2
  46. {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +22 -13
  47. pertpy-0.8.0.dist-info/RECORD +57 -0
  48. {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
  49. pertpy/plot/_augur.py +0 -171
  50. pertpy/plot/_coda.py +0 -601
  51. pertpy/plot/_guide_rna.py +0 -64
  52. pertpy/plot/_milopy.py +0 -209
  53. pertpy/plot/_mixscape.py +0 -355
  54. pertpy/tools/_differential_gene_expression.py +0 -325
  55. pertpy-0.7.0.dist-info/RECORD +0 -53
  56. {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0
pertpy/tools/_differential_gene_expression/_statsmodels.py
@@ -0,0 +1,72 @@
+import numpy as np
+import pandas as pd
+import scanpy as sc
+import statsmodels
+import statsmodels.api as sm
+from tqdm.auto import tqdm
+
+from ._base import LinearModelBase
+from ._checks import check_is_integer_matrix
+
+
+class Statsmodels(LinearModelBase):
+    """Differential expression test using a statsmodels linear regression"""
+
+    def _check_counts(self):
+        check_is_integer_matrix(self.data)
+
+    def fit(
+        self,
+        regression_model: type[sm.OLS] | type[sm.GLM] = sm.OLS,
+        **kwargs,
+    ) -> None:
+        """Fit the specified regression model.
+
+        Args:
+            regression_model: A statsmodels regression model class, either OLS or GLM.
+            **kwargs: Additional arguments for fitting the specific method. In particular, this
+                is where you can specify the family for GLM.
+
+        Examples:
+            >>> import statsmodels.api as sm
+            >>> import pertpy as pt
+            >>> model = pt.tl.Statsmodels(adata, design="~condition")
+            >>> model.fit(sm.GLM, family=sm.families.NegativeBinomial(link=sm.families.links.Log()))
+            >>> results = model.test_contrasts(np.array([0, 1]))
+        """
+        self.models = []
+        for var in tqdm(self.adata.var_names):
+            mod = regression_model(
+                sc.get.obs_df(self.adata, keys=[var], layer=self.layer)[var],
+                self.design,
+                **kwargs,
+            )
+            mod = mod.fit()
+            self.models.append(mod)
+
+    def _test_single_contrast(self, contrast, **kwargs) -> pd.DataFrame:
+        res = []
+        for var, mod in zip(tqdm(self.adata.var_names), self.models, strict=False):
+            t_test = mod.t_test(contrast)
+            res.append(
+                {
+                    "variable": var,
+                    "p_value": t_test.pvalue,
+                    "t_value": t_test.tvalue.item(),
+                    "sd": t_test.sd.item(),
+                    "log_fc": t_test.effect.item(),
+                    "adj_p_value": statsmodels.stats.multitest.fdrcorrection(np.array([t_test.pvalue]))[1].item(),
+                }
+            )
+        return pd.DataFrame(res).sort_values("p_value")
+
+    def contrast(self, column: str, baseline: str, group_to_compare: str) -> np.ndarray:
+        """Build a simple contrast for pairwise comparisons.
+
+        This is equivalent to
+
+        ```
+        model.cond(<column> = baseline) - model.cond(<column> = group_to_compare)
+        ```
+        """
+        return self.cond(**{column: baseline}) - self.cond(**{column: group_to_compare})
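
For orientation, here is a minimal usage sketch of the new `Statsmodels` backend, assembled from the docstring above. The toy `AnnData`, the `condition` column, and the default OLS fit are assumptions made for illustration; only `pt.tl.Statsmodels(adata, design=...)`, `fit()`, `contrast()` and `test_contrasts()` are taken from the diff itself.

```python
# Minimal usage sketch of the new Statsmodels backend (illustrative data;
# only the pertpy calls shown in the docstring above come from the diff).
import anndata as ad
import numpy as np
import pandas as pd
import pertpy as pt

rng = np.random.default_rng(0)
adata = ad.AnnData(
    X=rng.negative_binomial(5, 0.3, size=(100, 20)).astype(np.float32),  # integer-valued counts
    obs=pd.DataFrame({"condition": ["control"] * 50 + ["treated"] * 50}, index=[f"cell{i}" for i in range(100)]),
)

model = pt.tl.Statsmodels(adata, design="~condition")  # one linear model per gene
model.fit()  # default: sm.OLS; pass sm.GLM plus a family for count models
contrast = model.contrast(column="condition", baseline="control", group_to_compare="treated")
results = model.test_contrasts(contrast)  # DataFrame with p_value, log_fc, adj_p_value, ...
```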
pertpy/tools/_distances/_distance_tests.py
@@ -23,20 +23,20 @@ class DistanceTest:
 
     Args:
         metric: Distance metric to use between groups of cells.
-        n_perms: Number of permutations to run. Defaults to 1000.
+        n_perms: Number of permutations to run.
         layer_key: Name of the counts layer containing raw counts to calculate distances for.
             Mutually exclusive with 'obsm_key'.
-            Defaults to None and is then not used.
+            If equal to `None` the parameter is ignored.
         obsm_key: Name of embedding in adata.obsm to use.
             Mutually exclusive with 'counts_layer_key'.
             Defaults to None, but is set to "X_pca" if not set explicitly internally.
-        alpha: Significance level. Defaults to 0.05.
-        correction: Multiple testing correction method. Defaults to 'holm-sidak'.
-        cell_wise_metric: Metric to use between single cells. Defaults to "euclidean".
+        alpha: Significance level.
+        correction: Multiple testing correction method.
+        cell_wise_metric: Metric to use between single cells.
 
     Examples:
         >>> import pertpy as pt
-        >>> adata = pt.dt.distance_example_data()
+        >>> adata = pt.dt.distance_example()
         >>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
         >>> tab = distance_test(adata, groupby="perturbation", contrast="control")
     """
@@ -66,11 +66,14 @@ class DistanceTest:
         self.alpha = alpha
         self.correction = correction
         self.cell_wise_metric = (
-            cell_wise_metric if cell_wise_metric else Distance(self.metric, self.obsm_key).cell_wise_metric
+            cell_wise_metric if cell_wise_metric else Distance(self.metric, obsm_key=self.obsm_key).cell_wise_metric
         )
 
         self.distance = Distance(
-            self.metric, layer_key=self.layer_key, obsm_key=self.obsm_key, cell_wise_metric=self.cell_wise_metric
+            self.metric,
+            layer_key=self.layer_key,
+            obsm_key=self.obsm_key,
+            cell_wise_metric=self.cell_wise_metric,
         )
 
     def __call__(
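
The first change in this hunk passes `obsm_key` by keyword instead of positionally. A small stand-alone illustration (not pertpy code, hypothetical signatures) of the pitfall this avoids:

```python
# When a function gains or reorders parameters, positional call sites silently
# bind values to the wrong parameter, while keyword call sites keep working.
def distance_v1(metric, obsm_key=None):
    return {"metric": metric, "obsm_key": obsm_key}

def distance_v2(metric, layer_key=None, obsm_key=None):  # new parameter inserted
    return {"metric": metric, "layer_key": layer_key, "obsm_key": obsm_key}

print(distance_v2("edistance", "X_pca"))           # "X_pca" lands in layer_key (wrong)
print(distance_v2("edistance", obsm_key="X_pca"))  # still correct
```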
@@ -87,7 +90,7 @@ class DistanceTest:
             adata: Annotated data matrix.
             groupby: Key in adata.obs for grouping cells.
             contrast: Name of the contrast group.
-            show_progressbar: Whether to print progress. Defaults to True.
+            show_progressbar: Whether to print progress.
 
         Returns:
             pandas.DataFrame: Results of the permutation test, with columns:
@@ -99,7 +102,7 @@ class DistanceTest:
 
         Examples:
             >>> import pertpy as pt
-            >>> adata = pt.dt.distance_example_data()
+            >>> adata = pt.dt.distance_example()
            >>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
            >>> tab = distance_test(adata, groupby="perturbation", contrast="control")
        """
@@ -121,7 +124,7 @@ class DistanceTest:
             adata: Annotated data matrix.
             groupby: Key in adata.obs for grouping cells.
             contrast: Name of the contrast group.
-            show_progressbar: Whether to print progress. Defaults to True.
+            show_progressbar: Whether to print progress.
 
         Returns:
             pandas.DataFrame: Results of the permutation test, with columns:
@@ -133,7 +136,7 @@ class DistanceTest:
 
         Examples:
             >>> import pertpy as pt
-            >>> adata = pt.dt.distance_example_data()
+            >>> adata = pt.dt.distance_example()
            >>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
            >>> test_results = distance_test.test_xy(adata, groupby="perturbation", contrast="control")
        """
@@ -176,7 +179,8 @@ class DistanceTest:
         # Evaluate the test
         # count times shuffling resulted in larger distance
         comparison_results = np.array(
-            pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0, dtype=int
+            pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0,
+            dtype=int,
         )
         n_failures = pd.Series(np.clip(np.sum(comparison_results, axis=1), 1, np.inf), index=df.index)
         pvalues = n_failures / self.n_perms
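
The reformatted call above is only a line break; the test logic is unchanged: for each group, count how many permutations produced a larger distance than the observed one, clip that count to at least 1 so no p-value is exactly zero, and divide by the number of permutations. A stand-alone sketch with toy numbers (not pertpy code):

```python
# Toy illustration of the permutation p-value logic above: count how often a
# shuffled distance exceeds the observed distance, clip the count to >= 1,
# and divide by the number of permutations.
import numpy as np
import pandas as pd

n_perms = 1000
observed = pd.Series({"groupA": 2.0, "groupB": 0.1})        # observed distances to the contrast group
shuffled = pd.DataFrame(                                    # one column per permutation
    np.random.default_rng(0).normal(0.5, 0.3, size=(2, n_perms)),
    index=observed.index,
)

larger = (shuffled.sub(observed, axis=0) > 0).sum(axis=1)   # times shuffling gave a larger distance
n_failures = np.clip(larger, 1, np.inf)                     # avoid p-values of exactly 0
pvalues = n_failures / n_perms
print(pvalues)  # groupA ~ 0.001 (rarely exceeded), groupB ~ 0.9 (usually exceeded)
```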
@@ -213,7 +217,7 @@ class DistanceTest:
             groupby: Key in adata.obs for grouping cells.
             contrast: Name of the contrast group.
             cell_wise_metric: Metric to use for pairwise distances.
-            verbose: Whether to print progress. Defaults to True.
+            verbose: Whether to print progress.
 
         Returns:
             pandas.DataFrame: Results of the permutation test, with columns:
@@ -225,7 +229,7 @@ class DistanceTest:
 
         Examples:
             >>> import pertpy as pt
-            >>> adata = pt.dt.distance_example_data()
+            >>> adata = pt.dt.distance_example()
            >>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
            >>> test_results = distance_test.test_precomputed(adata, groupby="perturbation", contrast="control")
        """
@@ -284,7 +288,8 @@ class DistanceTest:
         # Evaluate the test
         # count times shuffling resulted in larger distance
         comparison_results = np.array(
-            pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0, dtype=int
+            pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0,
+            dtype=int,
         )
         n_failures = pd.Series(np.clip(np.sum(comparison_results, axis=1), 1, np.inf), index=df.index)
         pvalues = n_failures / self.n_perms
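
Permutation p-values like these still need multiple-testing correction across groups; the class takes a `correction` argument for this, and the removed docstring text named 'holm-sidak'. A hedged sketch of such a correction with statsmodels (the exact call `DistanceTest` makes internally is not part of this diff; the p-values below are made up):

```python
# Sketch of adjusting a set of permutation p-values for multiple testing.
import pandas as pd
from statsmodels.stats.multitest import multipletests

pvalues = pd.Series({"groupA": 0.001, "groupB": 0.04, "groupC": 0.2})  # illustrative values
reject, pvals_adjusted, _, _ = multipletests(pvalues, alpha=0.05, method="holm-sidak")
adjusted = pd.DataFrame(
    {"pvalue": pvalues, "pvalue_adj": pvals_adjusted, "significant": reject},
    index=pvalues.index,
)
print(adjusted)
```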