pertpy 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. pertpy/__init__.py +2 -1
  2. pertpy/data/__init__.py +61 -0
  3. pertpy/data/_dataloader.py +27 -23
  4. pertpy/data/_datasets.py +58 -0
  5. pertpy/metadata/__init__.py +2 -0
  6. pertpy/metadata/_cell_line.py +39 -70
  7. pertpy/metadata/_compound.py +3 -4
  8. pertpy/metadata/_drug.py +2 -6
  9. pertpy/metadata/_look_up.py +38 -51
  10. pertpy/metadata/_metadata.py +7 -10
  11. pertpy/metadata/_moa.py +2 -6
  12. pertpy/plot/__init__.py +0 -5
  13. pertpy/preprocessing/__init__.py +2 -0
  14. pertpy/preprocessing/_guide_rna.py +2 -3
  15. pertpy/tools/__init__.py +42 -4
  16. pertpy/tools/_augur.py +14 -15
  17. pertpy/tools/_cinemaot.py +2 -2
  18. pertpy/tools/_coda/_base_coda.py +118 -142
  19. pertpy/tools/_coda/_sccoda.py +16 -15
  20. pertpy/tools/_coda/_tasccoda.py +21 -22
  21. pertpy/tools/_dialogue.py +18 -23
  22. pertpy/tools/_differential_gene_expression/__init__.py +20 -0
  23. pertpy/tools/_differential_gene_expression/_base.py +657 -0
  24. pertpy/tools/_differential_gene_expression/_checks.py +41 -0
  25. pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
  26. pertpy/tools/_differential_gene_expression/_edger.py +125 -0
  27. pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
  28. pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
  29. pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
  30. pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
  31. pertpy/tools/_distances/_distance_tests.py +21 -16
  32. pertpy/tools/_distances/_distances.py +406 -70
  33. pertpy/tools/_enrichment.py +10 -15
  34. pertpy/tools/_kernel_pca.py +1 -1
  35. pertpy/tools/_milo.py +76 -53
  36. pertpy/tools/_mixscape.py +15 -11
  37. pertpy/tools/_perturbation_space/_clustering.py +5 -2
  38. pertpy/tools/_perturbation_space/_comparison.py +112 -0
  39. pertpy/tools/_perturbation_space/_discriminator_classifiers.py +20 -22
  40. pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
  41. pertpy/tools/_perturbation_space/_simple.py +3 -3
  42. pertpy/tools/_scgen/__init__.py +1 -1
  43. pertpy/tools/_scgen/_base_components.py +2 -3
  44. pertpy/tools/_scgen/_scgen.py +33 -28
  45. pertpy/tools/_scgen/_utils.py +2 -2
  46. {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +22 -13
  47. pertpy-0.8.0.dist-info/RECORD +57 -0
  48. {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
  49. pertpy/plot/_augur.py +0 -171
  50. pertpy/plot/_coda.py +0 -601
  51. pertpy/plot/_guide_rna.py +0 -64
  52. pertpy/plot/_milopy.py +0 -209
  53. pertpy/plot/_mixscape.py +0 -355
  54. pertpy/tools/_differential_gene_expression.py +0 -325
  55. pertpy-0.7.0.dist-info/RECORD +0 -53
  56. {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,72 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import scanpy as sc
4
+ import statsmodels
5
+ import statsmodels.api as sm
6
+ from tqdm.auto import tqdm
7
+
8
+ from ._base import LinearModelBase
9
+ from ._checks import check_is_integer_matrix
10
+
11
+
12
+ class Statsmodels(LinearModelBase):
13
+ """Differential expression test using a statsmodels linear regression"""
14
+
15
+ def _check_counts(self):
16
+ check_is_integer_matrix(self.data)
17
+
18
+ def fit(
19
+ self,
20
+ regression_model: type[sm.OLS] | type[sm.GLM] = sm.OLS,
21
+ **kwargs,
22
+ ) -> None:
23
+ """Fit the specified regression model.
24
+
25
+ Args:
26
+ regression_model: A statsmodels regression model class, either OLS or GLM.
27
+ **kwargs: Additional arguments for fitting the specific method. In particular, this
28
+ is where you can specify the family for GLM.
29
+
30
+ Examples:
31
+ >>> import statsmodels.api as sm
32
+ >>> import pertpy as pt
33
+ >>> model = pt.tl.Statsmodels(adata, design="~condition")
34
+ >>> model.fit(sm.GLM, family=sm.families.NegativeBinomial(link=sm.families.links.Log()))
35
+ >>> results = model.test_contrasts(np.array([0, 1]))
36
+ """
37
+ self.models = []
38
+ for var in tqdm(self.adata.var_names):
39
+ mod = regression_model(
40
+ sc.get.obs_df(self.adata, keys=[var], layer=self.layer)[var],
41
+ self.design,
42
+ **kwargs,
43
+ )
44
+ mod = mod.fit()
45
+ self.models.append(mod)
46
+
47
+ def _test_single_contrast(self, contrast, **kwargs) -> pd.DataFrame:
48
+ res = []
49
+ for var, mod in zip(tqdm(self.adata.var_names), self.models, strict=False):
50
+ t_test = mod.t_test(contrast)
51
+ res.append(
52
+ {
53
+ "variable": var,
54
+ "p_value": t_test.pvalue,
55
+ "t_value": t_test.tvalue.item(),
56
+ "sd": t_test.sd.item(),
57
+ "log_fc": t_test.effect.item(),
58
+ "adj_p_value": statsmodels.stats.multitest.fdrcorrection(np.array([t_test.pvalue]))[1].item(),
59
+ }
60
+ )
61
+ return pd.DataFrame(res).sort_values("p_value")
62
+
63
+ def contrast(self, column: str, baseline: str, group_to_compare: str) -> np.ndarray:
64
+ """Build a simple contrast for pairwise comparisons.
65
+
66
+ This is equivalent to
67
+
68
+ ```
69
+ model.cond(<column> = baseline) - model.cond(<column> = group_to_compare)
70
+ ```
71
+ """
72
+ return self.cond(**{column: baseline}) - self.cond(**{column: group_to_compare})
@@ -23,20 +23,20 @@ class DistanceTest:
23
23
 
24
24
  Args:
25
25
  metric: Distance metric to use between groups of cells.
26
- n_perms: Number of permutations to run. Defaults to 1000.
26
+ n_perms: Number of permutations to run.
27
27
  layer_key: Name of the counts layer containing raw counts to calculate distances for.
28
28
  Mutually exclusive with 'obsm_key'.
29
- Defaults to None and is then not used.
29
+ If equal to `None` the parameter is ignored.
30
30
  obsm_key: Name of embedding in adata.obsm to use.
31
31
  Mutually exclusive with 'counts_layer_key'.
32
32
  Defaults to None, but is set to "X_pca" if not set explicitly internally.
33
- alpha: Significance level. Defaults to 0.05.
34
- correction: Multiple testing correction method. Defaults to 'holm-sidak'.
35
- cell_wise_metric: Metric to use between single cells. Defaults to "euclidean".
33
+ alpha: Significance level.
34
+ correction: Multiple testing correction method.
35
+ cell_wise_metric: Metric to use between single cells.
36
36
 
37
37
  Examples:
38
38
  >>> import pertpy as pt
39
- >>> adata = pt.dt.distance_example_data()
39
+ >>> adata = pt.dt.distance_example()
40
40
  >>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
41
41
  >>> tab = distance_test(adata, groupby="perturbation", contrast="control")
42
42
  """
@@ -66,11 +66,14 @@ class DistanceTest:
66
66
  self.alpha = alpha
67
67
  self.correction = correction
68
68
  self.cell_wise_metric = (
69
- cell_wise_metric if cell_wise_metric else Distance(self.metric, self.obsm_key).cell_wise_metric
69
+ cell_wise_metric if cell_wise_metric else Distance(self.metric, obsm_key=self.obsm_key).cell_wise_metric
70
70
  )
71
71
 
72
72
  self.distance = Distance(
73
- self.metric, layer_key=self.layer_key, obsm_key=self.obsm_key, cell_wise_metric=self.cell_wise_metric
73
+ self.metric,
74
+ layer_key=self.layer_key,
75
+ obsm_key=self.obsm_key,
76
+ cell_wise_metric=self.cell_wise_metric,
74
77
  )
75
78
 
76
79
  def __call__(
@@ -87,7 +90,7 @@ class DistanceTest:
87
90
  adata: Annotated data matrix.
88
91
  groupby: Key in adata.obs for grouping cells.
89
92
  contrast: Name of the contrast group.
90
- show_progressbar: Whether to print progress. Defaults to True.
93
+ show_progressbar: Whether to print progress.
91
94
 
92
95
  Returns:
93
96
  pandas.DataFrame: Results of the permutation test, with columns:
@@ -99,7 +102,7 @@ class DistanceTest:
99
102
 
100
103
  Examples:
101
104
  >>> import pertpy as pt
102
- >>> adata = pt.dt.distance_example_data()
105
+ >>> adata = pt.dt.distance_example()
103
106
  >>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
104
107
  >>> tab = distance_test(adata, groupby="perturbation", contrast="control")
105
108
  """
@@ -121,7 +124,7 @@ class DistanceTest:
121
124
  adata: Annotated data matrix.
122
125
  groupby: Key in adata.obs for grouping cells.
123
126
  contrast: Name of the contrast group.
124
- show_progressbar: Whether to print progress. Defaults to True.
127
+ show_progressbar: Whether to print progress.
125
128
 
126
129
  Returns:
127
130
  pandas.DataFrame: Results of the permutation test, with columns:
@@ -133,7 +136,7 @@ class DistanceTest:
133
136
 
134
137
  Examples:
135
138
  >>> import pertpy as pt
136
- >>> adata = pt.dt.distance_example_data()
139
+ >>> adata = pt.dt.distance_example()
137
140
  >>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
138
141
  >>> test_results = distance_test.test_xy(adata, groupby="perturbation", contrast="control")
139
142
  """
@@ -176,7 +179,8 @@ class DistanceTest:
176
179
  # Evaluate the test
177
180
  # count times shuffling resulted in larger distance
178
181
  comparison_results = np.array(
179
- pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0, dtype=int
182
+ pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0,
183
+ dtype=int,
180
184
  )
181
185
  n_failures = pd.Series(np.clip(np.sum(comparison_results, axis=1), 1, np.inf), index=df.index)
182
186
  pvalues = n_failures / self.n_perms
@@ -213,7 +217,7 @@ class DistanceTest:
213
217
  groupby: Key in adata.obs for grouping cells.
214
218
  contrast: Name of the contrast group.
215
219
  cell_wise_metric: Metric to use for pairwise distances.
216
- verbose: Whether to print progress. Defaults to True.
220
+ verbose: Whether to print progress.
217
221
 
218
222
  Returns:
219
223
  pandas.DataFrame: Results of the permutation test, with columns:
@@ -225,7 +229,7 @@ class DistanceTest:
225
229
 
226
230
  Examples:
227
231
  >>> import pertpy as pt
228
- >>> adata = pt.dt.distance_example_data()
232
+ >>> adata = pt.dt.distance_example()
229
233
  >>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
230
234
  >>> test_results = distance_test.test_precomputed(adata, groupby="perturbation", contrast="control")
231
235
  """
@@ -284,7 +288,8 @@ class DistanceTest:
284
288
  # Evaluate the test
285
289
  # count times shuffling resulted in larger distance
286
290
  comparison_results = np.array(
287
- pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0, dtype=int
291
+ pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0,
292
+ dtype=int,
288
293
  )
289
294
  n_failures = pd.Series(np.clip(np.sum(comparison_results, axis=1), 1, np.inf), index=df.index)
290
295
  pvalues = n_failures / self.n_perms