pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. pertpy/__init__.py +4 -2
  2. pertpy/data/__init__.py +66 -1
  3. pertpy/data/_dataloader.py +28 -26
  4. pertpy/data/_datasets.py +261 -92
  5. pertpy/metadata/__init__.py +6 -0
  6. pertpy/metadata/_cell_line.py +795 -0
  7. pertpy/metadata/_compound.py +128 -0
  8. pertpy/metadata/_drug.py +238 -0
  9. pertpy/metadata/_look_up.py +569 -0
  10. pertpy/metadata/_metadata.py +70 -0
  11. pertpy/metadata/_moa.py +125 -0
  12. pertpy/plot/__init__.py +0 -13
  13. pertpy/preprocessing/__init__.py +2 -0
  14. pertpy/preprocessing/_guide_rna.py +89 -6
  15. pertpy/tools/__init__.py +48 -15
  16. pertpy/tools/_augur.py +329 -32
  17. pertpy/tools/_cinemaot.py +145 -6
  18. pertpy/tools/_coda/_base_coda.py +1237 -116
  19. pertpy/tools/_coda/_sccoda.py +66 -36
  20. pertpy/tools/_coda/_tasccoda.py +46 -39
  21. pertpy/tools/_dialogue.py +180 -77
  22. pertpy/tools/_differential_gene_expression/__init__.py +20 -0
  23. pertpy/tools/_differential_gene_expression/_base.py +657 -0
  24. pertpy/tools/_differential_gene_expression/_checks.py +41 -0
  25. pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
  26. pertpy/tools/_differential_gene_expression/_edger.py +125 -0
  27. pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
  28. pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
  29. pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
  30. pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
  31. pertpy/tools/_distances/_distance_tests.py +29 -24
  32. pertpy/tools/_distances/_distances.py +584 -98
  33. pertpy/tools/_enrichment.py +460 -0
  34. pertpy/tools/_kernel_pca.py +1 -1
  35. pertpy/tools/_milo.py +406 -49
  36. pertpy/tools/_mixscape.py +677 -55
  37. pertpy/tools/_perturbation_space/_clustering.py +10 -3
  38. pertpy/tools/_perturbation_space/_comparison.py +112 -0
  39. pertpy/tools/_perturbation_space/_discriminator_classifiers.py +524 -0
  40. pertpy/tools/_perturbation_space/_perturbation_space.py +146 -52
  41. pertpy/tools/_perturbation_space/_simple.py +52 -11
  42. pertpy/tools/_scgen/__init__.py +1 -1
  43. pertpy/tools/_scgen/_base_components.py +2 -3
  44. pertpy/tools/_scgen/_scgen.py +706 -0
  45. pertpy/tools/_scgen/_utils.py +3 -5
  46. pertpy/tools/decoupler_LICENSE +674 -0
  47. {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +48 -20
  48. pertpy-0.8.0.dist-info/RECORD +57 -0
  49. {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
  50. pertpy/plot/_augur.py +0 -234
  51. pertpy/plot/_cinemaot.py +0 -81
  52. pertpy/plot/_coda.py +0 -1001
  53. pertpy/plot/_dialogue.py +0 -91
  54. pertpy/plot/_guide_rna.py +0 -82
  55. pertpy/plot/_milopy.py +0 -284
  56. pertpy/plot/_mixscape.py +0 -594
  57. pertpy/plot/_scgen.py +0 -337
  58. pertpy/tools/_differential_gene_expression.py +0 -99
  59. pertpy/tools/_metadata/__init__.py +0 -0
  60. pertpy/tools/_metadata/_cell_line.py +0 -613
  61. pertpy/tools/_metadata/_look_up.py +0 -342
  62. pertpy/tools/_perturbation_space/_discriminator_classifier.py +0 -381
  63. pertpy/tools/_scgen/_jax_scgen.py +0 -370
  64. pertpy-0.6.0.dist-info/RECORD +0 -50
  65. /pertpy/tools/_scgen/{_jax_scgenvae.py → _scgenvae.py} +0 -0
  66. {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,72 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import scanpy as sc
4
+ import statsmodels
5
+ import statsmodels.api as sm
6
+ from tqdm.auto import tqdm
7
+
8
+ from ._base import LinearModelBase
9
+ from ._checks import check_is_integer_matrix
10
+
11
+
12
class Statsmodels(LinearModelBase):
    """Differential expression test using a statsmodels linear regression."""

    def _check_counts(self):
        """Ensure the input data matrix contains raw integer counts."""
        check_is_integer_matrix(self.data)

    def fit(
        self,
        regression_model: type[sm.OLS] | type[sm.GLM] = sm.OLS,
        **kwargs,
    ) -> None:
        """Fit the specified regression model.

        Args:
            regression_model: A statsmodels regression model class, either OLS or GLM.
            **kwargs: Additional arguments for fitting the specific method. In particular, this
                is where you can specify the family for GLM.

        Examples:
            >>> import statsmodels.api as sm
            >>> import pertpy as pt
            >>> model = pt.tl.Statsmodels(adata, design="~condition")
            >>> model.fit(sm.GLM, family=sm.families.NegativeBinomial(link=sm.families.links.Log()))
            >>> results = model.test_contrasts(np.array([0, 1]))
        """
        self.models = []
        # Fit one independent model per variable (gene); all models share the
        # same design matrix, only the response vector changes.
        for var in tqdm(self.adata.var_names):
            mod = regression_model(
                sc.get.obs_df(self.adata, keys=[var], layer=self.layer)[var],
                self.design,
                **kwargs,
            )
            mod = mod.fit()
            self.models.append(mod)

    def _test_single_contrast(self, contrast, **kwargs) -> pd.DataFrame:
        """Test a single contrast against every fitted per-variable model.

        Args:
            contrast: Numeric contrast vector matching the design matrix columns.
            **kwargs: Unused; accepted for interface compatibility.

        Returns:
            A DataFrame with one row per variable, sorted by p-value, with columns
            ``variable``, ``p_value``, ``t_value``, ``sd``, ``log_fc`` and
            ``adj_p_value`` (Benjamini-Hochberg FDR across all tested variables).
        """
        res = []
        for var, mod in zip(tqdm(self.adata.var_names), self.models, strict=False):
            t_test = mod.t_test(contrast)
            res.append(
                {
                    "variable": var,
                    "p_value": t_test.pvalue,
                    "t_value": t_test.tvalue.item(),
                    "sd": t_test.sd.item(),
                    "log_fc": t_test.effect.item(),
                }
            )
        res_df = pd.DataFrame(res).sort_values("p_value")
        # BUGFIX: the FDR adjustment must be computed over ALL p-values jointly.
        # The previous code called fdrcorrection on a single-element array per
        # variable, and BH correction with m=1 is the identity, so adj_p_value
        # always equaled p_value. fdrcorrection preserves input order, so the
        # sort above does not affect correctness.
        res_df["adj_p_value"] = statsmodels.stats.multitest.fdrcorrection(res_df["p_value"].to_numpy())[1]
        return res_df

    def contrast(self, column: str, baseline: str, group_to_compare: str) -> np.ndarray:
        """Build a simple contrast for pairwise comparisons.

        This is equivalent to

        ```
        model.cond(<column> = baseline) - model.cond(<column> = group_to_compare)
        ```

        Args:
            column: Column in ``adata.obs`` holding the grouping.
            baseline: Name of the baseline group.
            group_to_compare: Name of the group compared against the baseline.

        Returns:
            Numeric contrast vector (baseline minus comparison group).
        """
        return self.cond(**{column: baseline}) - self.cond(**{column: group_to_compare})
@@ -23,22 +23,22 @@ class DistanceTest:
23
23
 
24
24
  Args:
25
25
  metric: Distance metric to use between groups of cells.
26
- n_perms: Number of permutations to run. Defaults to 1000.
26
+ n_perms: Number of permutations to run.
27
27
  layer_key: Name of the counts layer containing raw counts to calculate distances for.
28
28
  Mutually exclusive with 'obsm_key'.
29
- Defaults to None and is then not used.
29
+ If equal to `None` the parameter is ignored.
30
30
  obsm_key: Name of embedding in adata.obsm to use.
31
31
  Mutually exclusive with 'counts_layer_key'.
32
32
  Defaults to None, but is set to "X_pca" if not set explicitly internally.
33
- alpha: Significance level. Defaults to 0.05.
34
- correction: Multiple testing correction method. Defaults to 'holm-sidak'.
35
- cell_wise_metric: Metric to use between single cells. Defaults to "euclidean".
33
+ alpha: Significance level.
34
+ correction: Multiple testing correction method.
35
+ cell_wise_metric: Metric to use between single cells.
36
36
 
37
37
  Examples:
38
38
  >>> import pertpy as pt
39
- >>> adata = pt.dt.distance_example_data()
40
- >>> distance_test = pt.tl.DistanceTest('edistance', n_perms=1000)
41
- >>> tab = distance_test(adata, groupby='perturbation', contrast='control')
39
+ >>> adata = pt.dt.distance_example()
40
+ >>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
41
+ >>> tab = distance_test(adata, groupby="perturbation", contrast="control")
42
42
  """
43
43
 
44
44
  def __init__(
@@ -66,11 +66,14 @@ class DistanceTest:
66
66
  self.alpha = alpha
67
67
  self.correction = correction
68
68
  self.cell_wise_metric = (
69
- cell_wise_metric if cell_wise_metric else Distance(self.metric, self.obsm_key).cell_wise_metric
69
+ cell_wise_metric if cell_wise_metric else Distance(self.metric, obsm_key=self.obsm_key).cell_wise_metric
70
70
  )
71
71
 
72
72
  self.distance = Distance(
73
- self.metric, layer_key=self.layer_key, obsm_key=self.obsm_key, cell_wise_metric=self.cell_wise_metric
73
+ self.metric,
74
+ layer_key=self.layer_key,
75
+ obsm_key=self.obsm_key,
76
+ cell_wise_metric=self.cell_wise_metric,
74
77
  )
75
78
 
76
79
  def __call__(
@@ -87,7 +90,7 @@ class DistanceTest:
87
90
  adata: Annotated data matrix.
88
91
  groupby: Key in adata.obs for grouping cells.
89
92
  contrast: Name of the contrast group.
90
- show_progressbar: Whether to print progress. Defaults to True.
93
+ show_progressbar: Whether to print progress.
91
94
 
92
95
  Returns:
93
96
  pandas.DataFrame: Results of the permutation test, with columns:
@@ -99,9 +102,9 @@ class DistanceTest:
99
102
 
100
103
  Examples:
101
104
  >>> import pertpy as pt
102
- >>> adata = pt.dt.distance_example_data()
103
- >>> distance_test = pt.tl.DistanceTest('edistance', n_perms=1000)
104
- >>> tab = distance_test(adata, groupby='perturbation', contrast='control')
105
+ >>> adata = pt.dt.distance_example()
106
+ >>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
107
+ >>> tab = distance_test(adata, groupby="perturbation", contrast="control")
105
108
  """
106
109
  if self.distance.metric_fct.accepts_precomputed:
107
110
  # Much faster if the metric can be called on the precomputed
@@ -121,7 +124,7 @@ class DistanceTest:
121
124
  adata: Annotated data matrix.
122
125
  groupby: Key in adata.obs for grouping cells.
123
126
  contrast: Name of the contrast group.
124
- show_progressbar: Whether to print progress. Defaults to True.
127
+ show_progressbar: Whether to print progress.
125
128
 
126
129
  Returns:
127
130
  pandas.DataFrame: Results of the permutation test, with columns:
@@ -133,9 +136,9 @@ class DistanceTest:
133
136
 
134
137
  Examples:
135
138
  >>> import pertpy as pt
136
- >>> adata = pt.dt.distance_example_data()
137
- >>> distance_test = pt.tl.DistanceTest('edistance', n_perms=1000)
138
- >>> test_results = distance_test.test_xy(adata, groupby='perturbation', contrast='control')
139
+ >>> adata = pt.dt.distance_example()
140
+ >>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
141
+ >>> test_results = distance_test.test_xy(adata, groupby="perturbation", contrast="control")
139
142
  """
140
143
  groups = adata.obs[groupby].unique()
141
144
  if contrast not in groups:
@@ -176,7 +179,8 @@ class DistanceTest:
176
179
  # Evaluate the test
177
180
  # count times shuffling resulted in larger distance
178
181
  comparison_results = np.array(
179
- pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0, dtype=int
182
+ pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0,
183
+ dtype=int,
180
184
  )
181
185
  n_failures = pd.Series(np.clip(np.sum(comparison_results, axis=1), 1, np.inf), index=df.index)
182
186
  pvalues = n_failures / self.n_perms
@@ -213,7 +217,7 @@ class DistanceTest:
213
217
  groupby: Key in adata.obs for grouping cells.
214
218
  contrast: Name of the contrast group.
215
219
  cell_wise_metric: Metric to use for pairwise distances.
216
- verbose: Whether to print progress. Defaults to True.
220
+ verbose: Whether to print progress.
217
221
 
218
222
  Returns:
219
223
  pandas.DataFrame: Results of the permutation test, with columns:
@@ -225,9 +229,9 @@ class DistanceTest:
225
229
 
226
230
  Examples:
227
231
  >>> import pertpy as pt
228
- >>> adata = pt.dt.distance_example_data()
229
- >>> distance_test = pt.tl.DistanceTest('edistance', n_perms=1000)
230
- >>> test_results = distance_test.test_precomputed(adata, groupby='perturbation', contrast='control')
232
+ >>> adata = pt.dt.distance_example()
233
+ >>> distance_test = pt.tl.DistanceTest("edistance", n_perms=1000)
234
+ >>> test_results = distance_test.test_precomputed(adata, groupby="perturbation", contrast="control")
231
235
  """
232
236
  if not self.distance.metric_fct.accepts_precomputed:
233
237
  raise ValueError(f"Metric {self.metric} does not accept precomputed distances.")
@@ -284,7 +288,8 @@ class DistanceTest:
284
288
  # Evaluate the test
285
289
  # count times shuffling resulted in larger distance
286
290
  comparison_results = np.array(
287
- pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0, dtype=int
291
+ pd.concat([r["distance"] - df["distance"] for r in results], axis=1) > 0,
292
+ dtype=int,
288
293
  )
289
294
  n_failures = pd.Series(np.clip(np.sum(comparison_results, axis=1), 1, np.inf), index=df.index)
290
295
  pvalues = n_failures / self.n_perms