pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. pertpy/__init__.py +4 -2
  2. pertpy/data/__init__.py +66 -1
  3. pertpy/data/_dataloader.py +28 -26
  4. pertpy/data/_datasets.py +261 -92
  5. pertpy/metadata/__init__.py +6 -0
  6. pertpy/metadata/_cell_line.py +795 -0
  7. pertpy/metadata/_compound.py +128 -0
  8. pertpy/metadata/_drug.py +238 -0
  9. pertpy/metadata/_look_up.py +569 -0
  10. pertpy/metadata/_metadata.py +70 -0
  11. pertpy/metadata/_moa.py +125 -0
  12. pertpy/plot/__init__.py +0 -13
  13. pertpy/preprocessing/__init__.py +2 -0
  14. pertpy/preprocessing/_guide_rna.py +89 -6
  15. pertpy/tools/__init__.py +48 -15
  16. pertpy/tools/_augur.py +329 -32
  17. pertpy/tools/_cinemaot.py +145 -6
  18. pertpy/tools/_coda/_base_coda.py +1237 -116
  19. pertpy/tools/_coda/_sccoda.py +66 -36
  20. pertpy/tools/_coda/_tasccoda.py +46 -39
  21. pertpy/tools/_dialogue.py +180 -77
  22. pertpy/tools/_differential_gene_expression/__init__.py +20 -0
  23. pertpy/tools/_differential_gene_expression/_base.py +657 -0
  24. pertpy/tools/_differential_gene_expression/_checks.py +41 -0
  25. pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
  26. pertpy/tools/_differential_gene_expression/_edger.py +125 -0
  27. pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
  28. pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
  29. pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
  30. pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
  31. pertpy/tools/_distances/_distance_tests.py +29 -24
  32. pertpy/tools/_distances/_distances.py +584 -98
  33. pertpy/tools/_enrichment.py +460 -0
  34. pertpy/tools/_kernel_pca.py +1 -1
  35. pertpy/tools/_milo.py +406 -49
  36. pertpy/tools/_mixscape.py +677 -55
  37. pertpy/tools/_perturbation_space/_clustering.py +10 -3
  38. pertpy/tools/_perturbation_space/_comparison.py +112 -0
  39. pertpy/tools/_perturbation_space/_discriminator_classifiers.py +524 -0
  40. pertpy/tools/_perturbation_space/_perturbation_space.py +146 -52
  41. pertpy/tools/_perturbation_space/_simple.py +52 -11
  42. pertpy/tools/_scgen/__init__.py +1 -1
  43. pertpy/tools/_scgen/_base_components.py +2 -3
  44. pertpy/tools/_scgen/_scgen.py +706 -0
  45. pertpy/tools/_scgen/_utils.py +3 -5
  46. pertpy/tools/decoupler_LICENSE +674 -0
  47. {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +48 -20
  48. pertpy-0.8.0.dist-info/RECORD +57 -0
  49. {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
  50. pertpy/plot/_augur.py +0 -234
  51. pertpy/plot/_cinemaot.py +0 -81
  52. pertpy/plot/_coda.py +0 -1001
  53. pertpy/plot/_dialogue.py +0 -91
  54. pertpy/plot/_guide_rna.py +0 -82
  55. pertpy/plot/_milopy.py +0 -284
  56. pertpy/plot/_mixscape.py +0 -594
  57. pertpy/plot/_scgen.py +0 -337
  58. pertpy/tools/_differential_gene_expression.py +0 -99
  59. pertpy/tools/_metadata/__init__.py +0 -0
  60. pertpy/tools/_metadata/_cell_line.py +0 -613
  61. pertpy/tools/_metadata/_look_up.py +0 -342
  62. pertpy/tools/_perturbation_space/_discriminator_classifier.py +0 -381
  63. pertpy/tools/_scgen/_jax_scgen.py +0 -370
  64. pertpy-0.6.0.dist-info/RECORD +0 -50
  65. /pertpy/tools/_scgen/{_jax_scgenvae.py → _scgenvae.py} +0 -0
  66. {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,125 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import TYPE_CHECKING
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ from scanpy import settings
9
+
10
+ from pertpy.data._dataloader import _download
11
+
12
+ from ._look_up import LookUp
13
+ from ._metadata import MetaData
14
+
15
+ if TYPE_CHECKING:
16
+ from anndata import AnnData
17
+
18
+
19
+ class Moa(MetaData):
20
+ """Utilities to fetch metadata for mechanism of action studies."""
21
+
22
+ def __init__(self):
23
+ self.clue = None
24
+
25
+ def _download_clue(self) -> None:
26
+ clue_path = Path(settings.cachedir) / "repurposing_drugs_20200324.txt"
27
+ if not Path(clue_path).exists():
28
+ _download(
29
+ url="https://s3.amazonaws.com/data.clue.io/repurposing/downloads/repurposing_drugs_20200324.txt",
30
+ output_file_name="repurposing_drugs_20200324.txt",
31
+ output_path=settings.cachedir,
32
+ block_size=4096,
33
+ is_zip=False,
34
+ )
35
+ self.clue = pd.read_csv(clue_path, sep=" ", skiprows=9)
36
+ self.clue = self.clue[["pert_iname", "moa", "target"]]
37
+
38
+ def annotate(
39
+ self,
40
+ adata: AnnData,
41
+ query_id: str = "perturbation",
42
+ target: str | None = None,
43
+ verbosity: int | str = 5,
44
+ copy: bool = False,
45
+ ) -> AnnData:
46
+ """Annotate cells affected by perturbations by mechanism of action.
47
+
48
+ For each cell, we fetch the mechanism of action and molecular targets of the compounds sourced from clue.io.
49
+
50
+ Args:
51
+ adata: The data object to annotate.
52
+ query_id: The column of `.obs` with the name of a perturbagen.
53
+ target: The column of `.obs` with target information. If set to None, all MoAs are retrieved without comparing molecular targets.
54
+ verbosity: The number of unmatched identifiers to print, can be either non-negative values or 'all'.
55
+ copy: Determines whether a copy of the `adata` is returned.
56
+
57
+ Returns:
58
+ Returns an AnnData object with MoA annotation.
59
+ """
60
+ if copy:
61
+ adata = adata.copy()
62
+
63
+ if query_id not in adata.obs.columns:
64
+ raise ValueError(f"The requested query_id {query_id} is not in `adata.obs`.\n" "Please check again.")
65
+
66
+ if self.clue is None:
67
+ self._download_clue()
68
+
69
+ identifier_num_all = len(adata.obs[query_id].unique())
70
+ not_matched_identifiers = list(set(adata.obs[query_id].str.lower()) - set(self.clue["pert_iname"].str.lower()))
71
+ self._warn_unmatch(
72
+ total_identifiers=identifier_num_all,
73
+ unmatched_identifiers=not_matched_identifiers,
74
+ query_id=query_id,
75
+ reference_id="pert_iname",
76
+ metadata_type="moa",
77
+ verbosity=verbosity,
78
+ )
79
+
80
+ adata.obs = (
81
+ adata.obs.merge(
82
+ self.clue,
83
+ left_on=adata.obs[query_id].str.lower(),
84
+ right_on=self.clue["pert_iname"].str.lower(),
85
+ how="left",
86
+ suffixes=("", "_fromMeta"),
87
+ )
88
+ .set_index(adata.obs.index)
89
+ .drop("key_0", axis=1)
90
+ )
91
+
92
+ # If target column is given, check whether it is one of the targets listed in the metadata
93
+ # If inconsistent, treat this perturbagen as unmatched and overwrite the annotated metadata with NaN
94
+ if target is not None:
95
+ target_meta = "target" if target != "target" else "target_fromMeta"
96
+ adata.obs[target_meta] = adata.obs[target_meta].mask(
97
+ ~adata.obs.apply(lambda row: str(row[target]) in str(row[target_meta]), axis=1)
98
+ )
99
+ pertname_meta = "pert_iname" if query_id != "pert_iname" else "pert_iname_fromMeta"
100
+ adata.obs.loc[adata.obs[target_meta].isna(), [pertname_meta, "moa"]] = np.nan
101
+
102
+ # If query_id and reference_id have different names, there will be a column for each of them after merging
103
+ # which is redundant as they refer to the same information.
104
+ if query_id != "pert_iname":
105
+ del adata.obs["pert_iname"]
106
+
107
+ return adata
108
+
109
+ def lookup(self) -> LookUp:
110
+ """Generate LookUp object for Moa metadata.
111
+
112
+ The LookUp object provides an overview of the metadata to annotate.
113
+ The `annotate` method has a corresponding lookup function in the LookUp object,
114
+ where users can search the query_ids and targets in the metadata.
115
+
116
+ Returns:
117
+ Returns a LookUp object specific for MoA annotation.
118
+ """
119
+ if self.clue is None:
120
+ self._download_clue()
121
+
122
+ return LookUp(
123
+ type="moa",
124
+ transfer_metadata=[self.clue],
125
+ )
pertpy/plot/__init__.py CHANGED
@@ -1,13 +0,0 @@
1
- from pertpy.plot._augur import AugurpyPlot as ag
2
- from pertpy.plot._dialogue import DialoguePlot as dl
3
-
4
- try:
5
- from pertpy.plot._coda import CodaPlot as coda
6
- except ImportError:
7
- pass
8
-
9
- from pertpy.plot._cinemaot import CinemaotPlot as cot
10
- from pertpy.plot._guide_rna import GuideRnaPlot as guide
11
- from pertpy.plot._milopy import MilopyPlot as milo
12
- from pertpy.plot._mixscape import MixscapePlot as ms
13
- from pertpy.plot._scgen import JaxscgenPlot as scg
@@ -1 +1,3 @@
1
1
  from ._guide_rna import GuideAssignment
2
+
3
+ __all__ = ["GuideAssignment"]
@@ -1,12 +1,16 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import uuid
3
4
  from typing import TYPE_CHECKING
4
5
 
5
6
  import numpy as np
7
+ import pandas as pd
8
+ import scanpy as sc
6
9
  import scipy
7
10
 
8
11
  if TYPE_CHECKING:
9
12
  from anndata import AnnData
13
+ from matplotlib.axes import Axes
10
14
 
11
15
 
12
16
  class GuideAssignment:
@@ -30,16 +34,15 @@ class GuideAssignment:
30
34
  assignment_threshold: The count threshold that is required for an assignment to be viable.
31
35
  layer: Key to the layer containing raw count values of the gRNAs.
32
36
  adata.X is used if layer is None. Expects count data.
33
- output_layer: Assigned guide will be saved on adata.layers[output_key]. Defaults to `assigned_guides`.
37
+ output_layer: Assigned guide will be saved on adata.layers[output_layer].
34
38
  only_return_results: If True, input AnnData is not modified and the result is returned as an np.ndarray.
35
- Defaults to False.
36
39
 
37
40
  Examples:
38
41
  Each cell is assigned to gRNA that occurs at least 5 times in the respective cell.
39
42
 
40
43
  >>> import pertpy as pt
41
44
  >>> mdata = pt.data.papalexi_2021()
42
- >>> gdo = mdata.mod['gdo']
45
+ >>> gdo = mdata.mod["gdo"]
43
46
  >>> ga = pt.pp.GuideAssignment()
44
47
  >>> ga.assign_by_threshold(gdo, assignment_threshold=5)
45
48
  """
@@ -71,7 +74,6 @@ class GuideAssignment:
71
74
 
72
75
  Args:
73
76
  adata: Annotated data matrix containing gRNA values
74
- assignment_threshold: If a gRNA is available for at least `assignment_threshold`, it will be recognized as assigned.
75
77
  assignment_threshold: The count threshold that is required for an assignment to be viable.
76
78
  layer: Key to the layer containing raw count values of the gRNAs.
77
79
  adata.X is used if layer is None. Expects count data.
@@ -83,8 +85,8 @@ class GuideAssignment:
83
85
  Each cell is assigned to the most expressed gRNA if it has at least 5 counts.
84
86
 
85
87
  >>> import pertpy as pt
86
- >>> mdata = pt.data.papalexi_2021()
87
- >>> gdo = mdata.mod['gdo']
88
+ >>> mdata = pt.dt.papalexi_2021()
89
+ >>> gdo = mdata.mod["gdo"]
88
90
  >>> ga = pt.pp.GuideAssignment()
89
91
  >>> ga.assign_to_max_guide(gdo, assignment_threshold=5)
90
92
  """
@@ -103,3 +105,84 @@ class GuideAssignment:
103
105
  adata.obs[output_key] = assigned_grna
104
106
 
105
107
  return None
108
+
109
+ def plot_heatmap(
110
+ self,
111
+ adata: AnnData,
112
+ layer: str | None = None,
113
+ order_by: np.ndarray | str | None = None,
114
+ key_to_save_order: str = None,
115
+ **kwargs,
116
+ ) -> list[Axes]:
117
+ """Heatmap plotting of guide RNA expression matrix.
118
+
119
+ Assuming guides have sparse expression, this function reorders cells
120
+ and plots guide RNA expression so that a nice sparse representation is achieved.
121
+ The cell ordering can be stored and reused in future plots to obtain consistent
122
+ plots before and after analysis of the guide RNA expression.
123
+ Note: This function expects log-normalized or binary data.
124
+
125
+ Args:
126
+ adata: Annotated data matrix containing gRNA values
127
+ layer: Key to the layer containing log normalized count values of the gRNAs.
128
+ adata.X is used if layer is None.
129
+ order_by: The order of cells in y axis.
130
+ If None, cells will be reordered to have a nice sparse representation.
131
+ If a string is provided, adata.obs[order_by] will be used as the order.
132
+ If a numpy array is provided, the array will be used for ordering.
133
+ key_to_save_order: The obs key to save cell orders in the current plot. Only saves if not None.
134
+ kwargs: Are passed to sc.pl.heatmap.
135
+
136
+ Returns:
137
+ List of Axes. Alternatively you can pass save or show parameters as they will be passed to sc.pl.heatmap.
138
+ Order of cells in the y-axis will be saved on adata.obs[key_to_save_order] if provided.
139
+
140
+ Examples:
141
+ Each cell is assigned to gRNA that occurs at least 5 times in the respective cell, which is then
142
+ visualized using a heatmap.
143
+
144
+ >>> import pertpy as pt
145
+ >>> mdata = pt.dt.papalexi_2021()
146
+ >>> gdo = mdata.mod["gdo"]
147
+ >>> ga = pt.pp.GuideAssignment()
148
+ >>> ga.assign_by_threshold(gdo, assignment_threshold=5)
149
+ >>> ga.plot_heatmap(gdo)
150
+ """
151
+ data = adata.X if layer is None else adata.layers[layer]
152
+
153
+ if order_by is None:
154
+ if scipy.sparse.issparse(data):
155
+ max_values = data.max(axis=1).A.squeeze()
156
+ data_argmax = data.argmax(axis=1).A.squeeze()
157
+ max_guide_index = np.where(max_values != data.min(axis=1).A.squeeze(), data_argmax, -1)
158
+ else:
159
+ max_guide_index = np.where(
160
+ data.max(axis=1).squeeze() != data.min(axis=1).squeeze(), data.argmax(axis=1).squeeze(), -1
161
+ )
162
+ order = np.argsort(max_guide_index)
163
+ elif isinstance(order_by, str):
164
+ order = np.argsort(adata.obs[order_by])
165
+ else:
166
+ order = order_by
167
+
168
+ temp_col_name = f"_tmp_pertpy_grna_plot_{uuid.uuid4()}"
169
+ adata.obs[temp_col_name] = pd.Categorical(["" for _ in range(adata.shape[0])])
170
+
171
+ if key_to_save_order is not None:
172
+ adata.obs[key_to_save_order] = pd.Categorical(order)
173
+
174
+ try:
175
+ axis_group = sc.pl.heatmap(
176
+ adata[order, :],
177
+ var_names=adata.var.index.tolist(),
178
+ groupby=temp_col_name,
179
+ cmap="viridis",
180
+ use_raw=False,
181
+ dendrogram=False,
182
+ layer=layer,
183
+ **kwargs,
184
+ )
185
+ finally:
186
+ del adata.obs[temp_col_name]
187
+
188
+ return axis_group
pertpy/tools/__init__.py CHANGED
@@ -1,24 +1,57 @@
1
- from rich import print
2
-
3
1
  from pertpy.tools._augur import Augur
4
2
  from pertpy.tools._cinemaot import Cinemaot
3
+ from pertpy.tools._coda._sccoda import Sccoda
4
+ from pertpy.tools._coda._tasccoda import Tasccoda
5
5
  from pertpy.tools._dialogue import Dialogue
6
- from pertpy.tools._differential_gene_expression import DifferentialGeneExpression
6
+ from pertpy.tools._differential_gene_expression import (
7
+ DGEEVAL,
8
+ EdgeR,
9
+ PyDESeq2,
10
+ Statsmodels,
11
+ TTest,
12
+ WilcoxonTest,
13
+ )
7
14
  from pertpy.tools._distances._distance_tests import DistanceTest
8
15
  from pertpy.tools._distances._distances import Distance
9
- from pertpy.tools._metadata._cell_line import CellLineMetaData
16
+ from pertpy.tools._enrichment import Enrichment
10
17
  from pertpy.tools._milo import Milo
11
18
  from pertpy.tools._mixscape import Mixscape
12
19
  from pertpy.tools._perturbation_space._clustering import ClusteringSpace
13
- from pertpy.tools._perturbation_space._discriminator_classifier import DiscriminatorClassifierSpace
14
- from pertpy.tools._perturbation_space._simple import CentroidSpace, DBSCANSpace, KMeansSpace, PseudobulkSpace
15
- from pertpy.tools._scgen import SCGEN
20
+ from pertpy.tools._perturbation_space._comparison import PerturbationComparison
21
+ from pertpy.tools._perturbation_space._discriminator_classifiers import (
22
+ LRClassifierSpace,
23
+ MLPClassifierSpace,
24
+ )
25
+ from pertpy.tools._perturbation_space._simple import (
26
+ CentroidSpace,
27
+ DBSCANSpace,
28
+ KMeansSpace,
29
+ PseudobulkSpace,
30
+ )
31
+ from pertpy.tools._scgen import Scgen
16
32
 
17
- try:
18
- from pertpy.tools._coda._sccoda import Sccoda
19
- from pertpy.tools._coda._tasccoda import Tasccoda
20
- except ImportError as e:
21
- if "ete3" in str(e):
22
- print("[bold yellow]To use sccoda or tasccoda please install ete3 with [green]pip install ete3")
23
- else:
24
- raise e
33
+ __all__ = [
34
+ "Augur",
35
+ "Cinemaot",
36
+ "Sccoda",
37
+ "Tasccoda",
38
+ "Dialogue",
39
+ "EdgeR",
40
+ "PyDESeq2",
41
+ "WilcoxonTest",
42
+ "TTest",
43
+ "Statsmodels",
44
+ "DistanceTest",
45
+ "Distance",
46
+ "Enrichment",
47
+ "Milo",
48
+ "Mixscape",
49
+ "ClusteringSpace",
50
+ "LRClassifierSpace",
51
+ "MLPClassifierSpace",
52
+ "CentroidSpace",
53
+ "DBSCANSpace",
54
+ "KMeansSpace",
55
+ "PseudobulkSpace",
56
+ "Scgen",
57
+ ]