pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- pertpy/__init__.py +4 -2
- pertpy/data/__init__.py +66 -1
- pertpy/data/_dataloader.py +28 -26
- pertpy/data/_datasets.py +261 -92
- pertpy/metadata/__init__.py +6 -0
- pertpy/metadata/_cell_line.py +795 -0
- pertpy/metadata/_compound.py +128 -0
- pertpy/metadata/_drug.py +238 -0
- pertpy/metadata/_look_up.py +569 -0
- pertpy/metadata/_metadata.py +70 -0
- pertpy/metadata/_moa.py +125 -0
- pertpy/plot/__init__.py +0 -13
- pertpy/preprocessing/__init__.py +2 -0
- pertpy/preprocessing/_guide_rna.py +89 -6
- pertpy/tools/__init__.py +48 -15
- pertpy/tools/_augur.py +329 -32
- pertpy/tools/_cinemaot.py +145 -6
- pertpy/tools/_coda/_base_coda.py +1237 -116
- pertpy/tools/_coda/_sccoda.py +66 -36
- pertpy/tools/_coda/_tasccoda.py +46 -39
- pertpy/tools/_dialogue.py +180 -77
- pertpy/tools/_differential_gene_expression/__init__.py +20 -0
- pertpy/tools/_differential_gene_expression/_base.py +657 -0
- pertpy/tools/_differential_gene_expression/_checks.py +41 -0
- pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
- pertpy/tools/_differential_gene_expression/_edger.py +125 -0
- pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
- pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
- pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
- pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
- pertpy/tools/_distances/_distance_tests.py +29 -24
- pertpy/tools/_distances/_distances.py +584 -98
- pertpy/tools/_enrichment.py +460 -0
- pertpy/tools/_kernel_pca.py +1 -1
- pertpy/tools/_milo.py +406 -49
- pertpy/tools/_mixscape.py +677 -55
- pertpy/tools/_perturbation_space/_clustering.py +10 -3
- pertpy/tools/_perturbation_space/_comparison.py +112 -0
- pertpy/tools/_perturbation_space/_discriminator_classifiers.py +524 -0
- pertpy/tools/_perturbation_space/_perturbation_space.py +146 -52
- pertpy/tools/_perturbation_space/_simple.py +52 -11
- pertpy/tools/_scgen/__init__.py +1 -1
- pertpy/tools/_scgen/_base_components.py +2 -3
- pertpy/tools/_scgen/_scgen.py +706 -0
- pertpy/tools/_scgen/_utils.py +3 -5
- pertpy/tools/decoupler_LICENSE +674 -0
- {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +48 -20
- pertpy-0.8.0.dist-info/RECORD +57 -0
- {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
- pertpy/plot/_augur.py +0 -234
- pertpy/plot/_cinemaot.py +0 -81
- pertpy/plot/_coda.py +0 -1001
- pertpy/plot/_dialogue.py +0 -91
- pertpy/plot/_guide_rna.py +0 -82
- pertpy/plot/_milopy.py +0 -284
- pertpy/plot/_mixscape.py +0 -594
- pertpy/plot/_scgen.py +0 -337
- pertpy/tools/_differential_gene_expression.py +0 -99
- pertpy/tools/_metadata/__init__.py +0 -0
- pertpy/tools/_metadata/_cell_line.py +0 -613
- pertpy/tools/_metadata/_look_up.py +0 -342
- pertpy/tools/_perturbation_space/_discriminator_classifier.py +0 -381
- pertpy/tools/_scgen/_jax_scgen.py +0 -370
- pertpy-0.6.0.dist-info/RECORD +0 -50
- /pertpy/tools/_scgen/{_jax_scgenvae.py → _scgenvae.py} +0 -0
- {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0
pertpy/metadata/_moa.py
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import TYPE_CHECKING
|
5
|
+
|
6
|
+
import numpy as np
|
7
|
+
import pandas as pd
|
8
|
+
from scanpy import settings
|
9
|
+
|
10
|
+
from pertpy.data._dataloader import _download
|
11
|
+
|
12
|
+
from ._look_up import LookUp
|
13
|
+
from ._metadata import MetaData
|
14
|
+
|
15
|
+
if TYPE_CHECKING:
|
16
|
+
from anndata import AnnData
|
17
|
+
|
18
|
+
|
19
|
+
class Moa(MetaData):
|
20
|
+
"""Utilities to fetch metadata for mechanism of action studies."""
|
21
|
+
|
22
|
+
def __init__(self):
|
23
|
+
self.clue = None
|
24
|
+
|
25
|
+
def _download_clue(self) -> None:
|
26
|
+
clue_path = Path(settings.cachedir) / "repurposing_drugs_20200324.txt"
|
27
|
+
if not Path(clue_path).exists():
|
28
|
+
_download(
|
29
|
+
url="https://s3.amazonaws.com/data.clue.io/repurposing/downloads/repurposing_drugs_20200324.txt",
|
30
|
+
output_file_name="repurposing_drugs_20200324.txt",
|
31
|
+
output_path=settings.cachedir,
|
32
|
+
block_size=4096,
|
33
|
+
is_zip=False,
|
34
|
+
)
|
35
|
+
self.clue = pd.read_csv(clue_path, sep=" ", skiprows=9)
|
36
|
+
self.clue = self.clue[["pert_iname", "moa", "target"]]
|
37
|
+
|
38
|
+
def annotate(
|
39
|
+
self,
|
40
|
+
adata: AnnData,
|
41
|
+
query_id: str = "perturbation",
|
42
|
+
target: str | None = None,
|
43
|
+
verbosity: int | str = 5,
|
44
|
+
copy: bool = False,
|
45
|
+
) -> AnnData:
|
46
|
+
"""Annotate cells affected by perturbations by mechanism of action.
|
47
|
+
|
48
|
+
For each cell, we fetch the mechanism of action and molecular targets of the compounds sourced from clue.io.
|
49
|
+
|
50
|
+
Args:
|
51
|
+
adata: The data object to annotate.
|
52
|
+
query_id: The column of `.obs` with the name of a perturbagen.
|
53
|
+
target: The column of `.obs` with target information. If set to None, all MoAs are retrieved without comparing molecular targets.
|
54
|
+
verbosity: The number of unmatched identifiers to print. Can be either a non-negative integer or 'all'.
|
55
|
+
copy: Determines whether a copy of the `adata` is returned.
|
56
|
+
|
57
|
+
Returns:
|
58
|
+
Returns an AnnData object with MoA annotation.
|
59
|
+
"""
|
60
|
+
if copy:
|
61
|
+
adata = adata.copy()
|
62
|
+
|
63
|
+
if query_id not in adata.obs.columns:
|
64
|
+
raise ValueError(f"The requested query_id {query_id} is not in `adata.obs`.\n" "Please check again.")
|
65
|
+
|
66
|
+
if self.clue is None:
|
67
|
+
self._download_clue()
|
68
|
+
|
69
|
+
identifier_num_all = len(adata.obs[query_id].unique())
|
70
|
+
not_matched_identifiers = list(set(adata.obs[query_id].str.lower()) - set(self.clue["pert_iname"].str.lower()))
|
71
|
+
self._warn_unmatch(
|
72
|
+
total_identifiers=identifier_num_all,
|
73
|
+
unmatched_identifiers=not_matched_identifiers,
|
74
|
+
query_id=query_id,
|
75
|
+
reference_id="pert_iname",
|
76
|
+
metadata_type="moa",
|
77
|
+
verbosity=verbosity,
|
78
|
+
)
|
79
|
+
|
80
|
+
adata.obs = (
|
81
|
+
adata.obs.merge(
|
82
|
+
self.clue,
|
83
|
+
left_on=adata.obs[query_id].str.lower(),
|
84
|
+
right_on=self.clue["pert_iname"].str.lower(),
|
85
|
+
how="left",
|
86
|
+
suffixes=("", "_fromMeta"),
|
87
|
+
)
|
88
|
+
.set_index(adata.obs.index)
|
89
|
+
.drop("key_0", axis=1)
|
90
|
+
)
|
91
|
+
|
92
|
+
# If target column is given, check whether it is one of the targets listed in the metadata
|
93
|
+
# If inconsistent, treat this perturbagen as unmatched and overwrite the annotated metadata with NaN
|
94
|
+
if target is not None:
|
95
|
+
target_meta = "target" if target != "target" else "target_fromMeta"
|
96
|
+
adata.obs[target_meta] = adata.obs[target_meta].mask(
|
97
|
+
~adata.obs.apply(lambda row: str(row[target]) in str(row[target_meta]), axis=1)
|
98
|
+
)
|
99
|
+
pertname_meta = "pert_iname" if query_id != "pert_iname" else "pert_iname_fromMeta"
|
100
|
+
adata.obs.loc[adata.obs[target_meta].isna(), [pertname_meta, "moa"]] = np.nan
|
101
|
+
|
102
|
+
# If query_id and reference_id have different names, there will be a column for each of them after merging
|
103
|
+
# which is redundant as they refer to the same information.
|
104
|
+
if query_id != "pert_iname":
|
105
|
+
del adata.obs["pert_iname"]
|
106
|
+
|
107
|
+
return adata
|
108
|
+
|
109
|
+
def lookup(self) -> LookUp:
|
110
|
+
"""Generate LookUp object for Moa metadata.
|
111
|
+
|
112
|
+
The LookUp object provides an overview of the metadata to annotate.
|
113
|
+
The `annotate` method of `Moa` has a corresponding lookup function in the LookUp object,
|
114
|
+
where users can search the query_ids and targets in the metadata.
|
115
|
+
|
116
|
+
Returns:
|
117
|
+
Returns a LookUp object specific for MoA annotation.
|
118
|
+
"""
|
119
|
+
if self.clue is None:
|
120
|
+
self._download_clue()
|
121
|
+
|
122
|
+
return LookUp(
|
123
|
+
type="moa",
|
124
|
+
transfer_metadata=[self.clue],
|
125
|
+
)
|
pertpy/plot/__init__.py
CHANGED
@@ -1,13 +0,0 @@
|
|
1
|
-
from pertpy.plot._augur import AugurpyPlot as ag
|
2
|
-
from pertpy.plot._dialogue import DialoguePlot as dl
|
3
|
-
|
4
|
-
try:
|
5
|
-
from pertpy.plot._coda import CodaPlot as coda
|
6
|
-
except ImportError:
|
7
|
-
pass
|
8
|
-
|
9
|
-
from pertpy.plot._cinemaot import CinemaotPlot as cot
|
10
|
-
from pertpy.plot._guide_rna import GuideRnaPlot as guide
|
11
|
-
from pertpy.plot._milopy import MilopyPlot as milo
|
12
|
-
from pertpy.plot._mixscape import MixscapePlot as ms
|
13
|
-
from pertpy.plot._scgen import JaxscgenPlot as scg
|
pertpy/preprocessing/__init__.py
CHANGED
@@ -1,12 +1,16 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import uuid
|
3
4
|
from typing import TYPE_CHECKING
|
4
5
|
|
5
6
|
import numpy as np
|
7
|
+
import pandas as pd
|
8
|
+
import scanpy as sc
|
6
9
|
import scipy
|
7
10
|
|
8
11
|
if TYPE_CHECKING:
|
9
12
|
from anndata import AnnData
|
13
|
+
from matplotlib.axes import Axes
|
10
14
|
|
11
15
|
|
12
16
|
class GuideAssignment:
|
@@ -30,16 +34,15 @@ class GuideAssignment:
|
|
30
34
|
assignment_threshold: The count threshold that is required for an assignment to be viable.
|
31
35
|
layer: Key to the layer containing raw count values of the gRNAs.
|
32
36
|
adata.X is used if layer is None. Expects count data.
|
33
|
-
output_layer: Assigned guide will be saved on adata.layers[output_key].
|
37
|
+
output_layer: Assigned guide will be saved on adata.layers[output_layer].
|
34
38
|
only_return_results: If True, input AnnData is not modified and the result is returned as an np.ndarray.
|
35
|
-
Defaults to False.
|
36
39
|
|
37
40
|
Examples:
|
38
41
|
Each cell is assigned to gRNA that occurs at least 5 times in the respective cell.
|
39
42
|
|
40
43
|
>>> import pertpy as pt
|
41
44
|
>>> mdata = pt.data.papalexi_2021()
|
42
|
-
>>> gdo = mdata.mod[
|
45
|
+
>>> gdo = mdata.mod["gdo"]
|
43
46
|
>>> ga = pt.pp.GuideAssignment()
|
44
47
|
>>> ga.assign_by_threshold(gdo, assignment_threshold=5)
|
45
48
|
"""
|
@@ -71,7 +74,6 @@ class GuideAssignment:
|
|
71
74
|
|
72
75
|
Args:
|
73
76
|
adata: Annotated data matrix containing gRNA values
|
74
|
-
assignment_threshold: If a gRNA is available for at least `assignment_threshold`, it will be recognized as assigned.
|
75
77
|
assignment_threshold: The count threshold that is required for an assignment to be viable.
|
76
78
|
layer: Key to the layer containing raw count values of the gRNAs.
|
77
79
|
adata.X is used if layer is None. Expects count data.
|
@@ -83,8 +85,8 @@ class GuideAssignment:
|
|
83
85
|
Each cell is assigned to the most expressed gRNA if it has at least 5 counts.
|
84
86
|
|
85
87
|
>>> import pertpy as pt
|
86
|
-
>>> mdata = pt.
|
87
|
-
>>> gdo = mdata.mod[
|
88
|
+
>>> mdata = pt.dt.papalexi_2021()
|
89
|
+
>>> gdo = mdata.mod["gdo"]
|
88
90
|
>>> ga = pt.pp.GuideAssignment()
|
89
91
|
>>> ga.assign_to_max_guide(gdo, assignment_threshold=5)
|
90
92
|
"""
|
@@ -103,3 +105,84 @@ class GuideAssignment:
|
|
103
105
|
adata.obs[output_key] = assigned_grna
|
104
106
|
|
105
107
|
return None
|
108
|
+
|
109
|
+
def plot_heatmap(
|
110
|
+
self,
|
111
|
+
adata: AnnData,
|
112
|
+
layer: str | None = None,
|
113
|
+
order_by: np.ndarray | str | None = None,
|
114
|
+
key_to_save_order: str = None,
|
115
|
+
**kwargs,
|
116
|
+
) -> list[Axes]:
|
117
|
+
"""Heatmap plotting of guide RNA expression matrix.
|
118
|
+
|
119
|
+
Assuming guides have sparse expression, this function reorders cells
|
120
|
+
and plots guide RNA expression so that a nice sparse representation is achieved.
|
121
|
+
The cell ordering can be stored and reused in future plots to obtain consistent
|
122
|
+
plots before and after analysis of the guide RNA expression.
|
123
|
+
Note: This function expects log-normalized or binary data.
|
124
|
+
|
125
|
+
Args:
|
126
|
+
adata: Annotated data matrix containing gRNA values
|
127
|
+
layer: Key to the layer containing log normalized count values of the gRNAs.
|
128
|
+
adata.X is used if layer is None.
|
129
|
+
order_by: The order of cells in y axis.
|
130
|
+
If None, cells will be reordered to have a nice sparse representation.
|
131
|
+
If a string is provided, adata.obs[order_by] will be used as the order.
|
132
|
+
If a numpy array is provided, the array will be used for ordering.
|
133
|
+
key_to_save_order: The obs key to save cell orders in the current plot. Only saves if not None.
|
134
|
+
kwargs: Are passed to sc.pl.heatmap.
|
135
|
+
|
136
|
+
Returns:
|
137
|
+
List of Axes. Alternatively you can pass save or show parameters as they will be passed to sc.pl.heatmap.
|
138
|
+
Order of cells in the y-axis will be saved on adata.obs[key_to_save_order] if provided.
|
139
|
+
|
140
|
+
Examples:
|
141
|
+
Each cell is assigned to gRNA that occurs at least 5 times in the respective cell, which is then
|
142
|
+
visualized using a heatmap.
|
143
|
+
|
144
|
+
>>> import pertpy as pt
|
145
|
+
>>> mdata = pt.dt.papalexi_2021()
|
146
|
+
>>> gdo = mdata.mod["gdo"]
|
147
|
+
>>> ga = pt.pp.GuideAssignment()
|
148
|
+
>>> ga.assign_by_threshold(gdo, assignment_threshold=5)
|
149
|
+
>>> ga.plot_heatmap(gdo)
|
150
|
+
"""
|
151
|
+
data = adata.X if layer is None else adata.layers[layer]
|
152
|
+
|
153
|
+
if order_by is None:
|
154
|
+
if scipy.sparse.issparse(data):
|
155
|
+
max_values = data.max(axis=1).A.squeeze()
|
156
|
+
data_argmax = data.argmax(axis=1).A.squeeze()
|
157
|
+
max_guide_index = np.where(max_values != data.min(axis=1).A.squeeze(), data_argmax, -1)
|
158
|
+
else:
|
159
|
+
max_guide_index = np.where(
|
160
|
+
data.max(axis=1).squeeze() != data.min(axis=1).squeeze(), data.argmax(axis=1).squeeze(), -1
|
161
|
+
)
|
162
|
+
order = np.argsort(max_guide_index)
|
163
|
+
elif isinstance(order_by, str):
|
164
|
+
order = np.argsort(adata.obs[order_by])
|
165
|
+
else:
|
166
|
+
order = order_by
|
167
|
+
|
168
|
+
temp_col_name = f"_tmp_pertpy_grna_plot_{uuid.uuid4()}"
|
169
|
+
adata.obs[temp_col_name] = pd.Categorical(["" for _ in range(adata.shape[0])])
|
170
|
+
|
171
|
+
if key_to_save_order is not None:
|
172
|
+
adata.obs[key_to_save_order] = pd.Categorical(order)
|
173
|
+
|
174
|
+
try:
|
175
|
+
axis_group = sc.pl.heatmap(
|
176
|
+
adata[order, :],
|
177
|
+
var_names=adata.var.index.tolist(),
|
178
|
+
groupby=temp_col_name,
|
179
|
+
cmap="viridis",
|
180
|
+
use_raw=False,
|
181
|
+
dendrogram=False,
|
182
|
+
layer=layer,
|
183
|
+
**kwargs,
|
184
|
+
)
|
185
|
+
finally:
|
186
|
+
del adata.obs[temp_col_name]
|
187
|
+
|
188
|
+
return axis_group
|
pertpy/tools/__init__.py
CHANGED
@@ -1,24 +1,57 @@
|
|
1
|
-
from rich import print
|
2
|
-
|
3
1
|
from pertpy.tools._augur import Augur
|
4
2
|
from pertpy.tools._cinemaot import Cinemaot
|
3
|
+
from pertpy.tools._coda._sccoda import Sccoda
|
4
|
+
from pertpy.tools._coda._tasccoda import Tasccoda
|
5
5
|
from pertpy.tools._dialogue import Dialogue
|
6
|
-
from pertpy.tools._differential_gene_expression import
|
6
|
+
from pertpy.tools._differential_gene_expression import (
|
7
|
+
DGEEVAL,
|
8
|
+
EdgeR,
|
9
|
+
PyDESeq2,
|
10
|
+
Statsmodels,
|
11
|
+
TTest,
|
12
|
+
WilcoxonTest,
|
13
|
+
)
|
7
14
|
from pertpy.tools._distances._distance_tests import DistanceTest
|
8
15
|
from pertpy.tools._distances._distances import Distance
|
9
|
-
from pertpy.tools.
|
16
|
+
from pertpy.tools._enrichment import Enrichment
|
10
17
|
from pertpy.tools._milo import Milo
|
11
18
|
from pertpy.tools._mixscape import Mixscape
|
12
19
|
from pertpy.tools._perturbation_space._clustering import ClusteringSpace
|
13
|
-
from pertpy.tools._perturbation_space.
|
14
|
-
from pertpy.tools._perturbation_space.
|
15
|
-
|
20
|
+
from pertpy.tools._perturbation_space._comparison import PerturbationComparison
|
21
|
+
from pertpy.tools._perturbation_space._discriminator_classifiers import (
|
22
|
+
LRClassifierSpace,
|
23
|
+
MLPClassifierSpace,
|
24
|
+
)
|
25
|
+
from pertpy.tools._perturbation_space._simple import (
|
26
|
+
CentroidSpace,
|
27
|
+
DBSCANSpace,
|
28
|
+
KMeansSpace,
|
29
|
+
PseudobulkSpace,
|
30
|
+
)
|
31
|
+
from pertpy.tools._scgen import Scgen
|
16
32
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
33
|
+
__all__ = [
|
34
|
+
"Augur",
|
35
|
+
"Cinemaot",
|
36
|
+
"Sccoda",
|
37
|
+
"Tasccoda",
|
38
|
+
"Dialogue",
|
39
|
+
"EdgeR",
|
40
|
+
"PyDESeq2",
|
41
|
+
"WilcoxonTest",
|
42
|
+
"TTest",
|
43
|
+
"Statsmodels",
|
44
|
+
"DistanceTest",
|
45
|
+
"Distance",
|
46
|
+
"Enrichment",
|
47
|
+
"Milo",
|
48
|
+
"Mixscape",
|
49
|
+
"ClusteringSpace",
|
50
|
+
"LRClassifierSpace",
|
51
|
+
"MLPClassifierSpace",
|
52
|
+
"CentroidSpace",
|
53
|
+
"DBSCANSpace",
|
54
|
+
"KMeansSpace",
|
55
|
+
"PseudobulkSpace",
|
56
|
+
"Scgen",
|
57
|
+
]
|