pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pertpy/__init__.py +4 -2
- pertpy/data/__init__.py +66 -1
- pertpy/data/_dataloader.py +28 -26
- pertpy/data/_datasets.py +261 -92
- pertpy/metadata/__init__.py +6 -0
- pertpy/metadata/_cell_line.py +795 -0
- pertpy/metadata/_compound.py +128 -0
- pertpy/metadata/_drug.py +238 -0
- pertpy/metadata/_look_up.py +569 -0
- pertpy/metadata/_metadata.py +70 -0
- pertpy/metadata/_moa.py +125 -0
- pertpy/plot/__init__.py +0 -13
- pertpy/preprocessing/__init__.py +2 -0
- pertpy/preprocessing/_guide_rna.py +89 -6
- pertpy/tools/__init__.py +48 -15
- pertpy/tools/_augur.py +329 -32
- pertpy/tools/_cinemaot.py +145 -6
- pertpy/tools/_coda/_base_coda.py +1237 -116
- pertpy/tools/_coda/_sccoda.py +66 -36
- pertpy/tools/_coda/_tasccoda.py +46 -39
- pertpy/tools/_dialogue.py +180 -77
- pertpy/tools/_differential_gene_expression/__init__.py +20 -0
- pertpy/tools/_differential_gene_expression/_base.py +657 -0
- pertpy/tools/_differential_gene_expression/_checks.py +41 -0
- pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
- pertpy/tools/_differential_gene_expression/_edger.py +125 -0
- pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
- pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
- pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
- pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
- pertpy/tools/_distances/_distance_tests.py +29 -24
- pertpy/tools/_distances/_distances.py +584 -98
- pertpy/tools/_enrichment.py +460 -0
- pertpy/tools/_kernel_pca.py +1 -1
- pertpy/tools/_milo.py +406 -49
- pertpy/tools/_mixscape.py +677 -55
- pertpy/tools/_perturbation_space/_clustering.py +10 -3
- pertpy/tools/_perturbation_space/_comparison.py +112 -0
- pertpy/tools/_perturbation_space/_discriminator_classifiers.py +524 -0
- pertpy/tools/_perturbation_space/_perturbation_space.py +146 -52
- pertpy/tools/_perturbation_space/_simple.py +52 -11
- pertpy/tools/_scgen/__init__.py +1 -1
- pertpy/tools/_scgen/_base_components.py +2 -3
- pertpy/tools/_scgen/_scgen.py +706 -0
- pertpy/tools/_scgen/_utils.py +3 -5
- pertpy/tools/decoupler_LICENSE +674 -0
- {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +48 -20
- pertpy-0.8.0.dist-info/RECORD +57 -0
- {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
- pertpy/plot/_augur.py +0 -234
- pertpy/plot/_cinemaot.py +0 -81
- pertpy/plot/_coda.py +0 -1001
- pertpy/plot/_dialogue.py +0 -91
- pertpy/plot/_guide_rna.py +0 -82
- pertpy/plot/_milopy.py +0 -284
- pertpy/plot/_mixscape.py +0 -594
- pertpy/plot/_scgen.py +0 -337
- pertpy/tools/_differential_gene_expression.py +0 -99
- pertpy/tools/_metadata/__init__.py +0 -0
- pertpy/tools/_metadata/_cell_line.py +0 -613
- pertpy/tools/_metadata/_look_up.py +0 -342
- pertpy/tools/_perturbation_space/_discriminator_classifier.py +0 -381
- pertpy/tools/_scgen/_jax_scgen.py +0 -370
- pertpy-0.6.0.dist-info/RECORD +0 -50
- /pertpy/tools/_scgen/{_jax_scgenvae.py → _scgenvae.py} +0 -0
- {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0
pertpy/metadata/_moa.py
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import TYPE_CHECKING
|
5
|
+
|
6
|
+
import numpy as np
|
7
|
+
import pandas as pd
|
8
|
+
from scanpy import settings
|
9
|
+
|
10
|
+
from pertpy.data._dataloader import _download
|
11
|
+
|
12
|
+
from ._look_up import LookUp
|
13
|
+
from ._metadata import MetaData
|
14
|
+
|
15
|
+
if TYPE_CHECKING:
|
16
|
+
from anndata import AnnData
|
17
|
+
|
18
|
+
|
19
|
+
class Moa(MetaData):
    """Utilities to fetch metadata for mechanism of action studies."""

    def __init__(self):
        # Populated lazily by `_download_clue` the first time it is needed.
        self.clue = None

    def _download_clue(self) -> None:
        """Load the clue.io drug repurposing table into `self.clue`.

        The file is downloaded into scanpy's cache directory if it is not there yet.
        The first 9 lines of the file are skipped and only the `pert_iname`,
        `moa` and `target` columns are kept.
        """
        file_name = "repurposing_drugs_20200324.txt"
        clue_path = Path(settings.cachedir) / file_name
        if not clue_path.exists():
            _download(
                url="https://s3.amazonaws.com/data.clue.io/repurposing/downloads/repurposing_drugs_20200324.txt",
                output_file_name=file_name,
                output_path=settings.cachedir,
                block_size=4096,
                is_zip=False,
            )
        # The table is tab-delimited. The separator is written as an escape sequence so that
        # it cannot be silently turned into a space by editors or formatters.
        self.clue = pd.read_csv(clue_path, sep="\t", skiprows=9)
        self.clue = self.clue[["pert_iname", "moa", "target"]]

    def annotate(
        self,
        adata: AnnData,
        query_id: str = "perturbation",
        target: str | None = None,
        verbosity: int | str = 5,
        copy: bool = False,
    ) -> AnnData:
        """Annotate cells affected by perturbations by mechanism of action.

        For each cell, we fetch the mechanism of action and molecular targets of the compounds sourced from clue.io.
        Perturbagen names are matched case-insensitively against the `pert_iname` column of the metadata.

        Args:
            adata: The data object to annotate.
            query_id: The column of `.obs` with the name of a perturbagen.
            target: The column of `.obs` with target information. If set to None, all MoAs are retrieved without comparing molecular targets.
            verbosity: The number of unmatched identifiers to print, can be either non-negative values or 'all'.
            copy: Determines whether a copy of the `adata` is returned.
                If False, `adata.obs` of the passed object is replaced in place.

        Returns:
            Returns an AnnData object with MoA annotation.

        Raises:
            ValueError: If `query_id` is not a column of `adata.obs`.
        """
        if copy:
            adata = adata.copy()

        if query_id not in adata.obs.columns:
            raise ValueError(f"The requested query_id {query_id} is not in `adata.obs`.\n" "Please check again.")

        if self.clue is None:
            self._download_clue()

        # Report the perturbagens that have no (case-insensitive) counterpart in the metadata.
        # `_warn_unmatch` is not defined in this class; it is expected to be provided by `MetaData`.
        identifier_num_all = len(adata.obs[query_id].unique())
        not_matched_identifiers = list(set(adata.obs[query_id].str.lower()) - set(self.clue["pert_iname"].str.lower()))
        self._warn_unmatch(
            total_identifiers=identifier_num_all,
            unmatched_identifiers=not_matched_identifiers,
            query_id=query_id,
            reference_id="pert_iname",
            metadata_type="moa",
            verbosity=verbosity,
        )

        # Left-join on the lower-cased names. Merging on array-like keys adds a helper column
        # `key_0`, which is dropped again; the original obs index is restored afterwards.
        adata.obs = (
            adata.obs.merge(
                self.clue,
                left_on=adata.obs[query_id].str.lower(),
                right_on=self.clue["pert_iname"].str.lower(),
                how="left",
                suffixes=("", "_fromMeta"),
            )
            .set_index(adata.obs.index)
            .drop("key_0", axis=1)
        )

        # If target column is given, check whether it is one of the targets listed in the metadata
        # If inconsistent, treat this perturbagen as unmatched and overwrite the annotated metadata with NaN
        if target is not None:
            # A user column that is itself called "target" collides with the metadata column,
            # in which case the merged metadata column carries the "_fromMeta" suffix.
            target_meta = "target" if target != "target" else "target_fromMeta"
            # NOTE(review): this is a plain substring test on the string representations, so a short
            # target name also matches when it is part of a longer listed name - confirm this is intended.
            adata.obs[target_meta] = adata.obs[target_meta].mask(
                ~adata.obs.apply(lambda row: str(row[target]) in str(row[target_meta]), axis=1)
            )
            pertname_meta = "pert_iname" if query_id != "pert_iname" else "pert_iname_fromMeta"
            adata.obs.loc[adata.obs[target_meta].isna(), [pertname_meta, "moa"]] = np.nan

        # If query_id and reference_id have different names, there will be a column for each of them after merging
        # which is redundant as they refer to the same information.
        if query_id != "pert_iname":
            del adata.obs["pert_iname"]

        return adata

    def lookup(self) -> LookUp:
        """Generate LookUp object for Moa metadata.

        The LookUp object provides an overview of the metadata to annotate.
        annotate_moa function has a corresponding lookup function in the LookUp object,
        where users can search the query_ids and targets in the metadata.
        The clue.io table is loaded first if that has not happened yet.

        Returns:
            Returns a LookUp object specific for MoA annotation.
        """
        if self.clue is None:
            self._download_clue()

        return LookUp(
            type="moa",
            transfer_metadata=[self.clue],
        )
|
pertpy/plot/__init__.py
CHANGED
@@ -1,13 +0,0 @@
|
|
1
|
-
from pertpy.plot._augur import AugurpyPlot as ag
|
2
|
-
from pertpy.plot._dialogue import DialoguePlot as dl
|
3
|
-
|
4
|
-
try:
|
5
|
-
from pertpy.plot._coda import CodaPlot as coda
|
6
|
-
except ImportError:
|
7
|
-
pass
|
8
|
-
|
9
|
-
from pertpy.plot._cinemaot import CinemaotPlot as cot
|
10
|
-
from pertpy.plot._guide_rna import GuideRnaPlot as guide
|
11
|
-
from pertpy.plot._milopy import MilopyPlot as milo
|
12
|
-
from pertpy.plot._mixscape import MixscapePlot as ms
|
13
|
-
from pertpy.plot._scgen import JaxscgenPlot as scg
|
pertpy/preprocessing/__init__.py
CHANGED
@@ -1,12 +1,16 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import uuid
|
3
4
|
from typing import TYPE_CHECKING
|
4
5
|
|
5
6
|
import numpy as np
|
7
|
+
import pandas as pd
|
8
|
+
import scanpy as sc
|
6
9
|
import scipy
|
7
10
|
|
8
11
|
if TYPE_CHECKING:
|
9
12
|
from anndata import AnnData
|
13
|
+
from matplotlib.axes import Axes
|
10
14
|
|
11
15
|
|
12
16
|
class GuideAssignment:
|
@@ -30,16 +34,15 @@ class GuideAssignment:
|
|
30
34
|
assignment_threshold: The count threshold that is required for an assignment to be viable.
|
31
35
|
layer: Key to the layer containing raw count values of the gRNAs.
|
32
36
|
adata.X is used if layer is None. Expects count data.
|
33
|
-
output_layer: Assigned guide will be saved on adata.layers[output_key].
|
37
|
+
output_layer: Assigned guide will be saved on adata.layers[output_key].
|
34
38
|
only_return_results: If True, input AnnData is not modified and the result is returned as an np.ndarray.
|
35
|
-
Defaults to False.
|
36
39
|
|
37
40
|
Examples:
|
38
41
|
Each cell is assigned to gRNA that occurs at least 5 times in the respective cell.
|
39
42
|
|
40
43
|
>>> import pertpy as pt
|
41
44
|
>>> mdata = pt.data.papalexi_2021()
|
42
|
-
>>> gdo = mdata.mod[
|
45
|
+
>>> gdo = mdata.mod["gdo"]
|
43
46
|
>>> ga = pt.pp.GuideAssignment()
|
44
47
|
>>> ga.assign_by_threshold(gdo, assignment_threshold=5)
|
45
48
|
"""
|
@@ -71,7 +74,6 @@ class GuideAssignment:
|
|
71
74
|
|
72
75
|
Args:
|
73
76
|
adata: Annotated data matrix containing gRNA values
|
74
|
-
assignment_threshold: If a gRNA is available for at least `assignment_threshold`, it will be recognized as assigned.
|
75
77
|
assignment_threshold: The count threshold that is required for an assignment to be viable.
|
76
78
|
layer: Key to the layer containing raw count values of the gRNAs.
|
77
79
|
adata.X is used if layer is None. Expects count data.
|
@@ -83,8 +85,8 @@ class GuideAssignment:
|
|
83
85
|
Each cell is assigned to the most expressed gRNA if it has at least 5 counts.
|
84
86
|
|
85
87
|
>>> import pertpy as pt
|
86
|
-
>>> mdata = pt.
|
87
|
-
>>> gdo = mdata.mod[
|
88
|
+
>>> mdata = pt.dt.papalexi_2021()
|
89
|
+
>>> gdo = mdata.mod["gdo"]
|
88
90
|
>>> ga = pt.pp.GuideAssignment()
|
89
91
|
>>> ga.assign_to_max_guide(gdo, assignment_threshold=5)
|
90
92
|
"""
|
@@ -103,3 +105,84 @@ class GuideAssignment:
|
|
103
105
|
adata.obs[output_key] = assigned_grna
|
104
106
|
|
105
107
|
return None
|
108
|
+
|
109
|
+
def plot_heatmap(
    self,
    adata: AnnData,
    layer: str | None = None,
    order_by: np.ndarray | str | None = None,
    key_to_save_order: str | None = None,
    **kwargs,
) -> list[Axes]:
    """Heatmap plotting of guide RNA expression matrix.

    Assuming guides have sparse expression, this function reorders cells
    and plots guide RNA expression so that a nice sparse representation is achieved.
    The cell ordering can be stored and reused in future plots to obtain consistent
    plots before and after analysis of the guide RNA expression.
    Note: This function expects a log-normalized or binary data.

    Args:
        adata: Annotated data matrix containing gRNA values
        layer: Key to the layer containing log normalized count values of the gRNAs.
               adata.X is used if layer is None.
        order_by: The order of cells in y axis.
                  If None, cells will be reordered to have a nice sparse representation.
                  If a string is provided, adata.obs[order_by] will be used as the order.
                  If a numpy array is provided, the array will be used for ordering.
        key_to_save_order: The obs key to save cell orders in the current plot. Only saves if not None.
        kwargs: Are passed to sc.pl.heatmap.

    Returns:
        List of Axes. Alternatively you can pass save or show parameters as they will be passed to sc.pl.heatmap.
        Order of cells in the y-axis will be saved on adata.obs[key_to_save_order] if provided.

    Examples:
        Each cell is assigned to gRNA that occurs at least 5 times in the respective cell, which is then
        visualized using a heatmap.

        >>> import pertpy as pt
        >>> mdata = pt.dt.papalexi_2021()
        >>> gdo = mdata.mod["gdo"]
        >>> ga = pt.pp.GuideAssignment()
        >>> ga.assign_by_threshold(gdo, assignment_threshold=5)
        >>> ga.plot_heatmap(gdo)
    """
    data = adata.X if layer is None else adata.layers[layer]

    if order_by is None:
        # Per-cell maximum, minimum and index of the maximum, each flattened to 1-D.
        # `toarray()` / `np.asarray()` are used instead of the `.A` shorthand, which recent
        # SciPy releases no longer provide on sparse containers; `ravel()` keeps a
        # single-cell input 1-D where `squeeze()` would collapse it to 0-d.
        if scipy.sparse.issparse(data):
            row_max = data.max(axis=1).toarray().ravel()
            row_min = data.min(axis=1).toarray().ravel()
        else:
            row_max = np.asarray(data.max(axis=1)).ravel()
            row_min = np.asarray(data.min(axis=1)).ravel()
        row_argmax = np.asarray(data.argmax(axis=1)).ravel()

        # Cells whose values are all identical have no dominant guide and get -1,
        # so they are sorted in front of all cells that do have one.
        max_guide_index = np.where(row_max != row_min, row_argmax, -1)
        order = np.argsort(max_guide_index)
    elif isinstance(order_by, str):
        order = np.argsort(adata.obs[order_by])
    else:
        order = order_by

    # sc.pl.heatmap needs a `groupby` column: add a uniquely named single-category
    # dummy column for the duration of the call and remove it again afterwards.
    temp_col_name = f"_tmp_pertpy_grna_plot_{uuid.uuid4()}"
    adata.obs[temp_col_name] = pd.Categorical(["" for _ in range(adata.shape[0])])

    if key_to_save_order is not None:
        adata.obs[key_to_save_order] = pd.Categorical(order)

    try:
        axis_group = sc.pl.heatmap(
            adata[order, :],
            var_names=adata.var.index.tolist(),
            groupby=temp_col_name,
            cmap="viridis",
            use_raw=False,
            dendrogram=False,
            layer=layer,
            **kwargs,
        )
    finally:
        # Always clean up the helper column, even if plotting raised.
        del adata.obs[temp_col_name]

    return axis_group
|
pertpy/tools/__init__.py
CHANGED
@@ -1,24 +1,57 @@
|
|
1
|
-
from rich import print
|
2
|
-
|
3
1
|
from pertpy.tools._augur import Augur
|
4
2
|
from pertpy.tools._cinemaot import Cinemaot
|
3
|
+
from pertpy.tools._coda._sccoda import Sccoda
|
4
|
+
from pertpy.tools._coda._tasccoda import Tasccoda
|
5
5
|
from pertpy.tools._dialogue import Dialogue
|
6
|
-
from pertpy.tools._differential_gene_expression import
|
6
|
+
from pertpy.tools._differential_gene_expression import (
|
7
|
+
DGEEVAL,
|
8
|
+
EdgeR,
|
9
|
+
PyDESeq2,
|
10
|
+
Statsmodels,
|
11
|
+
TTest,
|
12
|
+
WilcoxonTest,
|
13
|
+
)
|
7
14
|
from pertpy.tools._distances._distance_tests import DistanceTest
|
8
15
|
from pertpy.tools._distances._distances import Distance
|
9
|
-
from pertpy.tools.
|
16
|
+
from pertpy.tools._enrichment import Enrichment
|
10
17
|
from pertpy.tools._milo import Milo
|
11
18
|
from pertpy.tools._mixscape import Mixscape
|
12
19
|
from pertpy.tools._perturbation_space._clustering import ClusteringSpace
|
13
|
-
from pertpy.tools._perturbation_space.
|
14
|
-
from pertpy.tools._perturbation_space.
|
15
|
-
|
20
|
+
from pertpy.tools._perturbation_space._comparison import PerturbationComparison
|
21
|
+
from pertpy.tools._perturbation_space._discriminator_classifiers import (
|
22
|
+
LRClassifierSpace,
|
23
|
+
MLPClassifierSpace,
|
24
|
+
)
|
25
|
+
from pertpy.tools._perturbation_space._simple import (
|
26
|
+
CentroidSpace,
|
27
|
+
DBSCANSpace,
|
28
|
+
KMeansSpace,
|
29
|
+
PseudobulkSpace,
|
30
|
+
)
|
31
|
+
from pertpy.tools._scgen import Scgen
|
16
32
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
33
|
+
# Names exported by `from pertpy.tools import *`.
# Every class imported at the top of this module is listed, including `DGEEVAL`
# and `PerturbationComparison`.
__all__ = [
    "Augur",
    "Cinemaot",
    "Sccoda",
    "Tasccoda",
    "Dialogue",
    "DGEEVAL",
    "EdgeR",
    "PyDESeq2",
    "WilcoxonTest",
    "TTest",
    "Statsmodels",
    "DistanceTest",
    "Distance",
    "Enrichment",
    "Milo",
    "Mixscape",
    "ClusteringSpace",
    "PerturbationComparison",
    "LRClassifierSpace",
    "MLPClassifierSpace",
    "CentroidSpace",
    "DBSCANSpace",
    "KMeansSpace",
    "PseudobulkSpace",
    "Scgen",
]
|