pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pertpy/__init__.py +4 -2
- pertpy/data/__init__.py +66 -1
- pertpy/data/_dataloader.py +28 -26
- pertpy/data/_datasets.py +261 -92
- pertpy/metadata/__init__.py +6 -0
- pertpy/metadata/_cell_line.py +795 -0
- pertpy/metadata/_compound.py +128 -0
- pertpy/metadata/_drug.py +238 -0
- pertpy/metadata/_look_up.py +569 -0
- pertpy/metadata/_metadata.py +70 -0
- pertpy/metadata/_moa.py +125 -0
- pertpy/plot/__init__.py +0 -13
- pertpy/preprocessing/__init__.py +2 -0
- pertpy/preprocessing/_guide_rna.py +89 -6
- pertpy/tools/__init__.py +48 -15
- pertpy/tools/_augur.py +329 -32
- pertpy/tools/_cinemaot.py +145 -6
- pertpy/tools/_coda/_base_coda.py +1237 -116
- pertpy/tools/_coda/_sccoda.py +66 -36
- pertpy/tools/_coda/_tasccoda.py +46 -39
- pertpy/tools/_dialogue.py +180 -77
- pertpy/tools/_differential_gene_expression/__init__.py +20 -0
- pertpy/tools/_differential_gene_expression/_base.py +657 -0
- pertpy/tools/_differential_gene_expression/_checks.py +41 -0
- pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
- pertpy/tools/_differential_gene_expression/_edger.py +125 -0
- pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
- pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
- pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
- pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
- pertpy/tools/_distances/_distance_tests.py +29 -24
- pertpy/tools/_distances/_distances.py +584 -98
- pertpy/tools/_enrichment.py +460 -0
- pertpy/tools/_kernel_pca.py +1 -1
- pertpy/tools/_milo.py +406 -49
- pertpy/tools/_mixscape.py +677 -55
- pertpy/tools/_perturbation_space/_clustering.py +10 -3
- pertpy/tools/_perturbation_space/_comparison.py +112 -0
- pertpy/tools/_perturbation_space/_discriminator_classifiers.py +524 -0
- pertpy/tools/_perturbation_space/_perturbation_space.py +146 -52
- pertpy/tools/_perturbation_space/_simple.py +52 -11
- pertpy/tools/_scgen/__init__.py +1 -1
- pertpy/tools/_scgen/_base_components.py +2 -3
- pertpy/tools/_scgen/_scgen.py +706 -0
- pertpy/tools/_scgen/_utils.py +3 -5
- pertpy/tools/decoupler_LICENSE +674 -0
- {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +48 -20
- pertpy-0.8.0.dist-info/RECORD +57 -0
- {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
- pertpy/plot/_augur.py +0 -234
- pertpy/plot/_cinemaot.py +0 -81
- pertpy/plot/_coda.py +0 -1001
- pertpy/plot/_dialogue.py +0 -91
- pertpy/plot/_guide_rna.py +0 -82
- pertpy/plot/_milopy.py +0 -284
- pertpy/plot/_mixscape.py +0 -594
- pertpy/plot/_scgen.py +0 -337
- pertpy/tools/_differential_gene_expression.py +0 -99
- pertpy/tools/_metadata/__init__.py +0 -0
- pertpy/tools/_metadata/_cell_line.py +0 -613
- pertpy/tools/_metadata/_look_up.py +0 -342
- pertpy/tools/_perturbation_space/_discriminator_classifier.py +0 -381
- pertpy/tools/_scgen/_jax_scgen.py +0 -370
- pertpy-0.6.0.dist-info/RECORD +0 -50
- /pertpy/tools/_scgen/{_jax_scgenvae.py → _scgenvae.py} +0 -0
- {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0
pertpy/tools/_milo.py
CHANGED
@@ -3,21 +3,24 @@ from __future__ import annotations
|
|
3
3
|
import logging
|
4
4
|
import random
|
5
5
|
import re
|
6
|
-
from typing import Literal
|
6
|
+
from typing import TYPE_CHECKING, Literal
|
7
7
|
|
8
|
+
import matplotlib.pyplot as plt
|
8
9
|
import numpy as np
|
9
10
|
import pandas as pd
|
11
|
+
import scanpy as sc
|
12
|
+
import seaborn as sns
|
10
13
|
from anndata import AnnData
|
14
|
+
from lamin_utils import logger
|
11
15
|
from mudata import MuData
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
from
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
)
|
16
|
+
|
17
|
+
if TYPE_CHECKING:
|
18
|
+
from collections.abc import Sequence
|
19
|
+
|
20
|
+
from matplotlib.axes import Axes
|
21
|
+
from matplotlib.colors import Colormap
|
22
|
+
from matplotlib.figure import Figure
|
23
|
+
|
21
24
|
from scipy.sparse import csr_matrix
|
22
25
|
from sklearn.metrics.pairwise import euclidean_distances
|
23
26
|
|
@@ -26,7 +29,16 @@ class Milo:
|
|
26
29
|
"""Python implementation of Milo."""
|
27
30
|
|
28
31
|
def __init__(self):
|
29
|
-
|
32
|
+
try:
|
33
|
+
from rpy2.robjects import conversion, numpy2ri, pandas2ri
|
34
|
+
from rpy2.robjects.packages import STAP, PackageNotInstalledError, importr
|
35
|
+
except ModuleNotFoundError:
|
36
|
+
raise ImportError("milo requires rpy2 to be installed.") from None
|
37
|
+
|
38
|
+
try:
|
39
|
+
importr("edgeR")
|
40
|
+
except ImportError as e:
|
41
|
+
raise ImportError("milo requires a valid R installation with edger installed:\n") from e
|
30
42
|
|
31
43
|
def load(
|
32
44
|
self,
|
@@ -39,7 +51,7 @@ class Milo:
|
|
39
51
|
input: AnnData
|
40
52
|
feature_key: Key to store the cell-level AnnData object in the MuData object
|
41
53
|
Returns:
|
42
|
-
MuData: MuData object with original AnnData
|
54
|
+
MuData: MuData object with original AnnData.
|
43
55
|
|
44
56
|
Examples:
|
45
57
|
>>> import pertpy as pt
|
@@ -71,11 +83,10 @@ class Milo:
|
|
71
83
|
neighbors_key: The key in `adata.obsp` or `mdata[feature_key].obsp` to use as KNN graph.
|
72
84
|
If not specified, `make_nhoods` looks .obsp[‘connectivities’] for connectivities (default storage places for `scanpy.pp.neighbors`).
|
73
85
|
If specified, it looks at .obsp[.uns[neighbors_key][‘connectivities_key’]] for connectivities.
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
copy: Determines whether a copy of the `adata` is returned. (default: False)
|
86
|
+
feature_key: If input data is MuData, specify key to cell-level AnnData object.
|
87
|
+
prop: Fraction of cells to sample for neighbourhood index search.
|
88
|
+
seed: Random seed for cell sampling.
|
89
|
+
copy: Determines whether a copy of the `adata` is returned.
|
79
90
|
|
80
91
|
Returns:
|
81
92
|
If `copy=True`, returns the copy of `adata` with the result in `.obs`, `.obsm`, and `.uns`.
|
@@ -119,7 +130,7 @@ class Milo:
|
|
119
130
|
try:
|
120
131
|
knn_graph = adata.obsp["connectivities"].copy()
|
121
132
|
except KeyError:
|
122
|
-
|
133
|
+
logger.error('No "connectivities" slot in adata.obsp -- please run scanpy.pp.neighbors(adata) first')
|
123
134
|
raise
|
124
135
|
else:
|
125
136
|
try:
|
@@ -174,6 +185,7 @@ class Milo:
|
|
174
185
|
dist_mat = knn_dists[nhood_ixs, :]
|
175
186
|
k_distances = dist_mat.max(1).toarray().ravel()
|
176
187
|
adata.obs["nhood_kth_distance"] = 0
|
188
|
+
adata.obs["nhood_kth_distance"] = adata.obs["nhood_kth_distance"].astype(float)
|
177
189
|
adata.obs.loc[adata.obs["nhood_ixs_refined"] == 1, "nhood_kth_distance"] = k_distances
|
178
190
|
|
179
191
|
if copy:
|
@@ -190,7 +202,7 @@ class Milo:
|
|
190
202
|
Args:
|
191
203
|
data: AnnData object with neighbourhoods defined in `obsm['nhoods']` or MuData object with a modality with neighbourhoods defined in `obsm['nhoods']`
|
192
204
|
sample_col: Column in adata.obs that contains sample information
|
193
|
-
feature_key: If input data is MuData, specify key to cell-level AnnData object.
|
205
|
+
feature_key: If input data is MuData, specify key to cell-level AnnData object.
|
194
206
|
|
195
207
|
Returns:
|
196
208
|
MuData object storing the original (i.e. rna) AnnData in `mudata[feature_key]`
|
@@ -221,7 +233,7 @@ class Milo:
|
|
221
233
|
try:
|
222
234
|
nhoods = adata.obsm["nhoods"]
|
223
235
|
except KeyError:
|
224
|
-
|
236
|
+
logger.error('Cannot find "nhoods" slot in adata.obsm -- please run milopy.make_nhoods(adata)')
|
225
237
|
raise
|
226
238
|
# Make nhood abundance matrix
|
227
239
|
sample_dummies = pd.get_dummies(adata.obs[sample_col])
|
@@ -229,7 +241,7 @@ class Milo:
|
|
229
241
|
sample_dummies = csr_matrix(sample_dummies.values)
|
230
242
|
nhood_count_mat = nhoods.T.dot(sample_dummies)
|
231
243
|
sample_obs = pd.DataFrame(index=all_samples)
|
232
|
-
sample_adata = AnnData(X=nhood_count_mat.T, obs=sample_obs
|
244
|
+
sample_adata = AnnData(X=nhood_count_mat.T, obs=sample_obs)
|
233
245
|
sample_adata.uns["sample_col"] = sample_col
|
234
246
|
# Save nhood index info
|
235
247
|
sample_adata.var["index_cell"] = adata.obs_names[adata.obs["nhood_ixs_refined"] == 1]
|
@@ -261,10 +273,10 @@ class Milo:
|
|
261
273
|
design: Formula for the test, following glm syntax from R (e.g. '~ condition').
|
262
274
|
Terms should be columns in `milo_mdata[feature_key].obs`.
|
263
275
|
model_contrasts: A string vector that defines the contrasts used to perform DA testing, following glm syntax from R (e.g. "conditionDisease - conditionControl").
|
264
|
-
If no contrast is specified (default), then the last categorical level in condition of interest is used as the test group.
|
265
|
-
subset_samples: subset of samples (obs in `milo_mdata['milo']`) to use for the test.
|
266
|
-
add_intercept: whether to include an intercept in the model. If False, this is equivalent to adding + 0 in the design formula. When model_contrasts is specified, this is set to False by default.
|
267
|
-
feature_key: If input data is MuData, specify key to cell-level AnnData object.
|
276
|
+
If no contrast is specified (default), then the last categorical level in condition of interest is used as the test group.
|
277
|
+
subset_samples: subset of samples (obs in `milo_mdata['milo']`) to use for the test.
|
278
|
+
add_intercept: whether to include an intercept in the model. If False, this is equivalent to adding + 0 in the design formula. When model_contrasts is specified, this is set to False by default.
|
279
|
+
feature_key: If input data is MuData, specify key to cell-level AnnData object.
|
268
280
|
solver: The solver to fit the model to. One of "edger" (requires R, rpy2 and edgeR to be installed) or "batchglm"
|
269
281
|
|
270
282
|
Returns:
|
@@ -288,8 +300,8 @@ class Milo:
|
|
288
300
|
try:
|
289
301
|
sample_adata = mdata["milo"]
|
290
302
|
except KeyError:
|
291
|
-
|
292
|
-
"
|
303
|
+
logger.error(
|
304
|
+
"milo_mdata should be a MuData object with two slots:"
|
293
305
|
" feature_key and 'milo' - please run milopy.count_nhoods() first"
|
294
306
|
)
|
295
307
|
raise
|
@@ -303,7 +315,7 @@ class Milo:
|
|
303
315
|
sample_obs = adata.obs[covariates + [sample_col]].drop_duplicates()
|
304
316
|
except KeyError:
|
305
317
|
missing_cov = [x for x in covariates if x not in sample_adata.obs.columns]
|
306
|
-
|
318
|
+
logger.warning("Covariates {c} are not columns in adata.obs".format(c=" ".join(missing_cov)))
|
307
319
|
raise
|
308
320
|
sample_obs = sample_obs[covariates + [sample_col]]
|
309
321
|
sample_obs.index = sample_obs[sample_col].astype("str")
|
@@ -311,7 +323,7 @@ class Milo:
|
|
311
323
|
try:
|
312
324
|
assert sample_obs.loc[sample_adata.obs_names].shape[0] == len(sample_adata.obs_names)
|
313
325
|
except AssertionError:
|
314
|
-
|
326
|
+
logger.warning(
|
315
327
|
f"Values in mdata[{feature_key}].obs[{covariates}] cannot be unambiguously assigned to each sample"
|
316
328
|
f" -- each sample value should match a single covariate value"
|
317
329
|
)
|
@@ -323,7 +335,9 @@ class Milo:
|
|
323
335
|
design_df = sample_adata.obs[covariates]
|
324
336
|
except KeyError:
|
325
337
|
missing_cov = [x for x in covariates if x not in sample_adata.obs.columns]
|
326
|
-
|
338
|
+
logger.error(
|
339
|
+
'Covariates {c} are not columns in adata.uns["sample_adata"].obs'.format(c=" ".join(missing_cov))
|
340
|
+
)
|
327
341
|
raise
|
328
342
|
# Get count matrix
|
329
343
|
count_mat = sample_adata.X.T.toarray()
|
@@ -367,6 +381,8 @@ class Milo:
|
|
367
381
|
return(colnames(m))
|
368
382
|
}
|
369
383
|
"""
|
384
|
+
from rpy2.robjects.packages import STAP
|
385
|
+
|
370
386
|
get_model_cols = STAP(r_str, "get_model_cols")
|
371
387
|
model_mat_cols = get_model_cols.get_model_cols(design_df, design)
|
372
388
|
model_df = pd.DataFrame(model)
|
@@ -374,13 +390,16 @@ class Milo:
|
|
374
390
|
try:
|
375
391
|
mod_contrast = limma.makeContrasts(contrasts=model_contrasts, levels=model_df)
|
376
392
|
except ValueError:
|
377
|
-
|
393
|
+
logger.error("Model contrasts must be in the form 'A-B' or 'A+B'")
|
378
394
|
raise
|
379
395
|
res = base.as_data_frame(
|
380
396
|
edgeR.topTags(edgeR.glmQLFTest(fit, contrast=mod_contrast), sort_by="none", n=np.inf)
|
381
397
|
)
|
382
398
|
else:
|
383
399
|
res = base.as_data_frame(edgeR.topTags(edgeR.glmQLFTest(fit, coef=n_coef), sort_by="none", n=np.inf))
|
400
|
+
|
401
|
+
from rpy2.robjects import conversion
|
402
|
+
|
384
403
|
res = conversion.rpy2py(res)
|
385
404
|
if not isinstance(res, pd.DataFrame):
|
386
405
|
res = pd.DataFrame(res)
|
@@ -405,7 +424,7 @@ class Milo:
|
|
405
424
|
Args:
|
406
425
|
mdata: MuData object
|
407
426
|
anno_col: Column in adata.obs containing the cell annotations to use for nhood labelling
|
408
|
-
feature_key: If input data is MuData, specify key to cell-level AnnData object.
|
427
|
+
feature_key: If input data is MuData, specify key to cell-level AnnData object.
|
409
428
|
|
410
429
|
Returns:
|
411
430
|
None. Adds in place:
|
@@ -423,12 +442,12 @@ class Milo:
|
|
423
442
|
>>> sc.pp.neighbors(mdata["rna"])
|
424
443
|
>>> milo.make_nhoods(mdata["rna"])
|
425
444
|
>>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
|
426
|
-
>>> milo.annotate_nhoods(mdata, anno_col=
|
445
|
+
>>> milo.annotate_nhoods(mdata, anno_col="cell_type")
|
427
446
|
"""
|
428
447
|
try:
|
429
448
|
sample_adata = mdata["milo"]
|
430
449
|
except KeyError:
|
431
|
-
|
450
|
+
logger.error(
|
432
451
|
"milo_mdata should be a MuData object with two slots: feature_key and 'milo' - please run milopy.count_nhoods(adata) first"
|
433
452
|
)
|
434
453
|
raise
|
@@ -459,7 +478,7 @@ class Milo:
|
|
459
478
|
Args:
|
460
479
|
mdata: MuData object
|
461
480
|
anno_col: Column in adata.obs containing the cell annotations to use for nhood labelling
|
462
|
-
feature_key: If input data is MuData, specify key to cell-level AnnData object.
|
481
|
+
feature_key: If input data is MuData, specify key to cell-level AnnData object.
|
463
482
|
|
464
483
|
Returns:
|
465
484
|
None. Adds in place:
|
@@ -474,7 +493,7 @@ class Milo:
|
|
474
493
|
>>> sc.pp.neighbors(mdata["rna"])
|
475
494
|
>>> milo.make_nhoods(mdata["rna"])
|
476
495
|
>>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
|
477
|
-
>>> milo.annotate_nhoods_continuous(mdata, anno_col=
|
496
|
+
>>> milo.annotate_nhoods_continuous(mdata, anno_col="nUMI")
|
478
497
|
"""
|
479
498
|
if "milo" not in mdata.mod:
|
480
499
|
raise ValueError(
|
@@ -500,7 +519,7 @@ class Milo:
|
|
500
519
|
Args:
|
501
520
|
mdata: MuData object
|
502
521
|
new_covariates: columns in `milo_mdata[feature_key].obs` to add to `milo_mdata['milo'].obs`.
|
503
|
-
feature_key: If input data is MuData, specify key to cell-level AnnData object.
|
522
|
+
feature_key: If input data is MuData, specify key to cell-level AnnData object.
|
504
523
|
|
505
524
|
Returns:
|
506
525
|
None, adds columns to `milo_mdata['milo']` in place
|
@@ -519,7 +538,7 @@ class Milo:
|
|
519
538
|
try:
|
520
539
|
sample_adata = mdata["milo"]
|
521
540
|
except KeyError:
|
522
|
-
|
541
|
+
logger.error(
|
523
542
|
"milo_mdata should be a MuData object with two slots: feature_key and 'milo' - please run milopy.count_nhoods(adata) first"
|
524
543
|
)
|
525
544
|
raise
|
@@ -533,14 +552,14 @@ class Milo:
|
|
533
552
|
sample_obs = adata.obs[covariates + [sample_col]].drop_duplicates()
|
534
553
|
except KeyError:
|
535
554
|
missing_cov = [covar for covar in covariates if covar not in sample_adata.obs.columns]
|
536
|
-
|
555
|
+
logger.error("Covariates {c} are not columns in adata.obs".format(c=" ".join(missing_cov)))
|
537
556
|
raise
|
538
557
|
sample_obs = sample_obs[covariates + [sample_col]].astype("str")
|
539
558
|
sample_obs.index = sample_obs[sample_col]
|
540
559
|
try:
|
541
560
|
assert sample_obs.loc[sample_adata.obs_names].shape[0] == len(sample_adata.obs_names)
|
542
561
|
except ValueError:
|
543
|
-
|
562
|
+
logger.error(
|
544
563
|
"Covariates cannot be unambiguously assigned to each sample -- each sample value should match a single covariate value"
|
545
564
|
)
|
546
565
|
raise
|
@@ -551,8 +570,8 @@ class Milo:
|
|
551
570
|
|
552
571
|
Args:
|
553
572
|
mdata: MuData object
|
554
|
-
basis: Name of the obsm basis to use for layout of neighbourhoods (key in `adata.obsm`).
|
555
|
-
feature_key: If input data is MuData, specify key to cell-level AnnData object.
|
573
|
+
basis: Name of the obsm basis to use for layout of neighbourhoods (key in `adata.obsm`).
|
574
|
+
feature_key: If input data is MuData, specify key to cell-level AnnData object.
|
556
575
|
|
557
576
|
Returns:
|
558
577
|
- `milo_mdata['milo'].varp['nhood_connectivities']`: graph of overlap between neighbourhoods (i.e. no of shared cells)
|
@@ -584,13 +603,13 @@ class Milo:
|
|
584
603
|
"distances_key": "",
|
585
604
|
}
|
586
605
|
|
587
|
-
def add_nhood_expression(self, mdata: MuData, layer: str | None = None, feature_key: str | None = "rna"):
|
606
|
+
def add_nhood_expression(self, mdata: MuData, layer: str | None = None, feature_key: str | None = "rna") -> None:
|
588
607
|
"""Calculates the mean expression in neighbourhoods of each feature.
|
589
608
|
|
590
609
|
Args:
|
591
610
|
mdata: MuData object
|
592
|
-
layer: If provided, use `milo_mdata[feature_key][layer]` as expression matrix instead of `milo_mdata[feature_key].X`.
|
593
|
-
feature_key: If input data is MuData, specify key to cell-level AnnData object.
|
611
|
+
layer: If provided, use `milo_mdata[feature_key][layer]` as expression matrix instead of `milo_mdata[feature_key].X`.
|
612
|
+
feature_key: If input data is MuData, specify key to cell-level AnnData object.
|
594
613
|
|
595
614
|
Returns:
|
596
615
|
Updates adata in place to store the matrix of average expression in each neighbourhood in `milo_mdata['milo'].varm['expr']`
|
@@ -609,7 +628,7 @@ class Milo:
|
|
609
628
|
try:
|
610
629
|
sample_adata = mdata["milo"]
|
611
630
|
except KeyError:
|
612
|
-
|
631
|
+
logger.error(
|
613
632
|
"milo_mdata should be a MuData object with two slots:"
|
614
633
|
" feature_key and 'milo' - please run milopy.count_nhoods(adata) first"
|
615
634
|
)
|
@@ -633,6 +652,9 @@ class Milo:
|
|
633
652
|
self,
|
634
653
|
):
|
635
654
|
"""Set up rpy2 to run edgeR"""
|
655
|
+
from rpy2.robjects import numpy2ri, pandas2ri
|
656
|
+
from rpy2.robjects.packages import importr
|
657
|
+
|
636
658
|
numpy2ri.activate()
|
637
659
|
pandas2ri.activate()
|
638
660
|
edgeR = self._try_import_bioc_library("edgeR")
|
@@ -651,11 +673,13 @@ class Milo:
|
|
651
673
|
Args:
|
652
674
|
name (str): R packages name
|
653
675
|
"""
|
676
|
+
from rpy2.robjects.packages import PackageNotInstalledError, importr
|
677
|
+
|
654
678
|
try:
|
655
679
|
_r_lib = importr(name)
|
656
680
|
return _r_lib
|
657
681
|
except PackageNotInstalledError:
|
658
|
-
|
682
|
+
logger.error(f"Install Bioconductor library `{name!r}` first as `BiocManager::install({name!r}).`")
|
659
683
|
raise
|
660
684
|
|
661
685
|
def _graph_spatial_fdr(
|
@@ -663,11 +687,13 @@ class Milo:
|
|
663
687
|
sample_adata: AnnData,
|
664
688
|
neighbors_key: str | None = None,
|
665
689
|
):
|
666
|
-
"""FDR correction weighted on inverse of connectivity of neighbourhoods.
|
690
|
+
"""FDR correction weighted on inverse of connectivity of neighbourhoods.
|
691
|
+
|
692
|
+
The distance to the k-th nearest neighbor is used as a measure of connectivity.
|
667
693
|
|
668
694
|
Args:
|
669
695
|
sample_adata: Sample-level AnnData.
|
670
|
-
neighbors_key: The key in `adata.obsp` to use as KNN graph.
|
696
|
+
neighbors_key: The key in `adata.obsp` to use as KNN graph.
|
671
697
|
"""
|
672
698
|
# use 1/connectivity as the weighting for the weighted BH adjustment from Cydar
|
673
699
|
w = 1 / sample_adata.var["kth_distance"]
|
@@ -686,3 +712,334 @@ class Milo:
|
|
686
712
|
|
687
713
|
sample_adata.var["SpatialFDR"] = np.nan
|
688
714
|
sample_adata.var.loc[keep_nhoods, "SpatialFDR"] = adjp
|
715
|
+
|
716
|
+
def plot_nhood_graph(
|
717
|
+
self,
|
718
|
+
mdata: MuData,
|
719
|
+
alpha: float = 0.1,
|
720
|
+
min_logFC: float = 0,
|
721
|
+
min_size: int = 10,
|
722
|
+
plot_edges: bool = False,
|
723
|
+
title: str = "DA log-Fold Change",
|
724
|
+
color_map: Colormap | str | None = None,
|
725
|
+
palette: str | Sequence[str] | None = None,
|
726
|
+
ax: Axes | None = None,
|
727
|
+
show: bool | None = None,
|
728
|
+
save: bool | str | None = None,
|
729
|
+
**kwargs,
|
730
|
+
) -> None:
|
731
|
+
"""Visualize DA results on abstracted graph (wrapper around sc.pl.embedding)
|
732
|
+
|
733
|
+
Args:
|
734
|
+
mdata: MuData object
|
735
|
+
alpha: Significance threshold. (default: 0.1)
|
736
|
+
min_logFC: Minimum absolute log-Fold Change to show results. If is 0, show all significant neighbourhoods.
|
737
|
+
min_size: Minimum size of nodes in visualization. (default: 10)
|
738
|
+
plot_edges: If edges for neighbourhood overlaps whould be plotted.
|
739
|
+
title: Plot title.
|
740
|
+
show: Show the plot, do not return axis.
|
741
|
+
save: If `True` or a `str`, save the figure. A string is appended to the default filename.
|
742
|
+
Infer the filetype if ending on {`'.pdf'`, `'.png'`, `'.svg'`}.
|
743
|
+
**kwargs: Additional arguments to `scanpy.pl.embedding`.
|
744
|
+
|
745
|
+
Examples:
|
746
|
+
>>> import pertpy as pt
|
747
|
+
>>> import scanpy as sc
|
748
|
+
>>> adata = pt.dt.bhattacherjee()
|
749
|
+
>>> milo = pt.tl.Milo()
|
750
|
+
>>> mdata = milo.load(adata)
|
751
|
+
>>> sc.pp.neighbors(mdata["rna"])
|
752
|
+
>>> sc.tl.umap(mdata["rna"])
|
753
|
+
>>> milo.make_nhoods(mdata["rna"])
|
754
|
+
>>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
|
755
|
+
>>> milo.da_nhoods(mdata,
|
756
|
+
>>> design='~label',
|
757
|
+
>>> model_contrasts='labelwithdraw_15d_Cocaine-labelwithdraw_48h_Cocaine')
|
758
|
+
>>> milo.build_nhood_graph(mdata)
|
759
|
+
>>> milo.plot_nhood_graph(mdata)
|
760
|
+
|
761
|
+
Preview:
|
762
|
+
.. image:: /_static/docstring_previews/milo_nhood_graph.png
|
763
|
+
"""
|
764
|
+
nhood_adata = mdata["milo"].T.copy()
|
765
|
+
|
766
|
+
if "Nhood_size" not in nhood_adata.obs.columns:
|
767
|
+
raise KeyError(
|
768
|
+
'Cannot find "Nhood_size" column in adata.uns["nhood_adata"].obs -- \
|
769
|
+
please run milopy.utils.build_nhood_graph(adata)'
|
770
|
+
)
|
771
|
+
|
772
|
+
nhood_adata.obs["graph_color"] = nhood_adata.obs["logFC"]
|
773
|
+
nhood_adata.obs.loc[nhood_adata.obs["SpatialFDR"] > alpha, "graph_color"] = np.nan
|
774
|
+
nhood_adata.obs["abs_logFC"] = abs(nhood_adata.obs["logFC"])
|
775
|
+
nhood_adata.obs.loc[nhood_adata.obs["abs_logFC"] < min_logFC, "graph_color"] = np.nan
|
776
|
+
|
777
|
+
# Plotting order - extreme logFC on top
|
778
|
+
nhood_adata.obs.loc[nhood_adata.obs["graph_color"].isna(), "abs_logFC"] = np.nan
|
779
|
+
ordered = nhood_adata.obs.sort_values("abs_logFC", na_position="first").index
|
780
|
+
nhood_adata = nhood_adata[ordered]
|
781
|
+
|
782
|
+
vmax = np.max([nhood_adata.obs["graph_color"].max(), abs(nhood_adata.obs["graph_color"].min())])
|
783
|
+
vmin = -vmax
|
784
|
+
|
785
|
+
sc.pl.embedding(
|
786
|
+
nhood_adata,
|
787
|
+
"X_milo_graph",
|
788
|
+
color="graph_color",
|
789
|
+
cmap="RdBu_r",
|
790
|
+
size=nhood_adata.obs["Nhood_size"] * min_size,
|
791
|
+
edges=plot_edges,
|
792
|
+
neighbors_key="nhood",
|
793
|
+
sort_order=False,
|
794
|
+
frameon=False,
|
795
|
+
vmax=vmax,
|
796
|
+
vmin=vmin,
|
797
|
+
title=title,
|
798
|
+
color_map=color_map,
|
799
|
+
palette=palette,
|
800
|
+
ax=ax,
|
801
|
+
show=show,
|
802
|
+
save=save,
|
803
|
+
**kwargs,
|
804
|
+
)
|
805
|
+
|
806
|
+
def plot_nhood(
|
807
|
+
self,
|
808
|
+
mdata: MuData,
|
809
|
+
ix: int,
|
810
|
+
feature_key: str | None = "rna",
|
811
|
+
basis: str = "X_umap",
|
812
|
+
color_map: Colormap | str | None = None,
|
813
|
+
palette: str | Sequence[str] | None = None,
|
814
|
+
return_fig: bool | None = None,
|
815
|
+
ax: Axes | None = None,
|
816
|
+
show: bool | None = None,
|
817
|
+
save: bool | str | None = None,
|
818
|
+
**kwargs,
|
819
|
+
) -> None:
|
820
|
+
"""Visualize cells in a neighbourhood.
|
821
|
+
|
822
|
+
Args:
|
823
|
+
mdata: MuData object with feature_key slot, storing neighbourhood assignments in `mdata[feature_key].obsm['nhoods']`
|
824
|
+
ix: index of neighbourhood to visualize
|
825
|
+
basis: Embedding to use for visualization.
|
826
|
+
show: Show the plot, do not return axis.
|
827
|
+
save: If True or a str, save the figure. A string is appended to the default filename. Infer the filetype if ending on {'.pdf', '.png', '.svg'}.
|
828
|
+
**kwargs: Additional arguments to `scanpy.pl.embedding`.
|
829
|
+
|
830
|
+
Examples:
|
831
|
+
>>> import pertpy as pt
|
832
|
+
>>> import scanpy as sc
|
833
|
+
>>> adata = pt.dt.bhattacherjee()
|
834
|
+
>>> milo = pt.tl.Milo()
|
835
|
+
>>> mdata = milo.load(adata)
|
836
|
+
>>> sc.pp.neighbors(mdata["rna"])
|
837
|
+
>>> sc.tl.umap(mdata["rna"])
|
838
|
+
>>> milo.make_nhoods(mdata["rna"])
|
839
|
+
>>> milo.plot_nhood(mdata, ix=0)
|
840
|
+
|
841
|
+
Preview:
|
842
|
+
.. image:: /_static/docstring_previews/milo_nhood.png
|
843
|
+
"""
|
844
|
+
mdata[feature_key].obs["Nhood"] = mdata[feature_key].obsm["nhoods"][:, ix].toarray().ravel()
|
845
|
+
sc.pl.embedding(
|
846
|
+
mdata[feature_key],
|
847
|
+
basis,
|
848
|
+
color="Nhood",
|
849
|
+
size=30,
|
850
|
+
title="Nhood" + str(ix),
|
851
|
+
color_map=color_map,
|
852
|
+
palette=palette,
|
853
|
+
return_fig=return_fig,
|
854
|
+
ax=ax,
|
855
|
+
show=show,
|
856
|
+
save=save,
|
857
|
+
**kwargs,
|
858
|
+
)
|
859
|
+
|
860
|
+
def plot_da_beeswarm(
|
861
|
+
self,
|
862
|
+
mdata: MuData,
|
863
|
+
feature_key: str | None = "rna",
|
864
|
+
anno_col: str = "nhood_annotation",
|
865
|
+
alpha: float = 0.1,
|
866
|
+
subset_nhoods: list[str] = None,
|
867
|
+
palette: str | Sequence[str] | dict[str, str] | None = None,
|
868
|
+
return_fig: bool | None = None,
|
869
|
+
save: bool | str | None = None,
|
870
|
+
show: bool | None = None,
|
871
|
+
) -> Figure | Axes | None:
|
872
|
+
"""Plot beeswarm plot of logFC against nhood labels
|
873
|
+
|
874
|
+
Args:
|
875
|
+
mdata: MuData object
|
876
|
+
anno_col: Column in adata.uns['nhood_adata'].obs to use as annotation. (default: 'nhood_annotation'.)
|
877
|
+
alpha: Significance threshold. (default: 0.1)
|
878
|
+
subset_nhoods: List of nhoods to plot. If None, plot all nhoods.
|
879
|
+
palette: Name of Seaborn color palette for violinplots.
|
880
|
+
Defaults to pre-defined category colors for violinplots.
|
881
|
+
|
882
|
+
Examples:
|
883
|
+
>>> import pertpy as pt
|
884
|
+
>>> import scanpy as sc
|
885
|
+
>>> adata = pt.dt.bhattacherjee()
|
886
|
+
>>> milo = pt.tl.Milo()
|
887
|
+
>>> mdata = milo.load(adata)
|
888
|
+
>>> sc.pp.neighbors(mdata["rna"])
|
889
|
+
>>> milo.make_nhoods(mdata["rna"])
|
890
|
+
>>> mdata = milo.count_nhoods(mdata, sample_col="orig.ident")
|
891
|
+
>>> milo.da_nhoods(mdata, design="~label")
|
892
|
+
>>> milo.annotate_nhoods(mdata, anno_col="cell_type")
|
893
|
+
>>> milo.plot_da_beeswarm(mdata)
|
894
|
+
|
895
|
+
Preview:
|
896
|
+
.. image:: /_static/docstring_previews/milo_da_beeswarm.png
|
897
|
+
"""
|
898
|
+
try:
|
899
|
+
nhood_adata = mdata["milo"].T.copy()
|
900
|
+
except KeyError:
|
901
|
+
raise RuntimeError(
|
902
|
+
"mdata should be a MuData object with two slots: feature_key and 'milo'. Run 'milopy.count_nhoods(adata)' first."
|
903
|
+
) from None
|
904
|
+
|
905
|
+
try:
|
906
|
+
nhood_adata.obs[anno_col]
|
907
|
+
except KeyError:
|
908
|
+
raise RuntimeError(
|
909
|
+
f"Unable to find {anno_col} in mdata['milo'].var. Run 'milopy.utils.annotate_nhoods(adata, anno_col)' first"
|
910
|
+
) from None
|
911
|
+
|
912
|
+
if subset_nhoods is not None:
|
913
|
+
nhood_adata = nhood_adata[nhood_adata.obs[anno_col].isin(subset_nhoods)]
|
914
|
+
|
915
|
+
try:
|
916
|
+
nhood_adata.obs["logFC"]
|
917
|
+
except KeyError:
|
918
|
+
raise RuntimeError(
|
919
|
+
"Unable to find 'logFC' in mdata.uns['nhood_adata'].obs. Run 'core.da_nhoods(adata)' first."
|
920
|
+
) from None
|
921
|
+
|
922
|
+
sorted_annos = (
|
923
|
+
nhood_adata.obs[[anno_col, "logFC"]].groupby(anno_col).median().sort_values("logFC", ascending=True).index
|
924
|
+
)
|
925
|
+
|
926
|
+
anno_df = nhood_adata.obs[[anno_col, "logFC", "SpatialFDR"]].copy()
|
927
|
+
anno_df["is_signif"] = anno_df["SpatialFDR"] < alpha
|
928
|
+
anno_df = anno_df[anno_df[anno_col] != "nan"]
|
929
|
+
|
930
|
+
try:
|
931
|
+
obs_col = nhood_adata.uns["annotation_obs"]
|
932
|
+
if palette is None:
|
933
|
+
palette = dict(
|
934
|
+
zip(
|
935
|
+
mdata[feature_key].obs[obs_col].cat.categories,
|
936
|
+
mdata[feature_key].uns[f"{obs_col}_colors"],
|
937
|
+
strict=False,
|
938
|
+
)
|
939
|
+
)
|
940
|
+
sns.violinplot(
|
941
|
+
data=anno_df,
|
942
|
+
y=anno_col,
|
943
|
+
x="logFC",
|
944
|
+
order=sorted_annos,
|
945
|
+
inner=None,
|
946
|
+
orient="h",
|
947
|
+
palette=palette,
|
948
|
+
linewidth=0,
|
949
|
+
scale="width",
|
950
|
+
)
|
951
|
+
except BaseException: # noqa: BLE001
|
952
|
+
sns.violinplot(
|
953
|
+
data=anno_df,
|
954
|
+
y=anno_col,
|
955
|
+
x="logFC",
|
956
|
+
order=sorted_annos,
|
957
|
+
inner=None,
|
958
|
+
orient="h",
|
959
|
+
linewidth=0,
|
960
|
+
scale="width",
|
961
|
+
)
|
962
|
+
sns.stripplot(
|
963
|
+
data=anno_df,
|
964
|
+
y=anno_col,
|
965
|
+
x="logFC",
|
966
|
+
order=sorted_annos,
|
967
|
+
size=2,
|
968
|
+
hue="is_signif",
|
969
|
+
palette=["grey", "black"],
|
970
|
+
orient="h",
|
971
|
+
alpha=0.5,
|
972
|
+
)
|
973
|
+
plt.legend(loc="upper left", title=f"< {int(alpha * 100)}% SpatialFDR", bbox_to_anchor=(1, 1), frameon=False)
|
974
|
+
plt.axvline(x=0, ymin=0, ymax=1, color="black", linestyle="--")
|
975
|
+
|
976
|
+
if save:
|
977
|
+
plt.savefig(save, bbox_inches="tight")
|
978
|
+
return None
|
979
|
+
if show:
|
980
|
+
plt.show()
|
981
|
+
return None
|
982
|
+
if return_fig:
|
983
|
+
return plt.gcf()
|
984
|
+
if (not show and not save) or (show is None and save is None):
|
985
|
+
return plt.gca()
|
986
|
+
|
987
|
+
return None
|
988
|
+
|
989
|
+
def plot_nhood_counts_by_cond(
|
990
|
+
self,
|
991
|
+
mdata: MuData,
|
992
|
+
test_var: str,
|
993
|
+
subset_nhoods: list[str] = None,
|
994
|
+
log_counts: bool = False,
|
995
|
+
return_fig: bool | None = None,
|
996
|
+
save: bool | str | None = None,
|
997
|
+
show: bool | None = None,
|
998
|
+
) -> Figure | Axes | None:
|
999
|
+
"""Plot boxplot of cell numbers vs condition of interest.
|
1000
|
+
|
1001
|
+
Args:
|
1002
|
+
mdata: MuData object storing cell level and nhood level information
|
1003
|
+
test_var: Name of column in adata.obs storing condition of interest (y-axis for boxplot)
|
1004
|
+
subset_nhoods: List of obs_names for neighbourhoods to include in plot. If None, plot all nhoods.
|
1005
|
+
log_counts: Whether to plot log1p of cell counts.
|
1006
|
+
"""
|
1007
|
+
try:
|
1008
|
+
nhood_adata = mdata["milo"].T.copy()
|
1009
|
+
except KeyError:
|
1010
|
+
raise RuntimeError(
|
1011
|
+
"mdata should be a MuData object with two slots: feature_key and 'milo'. Run milopy.count_nhoods(mdata) first"
|
1012
|
+
) from None
|
1013
|
+
|
1014
|
+
if subset_nhoods is None:
|
1015
|
+
subset_nhoods = nhood_adata.obs_names
|
1016
|
+
|
1017
|
+
pl_df = pd.DataFrame(nhood_adata[subset_nhoods].X.A, columns=nhood_adata.var_names).melt(
|
1018
|
+
var_name=nhood_adata.uns["sample_col"], value_name="n_cells"
|
1019
|
+
)
|
1020
|
+
pl_df = pd.merge(pl_df, nhood_adata.var)
|
1021
|
+
pl_df["log_n_cells"] = np.log1p(pl_df["n_cells"])
|
1022
|
+
if not log_counts:
|
1023
|
+
sns.boxplot(data=pl_df, x=test_var, y="n_cells", color="lightblue")
|
1024
|
+
sns.stripplot(data=pl_df, x=test_var, y="n_cells", color="black", s=3)
|
1025
|
+
plt.ylabel("# cells")
|
1026
|
+
else:
|
1027
|
+
sns.boxplot(data=pl_df, x=test_var, y="log_n_cells", color="lightblue")
|
1028
|
+
sns.stripplot(data=pl_df, x=test_var, y="log_n_cells", color="black", s=3)
|
1029
|
+
plt.ylabel("log(# cells + 1)")
|
1030
|
+
|
1031
|
+
plt.xticks(rotation=90)
|
1032
|
+
plt.xlabel(test_var)
|
1033
|
+
|
1034
|
+
if save:
|
1035
|
+
plt.savefig(save, bbox_inches="tight")
|
1036
|
+
return None
|
1037
|
+
if show:
|
1038
|
+
plt.show()
|
1039
|
+
return None
|
1040
|
+
if return_fig:
|
1041
|
+
return plt.gcf()
|
1042
|
+
if not (show or save):
|
1043
|
+
return plt.gca()
|
1044
|
+
|
1045
|
+
return None
|