biopipen 0.31.7__py3-none-any.whl → 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/ns/scrna.py +153 -0
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +14 -0
- biopipen/reports/scrna/SeuratMap2Ref.svelte +10 -6
- biopipen/scripts/scrna/AnnData2Seurat.R +22 -14
- biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
- biopipen/scripts/scrna/CellCellCommunication.py +101 -0
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +191 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -42
- biopipen/scripts/scrna/SeuratMap2Ref.R +20 -1
- biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
- biopipen/scripts/tcr/GIANA/GIANA4.py +1364 -789
- biopipen/scripts/tcr/GIANA/query.py +164 -162
- biopipen/scripts/tcr/TCRClustering.R +25 -4
- biopipen/utils/single_cell.R +92 -1
- {biopipen-0.31.7.dist-info → biopipen-0.32.0.dist-info}/METADATA +1 -1
- {biopipen-0.31.7.dist-info → biopipen-0.32.0.dist-info}/RECORD +19 -15
- {biopipen-0.31.7.dist-info → biopipen-0.32.0.dist-info}/WHEEL +0 -0
- {biopipen-0.31.7.dist-info → biopipen-0.32.0.dist-info}/entry_points.txt +0 -0
biopipen/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.31.7"
|
|
1
|
+
__version__ = "0.32.0"
|
biopipen/ns/scrna.py
CHANGED
|
@@ -2314,3 +2314,156 @@ class ScSimulation(Proc):
|
|
|
2314
2314
|
"params": {},
|
|
2315
2315
|
}
|
|
2316
2316
|
script = "file://../scripts/scrna/ScSimulation.R"
|
|
2317
|
+
|
|
2318
|
+
|
|
2319
|
+
class CellCellCommunication(Proc):
|
|
2320
|
+
"""Cell-cell communication inference
|
|
2321
|
+
|
|
2322
|
+
This is implemented based on [LIANA](https://liana-py.readthedocs.io/en/latest/index.html),
|
|
2323
|
+
which is a Python package for cell-cell communication inference and provides a list of existing
|
|
2324
|
+
methods including [CellPhoneDB](https://github.com/ventolab/CellphoneDB),
|
|
2325
|
+
[Connectome](https://github.com/msraredon/Connectome/), log2FC,
|
|
2326
|
+
[NATMI](https://github.com/forrest-lab/NATMI),
|
|
2327
|
+
[SingleCellSignalR](https://github.com/SCA-IRCM/SingleCellSignalR), Rank_Aggregate, Geometric Mean,
|
|
2328
|
+
[scSeqComm](https://gitlab.com/sysbiobig/scseqcomm), and [CellChat](https://github.com/jinworks/CellChat).
|
|
2329
|
+
|
|
2330
|
+
You can also try `python -c 'import liana; liana.mt.show_methods()'` to see the methods available.
|
|
2331
|
+
|
|
2332
|
+
Note that this process does not do any visualization. You can use `CellCellCommunicationPlots`
|
|
2333
|
+
to visualize the results.
|
|
2334
|
+
|
|
2335
|
+
Reference:
|
|
2336
|
+
- [Review](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9184522/).
|
|
2337
|
+
- [LIANA](https://www.biorxiv.org/content/10.1101/2023.08.19.553863v1).
|
|
2338
|
+
|
|
2339
|
+
Input:
|
|
2340
|
+
sobjfile: The seurat object file in RDS or h5seurat format or AnnData file.
|
|
2341
|
+
|
|
2342
|
+
Output:
|
|
2343
|
+
outfile: The output file with the 'liana_res' data frame.
|
|
2344
|
+
Stats are provided for both ligand and receptor entities, more specifically: ligand and receptor are
|
|
2345
|
+
the two entities that potentially interact. As a reminder, CCC events are not limited to secreted signalling,
|
|
2346
|
+
but we refer to them as ligand and receptor for simplicity.
|
|
2347
|
+
Also, in the case of heteromeric complexes, the ligand and receptor columns represent the subunit with minimum
|
|
2348
|
+
expression, while *_complex corresponds to the actual complex, with subunits being separated by _.
|
|
2349
|
+
source and target columns represent the source/sender and target/receiver cell identity for each interaction, respectively
|
|
2350
|
+
* `*_props`: represents the proportion of cells that express the entity.
|
|
2351
|
+
By default, any interactions in which either entity is not expressed in above 10% of cells per cell type
|
|
2352
|
+
is considered as a false positive, under the assumption that since CCC occurs between cell types, a sufficient
|
|
2353
|
+
proportion of cells within should express the genes.
|
|
2354
|
+
* `*_means`: entity expression mean per cell type.
|
|
2355
|
+
* `lr_means`: mean ligand-receptor expression, as a measure of ligand-receptor interaction magnitude.
|
|
2356
|
+
* `cellphone_pvals`: permutation-based p-values, as a measure of interaction specificity.
|
|
2357
|
+
|
|
2358
|
+
Envs:
|
|
2359
|
+
method (choice): The method to use for cell-cell communication inference.
|
|
2360
|
+
- CellPhoneDB: Use CellPhoneDB method.
|
|
2361
|
+
Magnitude Score: lr_means; Specificity Score: cellphone_pvals.
|
|
2362
|
+
- Connectome: Use Connectome method.
|
|
2363
|
+
- log2FC: Use log2FC method.
|
|
2364
|
+
- NATMI: Use NATMI method.
|
|
2365
|
+
- SingleCellSignalR: Use SingleCellSignalR method.
|
|
2366
|
+
- Rank_Aggregate: Use Rank_Aggregate method.
|
|
2367
|
+
- Geometric_Mean: Use Geometric Mean method.
|
|
2368
|
+
- scSeqComm: Use scSeqComm method.
|
|
2369
|
+
- CellChat: Use CellChat method.
|
|
2370
|
+
- cellphonedb: alias for `CellPhoneDB`
|
|
2371
|
+
- connectome: alias for `Connectome`
|
|
2372
|
+
- log2fc: alias for `log2FC`
|
|
2373
|
+
- natmi: alias for `NATMI`
|
|
2374
|
+
- singlecellsignalr: alias for `SingleCellSignalR`
|
|
2375
|
+
- rank_aggregate: alias for `Rank_Aggregate`
|
|
2376
|
+
- geometric_mean: alias for `Geometric_Mean`
|
|
2377
|
+
- scseqcomm: alias for `scSeqComm`
|
|
2378
|
+
- cellchat: alias for `CellChat`
|
|
2379
|
+
assay: The assay to use for the analysis.
|
|
2380
|
+
Only works for Seurat object.
|
|
2381
|
+
seed (type=int): The seed for the random number generator.
|
|
2382
|
+
ncores (type=int): The number of cores to use.
|
|
2383
|
+
groupby: The column name in metadata to group the cells.
|
|
2384
|
+
Typically, this column should be the cluster id.
|
|
2385
|
+
species (choice): The species of the cells.
|
|
2386
|
+
- human: Human cells, the 'consensus' resource will be used.
|
|
2387
|
+
- mouse: Mouse cells, the 'mouseconsensus' resource will be used.
|
|
2388
|
+
expr_prop (type=float): Minimum expression proportion for the ligands and
|
|
2389
|
+
receptors (+ their subunits) in the corresponding cell identities. Set to 0
|
|
2390
|
+
to return unfiltered results.
|
|
2391
|
+
min_cells (type=int): Minimum cells (per cell identity if grouped by `groupby`)
|
|
2392
|
+
to be considered for downstream analysis.
|
|
2393
|
+
n_perms (type=int): Number of permutations for the permutation test.
|
|
2394
|
+
Relevant only for permutation-based methods (e.g., `CellPhoneDB`).
|
|
2395
|
+
If `0` is passed, no permutation testing is performed.
|
|
2396
|
+
rscript: The path to the Rscript executable used to convert RDS file to AnnData.
|
|
2397
|
+
if `in.sobjfile` is an RDS file, it will be converted to AnnData file (h5ad).
|
|
2398
|
+
You need `Seurat`, `SeuratDisk` and `digest` installed.
|
|
2399
|
+
<more>: Other arguments for the method.
|
|
2400
|
+
The arguments are passed to the method directly.
|
|
2401
|
+
See the method documentation for more details and also
|
|
2402
|
+
`help(liana.mt.<method>.__call__)` in Python.
|
|
2403
|
+
""" # noqa: E501
|
|
2404
|
+
input = "sobjfile:file"
|
|
2405
|
+
output = "outfile:file:{{in.sobjfile | stem}}-ccc.txt"
|
|
2406
|
+
lang = config.lang.python
|
|
2407
|
+
envs = {
|
|
2408
|
+
"method": "cellchat",
|
|
2409
|
+
"assay": None,
|
|
2410
|
+
"seed": 1337,
|
|
2411
|
+
"ncores": config.misc.ncores,
|
|
2412
|
+
"groupby": "seurat_clusters",
|
|
2413
|
+
"species": "human",
|
|
2414
|
+
"expr_prop": 0.1,
|
|
2415
|
+
"min_cells": 5,
|
|
2416
|
+
"n_perms": 1000,
|
|
2417
|
+
"rscript": config.lang.rscript,
|
|
2418
|
+
}
|
|
2419
|
+
script = "file://../scripts/scrna/CellCellCommunication.py"
|
|
2420
|
+
|
|
2421
|
+
|
|
2422
|
+
class CellCellCommunicationPlots(Proc):
|
|
2423
|
+
"""Visualization for cell-cell communication inference.
|
|
2424
|
+
|
|
2425
|
+
R package [`CCPlotR`](https://github.com/Sarah145/CCPlotR) is used to visualize
|
|
2426
|
+
the results.
|
|
2427
|
+
|
|
2428
|
+
Input:
|
|
2429
|
+
cccfile: The output file from `CellCellCommunication`
|
|
2430
|
+
or a tab-separated file with the following columns: `source`, `target`,
|
|
2431
|
+
`ligand`, `receptor`, and `score`.
|
|
2432
|
+
If so, `in.expfile` can be provided where `exp_df` is needed.
|
|
2433
|
+
expfile: The expression file with the expression of ligands and receptors.
|
|
2434
|
+
Columns include: `cell_type`, `gene` and `mean_exp`.
|
|
2435
|
+
|
|
2436
|
+
Output:
|
|
2437
|
+
outdir: The output directory for the plots.
|
|
2438
|
+
|
|
2439
|
+
Envs:
|
|
2440
|
+
score_col: The column name in the input file that contains the score, if
|
|
2441
|
+
the input file is from `CellCellCommunication`.
|
|
2442
|
+
Two alias columns are added in the result file of `CellCellCommunication`,
|
|
2443
|
+
`mag_score` and `spec_score`, which are the magnitude and specificity
|
|
2444
|
+
scores.
|
|
2445
|
+
subset: An expression to pass to `dplyr::filter()` to subset the ccc data.
|
|
2446
|
+
cases (type=json): The cases for the plots.
|
|
2447
|
+
The keys are the names of the cases and the values are the arguments for
|
|
2448
|
+
the plots. The arguments include:
|
|
2449
|
+
* kind: one of `arrow`, `circos`, `dotplot`, `heatmap`, `network`,
|
|
2450
|
+
and `sigmoid`.
|
|
2451
|
+
* devpars: The parameters for `png()` for the plot, including `res`,
|
|
2452
|
+
`width`, and `height`.
|
|
2453
|
+
* section: The section name for the report to group the plots.
|
|
2454
|
+
* <other>: Other arguments for `cc_<kind>` function in `CCPlotR`.
|
|
2455
|
+
See the documentation for more details.
|
|
2456
|
+
Or you can use `?CCPlotR::cc_<kind>` in R.
|
|
2457
|
+
"""
|
|
2458
|
+
input = "cccfile:file, expfile:file"
|
|
2459
|
+
output = "outdir:dir:{{in.cccfile | stem}}-ccc_plots"
|
|
2460
|
+
lang = config.lang.rscript
|
|
2461
|
+
envs = {
|
|
2462
|
+
"score_col": "mag_score",
|
|
2463
|
+
"subset": None,
|
|
2464
|
+
"cases": {},
|
|
2465
|
+
}
|
|
2466
|
+
script = "file://../scripts/scrna/CellCellCommunicationPlots.R"
|
|
2467
|
+
plugin_opts = {
|
|
2468
|
+
"report": "file://../reports/scrna/CellCellCommunicationPlots.svelte",
|
|
2469
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
{% from "utils/misc.liq" import report_jobs, table_of_images -%}
|
|
2
|
+
<script>
|
|
3
|
+
import { Image } from "$libs";
|
|
4
|
+
</script>
|
|
5
|
+
|
|
6
|
+
{%- macro report_job(job, h=1) -%}
|
|
7
|
+
{{ job | render_job: h=h }}
|
|
8
|
+
{%- endmacro -%}
|
|
9
|
+
|
|
10
|
+
{%- macro head_job(job) -%}
|
|
11
|
+
<h1>{{job.in.cccfile | stem0 | escape}}</h1>
|
|
12
|
+
{%- endmacro -%}
|
|
13
|
+
|
|
14
|
+
{{ report_jobs(jobs, head_job, report_job) }}
|
|
@@ -1,16 +1,20 @@
|
|
|
1
1
|
{% from "utils/misc.liq" import report_jobs, table_of_images -%}
|
|
2
2
|
<script>
|
|
3
|
-
import { Image } from "$libs";
|
|
3
|
+
import { Image, DataTable } from "$libs";
|
|
4
4
|
</script>
|
|
5
5
|
|
|
6
6
|
{%- macro report_job(job, h=1) -%}
|
|
7
|
-
<h{{h}}>Reference UMAP</h{{h}}>
|
|
8
|
-
{% set imgs = job.outdir | glob: "Reference_UMAP_*.png" %}
|
|
9
|
-
{{ table_of_images(imgs) }}
|
|
10
7
|
|
|
11
|
-
<h{{h}}>
|
|
12
|
-
{% set imgs = job.outdir | glob: "
|
|
8
|
+
<h{{h}}>UMAPs</h{{h}}>
|
|
9
|
+
{% set imgs = job.outdir | glob: "UMAPs-*.png" %}
|
|
13
10
|
{{ table_of_images(imgs) }}
|
|
11
|
+
|
|
12
|
+
<h{{h}}>Stats</h{{h}}>
|
|
13
|
+
{% for stfile in job.outdir | glob: "stats-*.txt" %}
|
|
14
|
+
<h{{h+1}}>{{stfile | stem | replace: "stats-", ""}}</h{{h+1}}>
|
|
15
|
+
<DataTable src="{{stfile}}" data={ {{stfile | datatable: sep="\t"}} } />
|
|
16
|
+
{% endfor %}
|
|
17
|
+
|
|
14
18
|
{%- endmacro -%}
|
|
15
19
|
|
|
16
20
|
{%- macro head_job(job) -%}
|
|
@@ -41,25 +41,33 @@ if (outtype == "rds") {
|
|
|
41
41
|
f <- H5File$new(h5seurat_file, "r+")
|
|
42
42
|
groups <- f$ls(recursive = TRUE)
|
|
43
43
|
|
|
44
|
-
for (name in groups$name[grepl("categories", groups$name)]) {
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
44
|
+
for (name in groups$name[grepl("/categories$", groups$name)]) {
|
|
45
|
+
valuenames <- levelnames <- codenames <- strsplit(name, "/")[[1]]
|
|
46
|
+
valuenames[length(valuenames)] <- "values"
|
|
47
|
+
valuenames <- paste(valuenames, collapse = "/")
|
|
48
|
+
levelnames[length(levelnames)] <- "levels"
|
|
49
|
+
levelnames <- paste(levelnames, collapse = "/")
|
|
50
|
+
codenames[length(codenames)] <- "codes"
|
|
51
|
+
codenames <- paste(codenames, collapse = "/")
|
|
52
|
+
if (!f$exists(codenames)) {
|
|
53
|
+
# No codes, skip
|
|
54
|
+
next
|
|
55
|
+
}
|
|
50
56
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
f
|
|
56
|
-
|
|
57
|
-
|
|
57
|
+
if (!f$exists(levelnames)) {
|
|
58
|
+
f[[levelnames]] <- f[[name]]
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
if (!f$exists(valuenames)) {
|
|
62
|
+
f[[valuenames]] <- f[[codenames]]
|
|
63
|
+
grp <- f[[valuenames]]
|
|
64
|
+
grp$write(args = list(1:grp$dims), value = grp$read() + 1)
|
|
65
|
+
}
|
|
58
66
|
}
|
|
59
67
|
f$close_all()
|
|
60
68
|
# end
|
|
61
69
|
|
|
62
|
-
sobj <- LoadH5Seurat(h5seurat_file)
|
|
70
|
+
sobj <- LoadH5Seurat(h5seurat_file, assays = assay)
|
|
63
71
|
if (!isFALSE(dotplot_check)) {
|
|
64
72
|
log_info("Checking dotplot ...")
|
|
65
73
|
dotfig <- file.path(outdir, "dotplot.png")
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# patched version of cc_circos
|
|
2
|
+
# See https://github.com/Sarah145/CCPlotR/issues/4
|
|
3
|
+
|
|
4
|
+
# Draw a circos (chord) plot of cell-cell communication results.
#
# Arguments:
#   cc_df       Data frame with columns `source`, `target`, `ligand`,
#               `receptor` and `score`.
#   option      "A": one link per source/target pair, sized by the number of
#                    interactions between them.
#               "B": the `n_top_ints` highest-scoring interactions, one sector
#                    per cell-type/gene, sectors grouped by cell type.
#               "C": like "B", plus an inner track coloured by the mean
#                    expression taken from `exp_df`.
#   n_top_ints  Number of top-scoring interactions kept (options B and C).
#   exp_df      Data frame with columns `cell_type`, `gene` and `mean_exp`;
#               required for option "C".
#   cell_cols   Named vector of colours per cell type; generated with
#               `paletteMartin()` when NULL.
#   palette     RColorBrewer palette name for the expression track (option C).
#   cex         Text scaling, applied through `par(cex = )` and to the legend
#               font sizes.
#   show_legend Whether to draw the legend(s) (options B and C).
#   scale       Passed to `chordDiagram(scale = )` for options B and C;
#               option A always uses `scale = FALSE`.
#   ...         Further arguments forwarded to `chordDiagram()`.
#
# Called for its plotting side effect on the current graphics device.
# NOTE(review): relies on `%>%`/dplyr verbs, `paletteMartin()` and `is()` being
# available in the calling environment; they are not attached here.
cc_circos <- function(cc_df, option = "A", n_top_ints = 15, exp_df = NULL, cell_cols = NULL, palette = "BuPu", cex = 1, show_legend = TRUE, scale = FALSE, ...) {
  stopifnot("'cc_df' must be a dataframe" = is(cc_df, "data.frame"))
  stopifnot("cc_df should contain columns named source, target, ligand, receptor and score. See `toy_data` for an example." = all(c('source', 'target', 'ligand', 'receptor', 'score') %in% colnames(cc_df)))
  stopifnot("option must be either 'A', 'B', 'C'" = option %in% c('A', 'B', 'C'))
  library(stringr)
  library(ComplexHeatmap)
  library(circlize)
  circos.clear()

  # Rescale interaction scores linearly to arrow line widths in [1, 5].
  # When the scores have no spread (a single interaction, or all scores equal)
  # the plain formula divides by zero and yields NaN widths, so fall back to a
  # constant width of 1 in that case.
  arrow_widths <- function(scores) {
    score_range <- max(scores) - min(scores)
    if (!isTRUE(score_range > 0)) {
      return(rep(1, length(scores)))
    }
    (((scores - min(scores)) / score_range) * (4)) + 1
  }

  # Pre-declare the column names used in non-standard evaluation below.
  target <- score <- ligand <- receptor <- source_lig <- target_rec <- cell_type <- gene <- cell_gene <- NULL
  if (option == "A") {
    # Number of interactions for every source/target pair
    input_df <- cc_df %>%
      mutate(source = factor(source), target = factor(target)) %>%
      group_by(source, target) %>%
      tally()
    if (is.null(cell_cols)) {
      cell_cols <- setNames(paletteMartin(n = length(unique(c(input_df$source, input_df$target)))), unique(c(input_df$source, input_df$target)))
    }
    circlize_plot <- function() {
      par(cex = cex)
      chordDiagram(input_df,
        scale = FALSE, grid.col = cell_cols,
        annotationTrack = c("grid", "name"), directional = 1, direction.type = c("arrows", "diffHeight"), link.arr.type = "big.arrow", link.arr.length = 0.1, diffHeight = -mm_h(0.5), preAllocateTracks = list(
          track.height = mm_h(10),
          track.margin = c(mm_h(2), -mm_h(4))
        ), ...
      )
    }
  } else if (option == "B") {
    # Keep the top interactions; sectors are named "<cell type>|<gene>"
    input_df <- cc_df %>%
      slice_max(order_by = score, n = n_top_ints) %>%
      mutate(
        source_lig = paste0(source, "|", ligand),
        target_rec = paste0(target, "|", receptor)
      )
    arr_wd <- arrow_widths(input_df$score)

    if (is.null(cell_cols)) {
      cell_cols <- setNames(paletteMartin(n = length(unique(c(input_df$source, input_df$target)))), unique(c(input_df$source, input_df$target)))
    }

    # Each arrow takes the colour of its source cell type
    link_cols <- c()
    for (i in input_df$source_lig) {
      link_cols <- c(link_cols, cell_cols[str_extract(i, "[^|]+")])
    }

    segments <- unique(c(paste0(input_df$source, "|", input_df$ligand), paste0(input_df$target, "|", input_df$receptor)))
    # Group sectors by the cell-type part of their name
    grp <- str_extract(segments, "[^|]+")
    names(grp) <- segments
    lgd <- Legend(
      labels = unique(c(input_df$source, input_df$target)),
      title = "Cell type",
      type = "points",
      title_gp = gpar(fontsize = 14 * cex),
      labels_gp = gpar(fontsize = 12 * cex),
      legend_gp = gpar(col = "transparent"),
      background = cell_cols[unique(c(input_df$source, input_df$target))]
    )
    circlize_plot <- function() {
      par(cex = cex)
      chordDiagram(
        input_df %>%
          select(source_lig, target_rec, score),
        directional = 1, group = grp, link.sort = FALSE, scale = scale, diffHeight = 0.005,
        direction.type = c("arrows"), link.arr.type = "triangle", annotationTrack = c(),
        preAllocateTracks = list(list(track.height = 0.175), list(track.height = 0.05)),
        big.gap = 3, transparency = 1, link.arr.lwd = arr_wd, link.arr.col = link_cols,
        link.arr.length = 0.4, link.arr.width = 0.35, ...
      )
      # Track 1: gene labels (the part of the sector name after the last "|")
      circos.track(track.index = 1, panel.fun = function(x, y) {
        circos.text(CELL_META$xcenter, CELL_META$ylim[1], str_extract(CELL_META$sector.index, "[^|]+$"),
          facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 1.3
        )
      }, bg.border = NA)
      # Track 2: one coloured band per cell type
      for (l in unique(str_extract(segments, "[^|]+"))) {
        highlight.sector(segments[str_detect(segments, paste0("^", str_escape(l)))], track.index = 2, col = cell_cols[l])
      }
      if (show_legend == TRUE) {
        draw(lgd, just = c("left", "bottom"), x = unit(5, "mm"), y = unit(5, "mm"))
      }
      circos.clear()
    }
  } else if (option == "C") {
    stopifnot("'exp_df' must be a dataframe" = is(exp_df, "data.frame"))
    stopifnot("exp_df should contain columns named cell_type, gene and mean_exp. See `toy_exp` for an example." = all(c('cell_type', 'gene', 'mean_exp') %in% colnames(exp_df)))

    input_df <- cc_df %>%
      slice_max(order_by = score, n = n_top_ints) %>%
      mutate(
        source_lig = paste0(source, "|", ligand),
        target_rec = paste0(target, "|", receptor)
      )

    arr_wd <- arrow_widths(input_df$score)

    if (is.null(cell_cols)) {
      cell_cols <- setNames(paletteMartin(n = length(unique(c(input_df$source, input_df$target)))), unique(c(input_df$source, input_df$target)))
    }

    segments <- unique(c(paste0(input_df$source, "|", input_df$ligand), paste0(input_df$target, "|", input_df$receptor)))
    grp <- str_extract(segments, "[^|]+")
    names(grp) <- segments

    # Expression rows for the plotted sectors, indexed by "<cell type>|<gene>"
    gene_df <- as.data.frame(exp_df %>% mutate(cell_gene = paste0(cell_type, "|", gene)) %>% filter(cell_gene %in% segments))
    rownames(gene_df) <- gene_df$cell_gene

    brks <- scales::pretty_breaks(n = 5)(c(floor(min(gene_df$mean_exp)), ceiling(max(gene_df$mean_exp))))
    gene_col_fun <- colorRamp2(brks, RColorBrewer::brewer.pal(length(brks), palette))

    inner.cols <- setNames(gene_col_fun(gene_df[segments, "mean_exp"]), segments)
    lgd1 <- Legend(
      labels = unique(c(input_df$source, input_df$target)),
      title = "Cell type",
      type = "points",
      title_gp = gpar(fontsize = 14 * cex),
      labels_gp = gpar(fontsize = 12 * cex),
      legend_gp = gpar(col = "transparent"),
      background = cell_cols[unique(c(input_df$source, input_df$target))],
      direction = "horizontal"
    )

    lgd2 <- Legend(
      title_gp = gpar(fontsize = 14 * cex),
      labels_gp = gpar(fontsize = 12 * cex),
      direction = "horizontal", at = brks,
      col_fun = gene_col_fun, title = "Mean exp."
    )
    circlize_plot <- function() {
      par(cex = cex)
      chordDiagram(
        input_df %>%
          select(source_lig, target_rec, score),
        directional = 1, group = grp, link.sort = FALSE, diffHeight = 0.005, scale = scale,
        direction.type = c("arrows"), link.arr.type = "triangle", annotationTrack = c(),
        preAllocateTracks = list(list(track.height = 0.175), list(track.height = 0.05), list(track.height = 0.045)),
        big.gap = 3, transparency = 1, link.arr.lwd = arr_wd, link.arr.col = "black", link.arr.length = 0.4, link.arr.width = 0.35, ...
      )
      # Track 1: gene labels
      circos.track(track.index = 1, panel.fun = function(x, y) {
        circos.text(CELL_META$xcenter, CELL_META$ylim[1], str_extract(CELL_META$sector.index, "[^|]+$"),
          facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 1.3
        )
      }, bg.border = NA)
      # Track 2: one coloured band per cell type
      for (l in unique(str_extract(segments, "[^|]+"))) {
        highlight.sector(segments[str_detect(segments, paste0("^", str_escape(l)))], track.index = 2, col = cell_cols[l])
      }
      # Track 3: mean expression of each sector's gene
      circos.track(track.index = 3, panel.fun = function(x, y) {
        circos.rect(CELL_META$xlim[1], CELL_META$ylim[1], CELL_META$xlim[2], CELL_META$ylim[2],
          sector.index = CELL_META$sector.index, col = inner.cols[CELL_META$sector.index]
        )
      }, bg.border = NA)
      if (show_legend == TRUE) {
        draw(packLegend(lgd1, lgd2, direction = "vertical"), just = c("left", "bottom"), x = unit(4.75, "mm"), y = unit(4.75, "mm"))
      }
      circos.clear()
    }
  }
  circlize_plot()
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from biopipen.utils.misc import run_command, logger
|
|
3
|
+
import numpy as np
|
|
4
|
+
import scanpy
|
|
5
|
+
import liana
|
|
6
|
+
import liana.method.sc._liana_pipe as _liana_pipe
|
|
7
|
+
|
|
8
|
+
# monkey-patch liana.method.sc._liana_pipe._trimean due to the updates by scipy 1.14
|
|
9
|
+
# https://github.com/scipy/scipy/commit/a660202652deead0f3b4b688eb9fdcdf9f74066c
|
|
10
|
+
def _trimean(a, axis=0):
    """Return Tukey's trimean, ``(Q1 + 2 * median + Q3) / 4``, along ``axis``.

    Replacement for ``liana.method.sc._liana_pipe._trimean`` that does not
    depend on the sparse-matrix ``.A`` shortcut (see the SciPy commit linked
    above this function).

    Args:
        a: A SciPy sparse matrix/array, or any dense array-like
            (``numpy.ndarray``, ``numpy.matrix``, nested lists).
        axis: Axis along which the quartiles and the median are computed.

    Returns:
        The trimean along ``axis``.
    """
    # Sparse containers must be densified explicitly; everything else is
    # handled by np.asarray (the original crashed on plain ndarrays because
    # they have neither `.A` nor `.toarray()`).
    if hasattr(a, "toarray"):
        arr = a.toarray()
    else:
        arr = np.asarray(a)

    quantiles = np.quantile(arr, q=[0.25, 0.75], axis=axis)
    median = np.median(arr, axis=axis)
    return (quantiles[0] + 2 * median + quantiles[1]) / 4
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
_liana_pipe._trimean = _trimean
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
sobjfile = Path({{in.sobjfile | repr}}) # pyright: ignore # noqa: E999
|
|
25
|
+
outfile = Path({{out.outfile | repr}}) # pyright: ignore
|
|
26
|
+
envs = {{envs | repr}} # pyright: ignore
|
|
27
|
+
|
|
28
|
+
method = envs.pop("method")
|
|
29
|
+
assay = envs.pop("assay")
|
|
30
|
+
ncores = envs.pop("ncores")
|
|
31
|
+
species = envs.pop("species")
|
|
32
|
+
rscript = envs.pop("rscript")
|
|
33
|
+
|
|
34
|
+
if sobjfile.suffix.lower() == ".rds" or sobjfile.suffix.lower() == ".h5seurat":
|
|
35
|
+
annfile = outfile.parent / f"{sobjfile.stem}.h5ad"
|
|
36
|
+
r_script_convert_to_anndata = f"""
|
|
37
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
38
|
+
{{ biopipen_dir | joinpaths: "utils", "single_cell.R" | source_r }}
|
|
39
|
+
|
|
40
|
+
seurat_to_anndata(
|
|
41
|
+
"{sobjfile}",
|
|
42
|
+
"{annfile}",
|
|
43
|
+
assay = {{ envs.assay | r }},
|
|
44
|
+
log_info = log_info
|
|
45
|
+
)
|
|
46
|
+
"""
|
|
47
|
+
run_command([rscript, "-e", r_script_convert_to_anndata], fg=True)
|
|
48
|
+
|
|
49
|
+
sobjfile = annfile
|
|
50
|
+
|
|
51
|
+
logger.info("Reading the h5ad file ...")
|
|
52
|
+
adata = scanpy.read_h5ad(sobjfile)
|
|
53
|
+
|
|
54
|
+
method = method.lower()
|
|
55
|
+
if method == "log2fc":
|
|
56
|
+
method_fun = liana.mt.logfc
|
|
57
|
+
else:
|
|
58
|
+
method_fun = getattr(liana.mt, method)
|
|
59
|
+
|
|
60
|
+
logger.info(f"Running {method} ...")
|
|
61
|
+
envs["adata"] = adata
|
|
62
|
+
envs["resource_name"] = "consensus" if species == "human" else "mouseconsensus"
|
|
63
|
+
envs["n_jobs"] = ncores
|
|
64
|
+
envs["inplace"] = True
|
|
65
|
+
envs["verbose"] = True
|
|
66
|
+
envs["key_added"] = "liana_ccc"
|
|
67
|
+
method_fun(**envs)
|
|
68
|
+
|
|
69
|
+
# The LIANA result table stored under the `key_added` name used for the run.
res = adata.uns['liana_ccc']

# Column holding the magnitude score of each method, keyed by the lower-cased
# method name (the same name used to look the method up in `liana.mt`).
# `None` means the method reports no magnitude score.
mag_score_names = {
    "cellphonedb": "lr_means",
    "connectome": "expr_prod",
    "log2fc": None,
    "natmi": "expr_prod",
    "singlecellsignalr": "lrscore",
    # Was keyed "rank_aggregation", which never matched the method name
    # `rank_aggregate` and raised KeyError after the analysis had finished.
    "rank_aggregate": "magnitude_rank",
    "geometric_mean": "lr_gmeans",
    "scseqcomm": "inter_score",
    "cellchat": "lr_probs",
}

# Column holding the specificity score of each method; `None` means the
# method reports no specificity score.
spec_score_names = {
    "cellphonedb": "cellphone_pvals",
    "connectome": "scaled_weight",
    "log2fc": "lr_logfc",
    "natmi": "spec_weight",
    "singlecellsignalr": None,
    "rank_aggregate": "specificity_rank",
    "geometric_mean": "gmean_pvals",
    "scseqcomm": None,
    "cellchat": "cellchat_pvals",
}

# Add the method-independent alias columns `mag_score` / `spec_score`.
# `.get()` is used so that a method missing from the tables still gets its
# results saved (without aliases) instead of failing after the expensive run.
for alias, score_names in (
    ("mag_score", mag_score_names),
    ("spec_score", spec_score_names),
):
    score_name = score_names.get(method)
    if score_name is not None:
        res[alias] = res[score_name]

logger.info("Saving the result ...")
res.to_csv(outfile, sep="\t", index=False)
|