biopipen 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +4 -0
- biopipen/core/filters.py +1 -1
- biopipen/core/testing.py +2 -1
- biopipen/ns/cellranger.py +33 -3
- biopipen/ns/regulatory.py +4 -0
- biopipen/ns/scrna.py +548 -98
- biopipen/ns/scrna_metabolic_landscape.py +4 -0
- biopipen/ns/tcr.py +256 -16
- biopipen/ns/web.py +5 -0
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +9 -9
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +9 -8
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +9 -9
- biopipen/reports/tcr/ClonalStats.svelte +1 -0
- biopipen/scripts/cellranger/CellRangerCount.py +55 -11
- biopipen/scripts/cellranger/CellRangerVdj.py +54 -8
- biopipen/scripts/regulatory/MotifAffinityTest.R +21 -5
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +9 -2
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +15 -6
- biopipen/scripts/regulatory/VariantMotifPlot.R +1 -1
- biopipen/scripts/regulatory/motifs-common.R +3 -2
- biopipen/scripts/scrna/AnnData2Seurat.R +2 -1
- biopipen/scripts/scrna/CellCellCommunication.py +26 -14
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +23 -4
- biopipen/scripts/scrna/CellSNPLite.py +30 -0
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +27 -36
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +42 -26
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +11 -13
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +5 -8
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +5 -8
- biopipen/scripts/scrna/CellTypeAnnotation.R +26 -3
- biopipen/scripts/scrna/MQuad.py +25 -0
- biopipen/scripts/scrna/MarkersFinder.R +128 -30
- biopipen/scripts/scrna/ModuleScoreCalculator.R +9 -1
- biopipen/scripts/scrna/PseudoBulkDEG.R +113 -27
- biopipen/scripts/scrna/ScFGSEA.R +23 -26
- biopipen/scripts/scrna/ScVelo.py +20 -8
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -1
- biopipen/scripts/scrna/SeuratClustering.R +5 -1
- biopipen/scripts/scrna/SeuratMap2Ref.R +1 -2
- biopipen/scripts/scrna/SeuratPreparing.R +19 -11
- biopipen/scripts/scrna/SeuratSubClustering.R +1 -1
- biopipen/scripts/scrna/Slingshot.R +2 -4
- biopipen/scripts/scrna/TopExpressingGenes.R +1 -4
- biopipen/scripts/scrna/celltypist-wrapper.py +140 -4
- biopipen/scripts/scrna/scvelo_paga.py +313 -0
- biopipen/scripts/scrna/seurat_anndata_conversion.py +18 -1
- biopipen/scripts/tcr/{TCRClustering.R → CDR3Clustering.R} +63 -23
- biopipen/scripts/tcr/ClonalStats.R +76 -35
- biopipen/utils/misc.py +104 -9
- {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/METADATA +5 -2
- {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/RECORD +55 -53
- {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
- biopipen/utils/common_docstrs.py +0 -103
- {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +0 -0
|
@@ -1,15 +1,23 @@
|
|
|
1
1
|
library(Seurat)
|
|
2
|
+
library(rlang)
|
|
3
|
+
library(dplyr)
|
|
4
|
+
library(tidyseurat)
|
|
2
5
|
|
|
3
6
|
sobjfile <- {{in.sobjfile | r}}
|
|
4
7
|
outfile <- {{out.outfile | r}}
|
|
5
8
|
celltypes <- {{envs.cell_types | r}}
|
|
6
9
|
newcol <- {{envs.newcol | r}}
|
|
10
|
+
ident <- {{envs.ident | r }}
|
|
7
11
|
merge_same_labels <- {{envs.merge | r}}
|
|
12
|
+
more_cell_types <- {{envs.more_cell_types | r}}
|
|
8
13
|
|
|
9
14
|
log <- biopipen.utils::get_logger()
|
|
10
15
|
|
|
11
16
|
if (is.null(celltypes) || length(celltypes) == 0) {
|
|
12
17
|
log$warn("No cell types are given!")
|
|
18
|
+
if (!is.null(more_cell_types) && length(more_cell_types) > 0) {
|
|
19
|
+
log$warn("`envs.celltypes` is not given, won't process `envs.more_cell_types`!")
|
|
20
|
+
}
|
|
13
21
|
|
|
14
22
|
if (merge_same_labels) {
|
|
15
23
|
log$warn("Ignoring 'envs.merge' because no cell types are given!")
|
|
@@ -19,45 +27,52 @@ if (is.null(celltypes) || length(celltypes) == 0) {
|
|
|
19
27
|
} else {
|
|
20
28
|
log$info("Loading Seurat object ...")
|
|
21
29
|
sobj <- biopipen.utils::read_obj(sobjfile)
|
|
30
|
+
ident <- ident %||% biopipen.utils::GetIdentityColumn(sobj)
|
|
31
|
+
Idents(sobj) <- ident
|
|
22
32
|
idents <- Idents(sobj)
|
|
23
33
|
if (is.factor(idents)) {
|
|
24
34
|
idents <- levels(idents)
|
|
25
35
|
} else {
|
|
26
36
|
idents <- as.character(unique(idents))
|
|
27
37
|
}
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
38
|
+
process_celltypes <- function(ct, key = NULL) {
|
|
39
|
+
if (length(ct) < length(idents)) {
|
|
40
|
+
ct <- c(ct, idents[(length(ct) + 1):length(idents)])
|
|
41
|
+
} else if (length(ct) > length(idents)) {
|
|
42
|
+
ct <- ct[1:length(idents)]
|
|
43
|
+
if (is.null(key)) {
|
|
44
|
+
log$warn("The length of cell types is longer than the number of clusters!")
|
|
45
|
+
} else {
|
|
46
|
+
log$warn(paste0("The length of cell types for '", key, "' is longer than the number of clusters!"))
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
for (i in seq_along(ct)) {
|
|
50
|
+
if (ct[i] == "-" || ct[i] == "") {
|
|
51
|
+
ct[i] <- idents[i]
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
names(ct) <- idents
|
|
55
|
+
return(ct)
|
|
34
56
|
}
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
57
|
+
|
|
58
|
+
if (!is.null(more_cell_types) && length(more_cell_types) > 0) {
|
|
59
|
+
for (key in names(more_cell_types)) {
|
|
60
|
+
ct <- more_cell_types[[key]]
|
|
61
|
+
ct <- process_celltypes(ct, key)
|
|
62
|
+
log$info(paste0("Adding additional cell type annotation: '", key, "' ..."))
|
|
63
|
+
sobj@meta.data[[key]] <- ct[as.character(Idents(sobj))]
|
|
38
64
|
}
|
|
39
65
|
}
|
|
40
|
-
|
|
66
|
+
|
|
67
|
+
celltypes <- process_celltypes(celltypes)
|
|
41
68
|
|
|
42
69
|
log$info("Renaming cell types ...")
|
|
43
70
|
if (is.null(newcol)) {
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
sobj <- do_call(RenameIdents, celltypes)
|
|
48
|
-
sobj$seurat_clusters <- Idents(sobj)
|
|
49
|
-
if (has_na) {
|
|
50
|
-
log$info("Filtering clusters if NA ...")
|
|
51
|
-
sobj <- subset(
|
|
52
|
-
sobj,
|
|
53
|
-
subset = seurat_clusters != "NA" & !is.na(seurat_clusters)
|
|
54
|
-
)
|
|
55
|
-
}
|
|
71
|
+
sobj <- rename_idents(sobj, ident, celltypes)
|
|
72
|
+
log$info("Filtering clusters if NA ...")
|
|
73
|
+
sobj <- filter(sobj, !!sym(ident) != "NA" & !is.na(!!sym(ident)))
|
|
56
74
|
} else {
|
|
57
|
-
|
|
58
|
-
sobj <- do_call(RenameIdents, celltypes)
|
|
59
|
-
sobj[[newcol]] <- Idents(sobj)
|
|
60
|
-
Idents(sobj) <- "seurat_clusters"
|
|
75
|
+
sobj[[newcol]] <- celltypes[as.character(Idents(sobj))]
|
|
61
76
|
}
|
|
62
77
|
|
|
63
78
|
if (merge_same_labels) {
|
|
@@ -65,5 +80,6 @@ if (is.null(celltypes) || length(celltypes) == 0) {
|
|
|
65
80
|
sobj <- merge_clusters_with_same_labels(sobj, newcol)
|
|
66
81
|
}
|
|
67
82
|
|
|
83
|
+
log$info("Saving Seurat object ...")
|
|
68
84
|
biopipen.utils::save_obj(sobj, outfile)
|
|
69
85
|
}
|
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
library(
|
|
1
|
+
library(rlang)
|
|
2
2
|
library(dplyr)
|
|
3
3
|
library(hitype)
|
|
4
|
-
library(biopipen.utils)
|
|
5
4
|
|
|
6
5
|
sobjfile = {{in.sobjfile | r}}
|
|
7
6
|
outfile = {{out.outfile | r}}
|
|
8
7
|
tissue = {{envs.hitype_tissue | r}}
|
|
9
8
|
db = {{envs.hitype_db | r}}
|
|
10
9
|
newcol = {{envs.newcol | r}}
|
|
10
|
+
ident = {{envs.ident | r }}
|
|
11
11
|
merge_same_labels = {{envs.merge | r}}
|
|
12
12
|
|
|
13
13
|
if (is.null(db)) { stop("`envs.hitype_db` is not set") }
|
|
@@ -16,6 +16,8 @@ log <- get_logger()
|
|
|
16
16
|
|
|
17
17
|
log$info("Reading Seurat object...")
|
|
18
18
|
sobj = biopipen.utils::read_obj(sobjfile)
|
|
19
|
+
ident <- ident %||% biopipen.utils::GetIdentityColumn(sobj)
|
|
20
|
+
Idents(sobj) <- ident
|
|
19
21
|
|
|
20
22
|
# prepare gene sets
|
|
21
23
|
log$info("Preparing gene sets...")
|
|
@@ -30,18 +32,14 @@ log$info("Running RunHitype...")
|
|
|
30
32
|
sobj = RunHitype(sobj, gs_list, threshold = 0.0, make_unique = TRUE)
|
|
31
33
|
|
|
32
34
|
log$info("Renaming cell types...")
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
arrange(as.numeric(seurat_clusters)) %>%
|
|
37
|
-
pull("hitype")
|
|
35
|
+
hitype_labels <- sobj@meta.data %>%
|
|
36
|
+
distinct(!!sym(ident), hitype)
|
|
37
|
+
hitype_labels <- split(hitype_labels$hitype, hitype_labels[[ident]])
|
|
38
38
|
|
|
39
39
|
if (is.null(newcol)) {
|
|
40
|
-
sobj
|
|
41
|
-
sobj$seurat_clusters = factor(sobj$hitype, levels = hitype_levels)
|
|
42
|
-
Idents(sobj) = "seurat_clusters"
|
|
40
|
+
sobj <- rename_idents(sobj, ident, hitype_labels)
|
|
43
41
|
} else {
|
|
44
|
-
sobj[[newcol]] =
|
|
42
|
+
sobj[[newcol]] = sobj$hitype
|
|
45
43
|
}
|
|
46
44
|
|
|
47
45
|
if (merge_same_labels) {
|
|
@@ -55,9 +53,9 @@ biopipen.utils::save_obj(sobj, outfile)
|
|
|
55
53
|
log$info("Saving the mappings ...")
|
|
56
54
|
if (is.null(newcol)) {
|
|
57
55
|
celltypes = sobj@meta.data %>%
|
|
58
|
-
group_by(
|
|
56
|
+
group_by(!!sym(backup_col)) %>%
|
|
59
57
|
summarize(CellType = hitype[1]) %>%
|
|
60
|
-
select(Cluster =
|
|
58
|
+
select(Cluster = !!sym(backup_col), CellType) %>%
|
|
61
59
|
ungroup()
|
|
62
60
|
} else {
|
|
63
61
|
celltypes = sobj@meta.data %>%
|
|
@@ -6,6 +6,7 @@ sobjfile = {{in.sobjfile | r}}
|
|
|
6
6
|
outfile = {{out.outfile | r}}
|
|
7
7
|
sccatch_args = {{envs.sccatch_args | r}}
|
|
8
8
|
newcol = {{envs.newcol | r}}
|
|
9
|
+
ident = {{envs.ident | r }}
|
|
9
10
|
merge_same_labels = {{envs.merge | r}}
|
|
10
11
|
|
|
11
12
|
log <- get_logger()
|
|
@@ -22,6 +23,8 @@ if (is.integer(sccatch_args$use_method)) {
|
|
|
22
23
|
|
|
23
24
|
log$info("Reading Seurat object...")
|
|
24
25
|
sobj = read_obj(sobjfile)
|
|
26
|
+
ident <- ident %||% GetIdentityColumn(sobj)
|
|
27
|
+
Idents(sobj) <- ident
|
|
25
28
|
|
|
26
29
|
log$info("Running createscCATCH ...")
|
|
27
30
|
obj = createscCATCH(data = GetAssayData(sobj), cluster = as.character(Idents(sobj)))
|
|
@@ -48,15 +51,9 @@ if (length(celltypes) == 0) {
|
|
|
48
51
|
log$warn("- No cell types annotated from the database!")
|
|
49
52
|
} else {
|
|
50
53
|
if (is.null(newcol)) {
|
|
51
|
-
sobj
|
|
52
|
-
celltypes$object = sobj
|
|
53
|
-
sobj = do_call(RenameIdents, celltypes)
|
|
54
|
-
sobj$seurat_clusters = Idents(sobj)
|
|
54
|
+
sobj <- rename_idents(sobj, ident, celltypes)
|
|
55
55
|
} else {
|
|
56
|
-
|
|
57
|
-
sobj = do_call(RenameIdents, celltypes)
|
|
58
|
-
sobj[[newcol]] = Idents(sobj)
|
|
59
|
-
Idents(sobj) = "seurat_clusters"
|
|
56
|
+
sobj@meta.data[[newcol]] = celltypes[as.character(Idents(sobj))]
|
|
60
57
|
}
|
|
61
58
|
|
|
62
59
|
if (merge_same_labels) {
|
|
@@ -11,6 +11,7 @@ outfile = {{out.outfile | r}}
|
|
|
11
11
|
tissue = {{envs.sctype_tissue | r}}
|
|
12
12
|
db = {{envs.sctype_db | r}}
|
|
13
13
|
newcol = {{envs.newcol | r}}
|
|
14
|
+
ident = {{envs.ident | r }}
|
|
14
15
|
merge_same_labels = {{envs.merge | r}}
|
|
15
16
|
|
|
16
17
|
if (is.null(db)) { stop("`envs.sctype_args.db` is not set") }
|
|
@@ -19,6 +20,8 @@ log <- get_logger()
|
|
|
19
20
|
|
|
20
21
|
log$info("Reading Seurat object...")
|
|
21
22
|
sobj = biopipen.utils::read_obj(sobjfile)
|
|
23
|
+
ident <- ident %||% biopipen.utils::GetIdentityColumn(sobj)
|
|
24
|
+
Idents(sobj) <- ident
|
|
22
25
|
|
|
23
26
|
# prepare gene sets
|
|
24
27
|
log$info("Preparing gene sets...")
|
|
@@ -116,15 +119,9 @@ for (key in names(celltypes)) {
|
|
|
116
119
|
|
|
117
120
|
celltypes = as.list(celltypes)
|
|
118
121
|
if (is.null(newcol)) {
|
|
119
|
-
sobj
|
|
120
|
-
celltypes$object = sobj
|
|
121
|
-
sobj = do_call(RenameIdents, celltypes)
|
|
122
|
-
sobj$seurat_clusters = Idents(sobj)
|
|
122
|
+
sobj <- rename_idents(sobj, ident, celltypes)
|
|
123
123
|
} else {
|
|
124
|
-
|
|
125
|
-
sobj = do_call(RenameIdents, celltypes)
|
|
126
|
-
sobj[[newcol]] = Idents(sobj)
|
|
127
|
-
Idents(sobj) = "seurat_clusters"
|
|
124
|
+
sobj@meta.data[[newcol]] = celltypes[as.character(Idents(sobj))]
|
|
128
125
|
}
|
|
129
126
|
celltypes$object = NULL
|
|
130
127
|
gc()
|
|
@@ -1,9 +1,15 @@
|
|
|
1
|
+
library(Seurat)
|
|
2
|
+
library(biopipen.utils)
|
|
1
3
|
set.seed(8525)
|
|
2
4
|
|
|
3
|
-
|
|
5
|
+
backup_col <- {{envs.backup_col | r}}
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
merge_clusters_with_same_labels <- function(sobj, newcol = NULL) {
|
|
4
9
|
if (is.null(newcol)) {
|
|
5
|
-
|
|
6
|
-
|
|
10
|
+
newcol <- biopipen.utils::GetIdentityColumn(sobj)
|
|
11
|
+
sobj@meta.data[[newcol]] <- sub("\\.\\d+$", "", sobj@meta.data[[newcol]])
|
|
12
|
+
Idents(sobj) <- newcol
|
|
7
13
|
} else {
|
|
8
14
|
sobj@meta.data[[newcol]] <- sub("\\.\\d+$", "", sobj@meta.data[[newcol]])
|
|
9
15
|
}
|
|
@@ -11,6 +17,23 @@ merge_clusters_with_same_labels <- function(sobj, newcol) {
|
|
|
11
17
|
sobj
|
|
12
18
|
}
|
|
13
19
|
|
|
20
|
+
rename_idents <- function(sobj, ident_col, mapping) {
|
|
21
|
+
orig_ident_col <- biopipen.utils::GetIdentityColumn(sobj)
|
|
22
|
+
if (!identical(ident_col, orig_ident_col)) {
|
|
23
|
+
Idents(sobj) <- ident_col
|
|
24
|
+
mapping$object <- sobj
|
|
25
|
+
sobj <- do_call(RenameIdents, mapping)
|
|
26
|
+
} else {
|
|
27
|
+
if (!is.null(backup_col)) {
|
|
28
|
+
sobj@meta.data[[backup_col]] <- Idents(sobj)
|
|
29
|
+
}
|
|
30
|
+
mapping$object <- sobj
|
|
31
|
+
sobj <- do_call(RenameIdents, mapping)
|
|
32
|
+
}
|
|
33
|
+
sobj@meta.data[[ident_col]] <- Idents(sobj)
|
|
34
|
+
sobj
|
|
35
|
+
}
|
|
36
|
+
|
|
14
37
|
{% if envs.tool == "hitype" %}
|
|
15
38
|
{% include biopipen_dir + "/scripts/scrna/CellTypeAnnotation-hitype.R" %}
|
|
16
39
|
{% elif envs.tool == "sctype" %}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from contextlib import suppress
|
|
5
|
+
from biopipen.core.filters import dict_to_cli_args
|
|
6
|
+
from biopipen.utils.misc import run_command
|
|
7
|
+
|
|
8
|
+
cellsnpout = {{in.cellsnpout | quote}} # noqa: E999 # pyright: ignore
|
|
9
|
+
outdir = {{out.outdir | quote}} # pyright: ignore
|
|
10
|
+
envs: dict = {{envs | repr}} # pyright: ignore
|
|
11
|
+
mquad = envs.pop("mquad")
|
|
12
|
+
ncores = envs.pop("ncores")
|
|
13
|
+
seed = envs.pop("seed", 8525)
|
|
14
|
+
|
|
15
|
+
with suppress(RuntimeError):
|
|
16
|
+
run_command([mquad], fg=True)
|
|
17
|
+
print("")
|
|
18
|
+
|
|
19
|
+
envs["cellData"] = cellsnpout
|
|
20
|
+
envs["outDir"] = outdir
|
|
21
|
+
envs["randSeed"] = seed
|
|
22
|
+
envs["nproc"] = ncores
|
|
23
|
+
|
|
24
|
+
cmd = [mquad, *dict_to_cli_args(envs, sep="=")]
|
|
25
|
+
run_command(cmd, fg=True, bufsize=1)
|
|
@@ -42,15 +42,12 @@ if (isTRUE(cache)) { cache <- joboutdir }
|
|
|
42
42
|
|
|
43
43
|
set.seed(8525)
|
|
44
44
|
if (ncores > 1) {
|
|
45
|
-
options(future.globals.maxSize =
|
|
45
|
+
options(future.globals.maxSize = Inf)
|
|
46
46
|
plan(strategy = "multicore", workers = ncores)
|
|
47
47
|
}
|
|
48
48
|
|
|
49
49
|
log$info("Reading Seurat object ...")
|
|
50
50
|
srtobj <- read_obj(srtfile)
|
|
51
|
-
if (!"Identity" %in% colnames(srtobj@meta.data)) {
|
|
52
|
-
srtobj@meta.data$Identity <- Idents(srtobj)
|
|
53
|
-
}
|
|
54
51
|
|
|
55
52
|
|
|
56
53
|
if (!is.null(mutaters) && length(mutaters) > 0) {
|
|
@@ -105,7 +102,7 @@ log$info("Expanding cases ...")
|
|
|
105
102
|
post_casing <- function(name, case) {
|
|
106
103
|
outcases <- list()
|
|
107
104
|
|
|
108
|
-
case$group_by <- case$group_by %||%
|
|
105
|
+
case$group_by <- case$group_by %||% GetIdentityColumn(srtobj)
|
|
109
106
|
|
|
110
107
|
if (is.null(case$each) || is.na(case$each) || nchar(case$each) == 0 || isFALSE(each)) {
|
|
111
108
|
# single cases, no need to expand
|
|
@@ -165,10 +162,12 @@ post_casing <- function(name, case) {
|
|
|
165
162
|
|
|
166
163
|
if (length(cases) == 0 && name == "Marker Discovery") {
|
|
167
164
|
name <- case$each
|
|
165
|
+
} else {
|
|
166
|
+
name <- paste0(name, " (", case$each, ")")
|
|
168
167
|
}
|
|
169
168
|
|
|
170
169
|
for (each in eachs) {
|
|
171
|
-
newname <- paste0(name, "
|
|
170
|
+
newname <- paste0(name, "::", each)
|
|
172
171
|
newcase <- case
|
|
173
172
|
|
|
174
173
|
newcase$original_case <- name
|
|
@@ -266,20 +265,23 @@ process_markers <- function(markers, info, case) {
|
|
|
266
265
|
ui = "tabs"
|
|
267
266
|
)
|
|
268
267
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
268
|
+
if (nrow(markers) > 0) {
|
|
269
|
+
for (plotname in names(case$marker_plots)) {
|
|
270
|
+
plotargs <- case$marker_plots[[plotname]]
|
|
271
|
+
plotargs$markers <- markers
|
|
272
|
+
plotargs$object <- case$object
|
|
273
|
+
plotargs$comparison_by <- case$group_by
|
|
274
|
+
plotargs$outprefix <- file.path(info$prefix, paste0("markers.", slugify(plotname)))
|
|
275
|
+
do_call(VizDEGs, plotargs)
|
|
276
|
+
reporter$add2(
|
|
277
|
+
list(
|
|
278
|
+
name = plotname,
|
|
279
|
+
contents = list(reporter$image(plotargs$outprefix, plotargs$more_formats, plotargs$save_code))),
|
|
280
|
+
hs = c(info$section, info$name),
|
|
281
|
+
hs2 = ifelse(is.null(case$ident), "Markers", paste0("Markers (", case$ident, ")")),
|
|
282
|
+
ui = "tabs"
|
|
283
|
+
)
|
|
284
|
+
}
|
|
283
285
|
}
|
|
284
286
|
|
|
285
287
|
# Do enrichment analysis
|
|
@@ -349,16 +351,29 @@ process_markers <- function(markers, info, case) {
|
|
|
349
351
|
for (db in case$dbs) {
|
|
350
352
|
plots <- list()
|
|
351
353
|
for (plotname in names(case$enrich_plots)) {
|
|
352
|
-
plotargs <- case$enrich_plots[[plotname]]
|
|
354
|
+
plotargs <- extract_vars(case$enrich_plots[[plotname]], "descr", allow_nonexisting = TRUE)
|
|
353
355
|
plotargs$data <- enrich[enrich$Database == db, , drop = FALSE]
|
|
354
356
|
|
|
355
|
-
p <-
|
|
357
|
+
p <- tryCatch(
|
|
358
|
+
do_call(VizEnrichment, plotargs),
|
|
359
|
+
error = function(e) {
|
|
360
|
+
stop("Failed to plot enrichment for database '", db, "' with plot '", plotname, "': ", e$message)
|
|
361
|
+
}
|
|
362
|
+
)
|
|
356
363
|
|
|
357
364
|
if (plotargs$plot_type == "bar") {
|
|
358
365
|
attr(p, "height") <- attr(p, "height") / 1.5
|
|
366
|
+
descr <- descr %||% glue::glue(
|
|
367
|
+
"The bar plot shows the top enriched terms in database '{db}', ",
|
|
368
|
+
"the x-axis shows the -log10 of the adjusted p-values, ",
|
|
369
|
+
"and the y-axis shows the term names. The number next to each bar indicates the overlap gene count."
|
|
370
|
+
)
|
|
359
371
|
}
|
|
360
372
|
outprefix <- file.path(info$prefix, paste0("enrich.", slugify(db), ".", slugify(plotname)))
|
|
361
373
|
save_plot(p, outprefix, plotargs$devpars, formats = "png")
|
|
374
|
+
if (!is.null(descr)) {
|
|
375
|
+
plots[[length(plots) + 1]] <- list(kind = "descr", content = glue::glue(descr))
|
|
376
|
+
}
|
|
362
377
|
plots[[length(plots) + 1]] <- reporter$image(outprefix, c(), FALSE)
|
|
363
378
|
}
|
|
364
379
|
reporter$add2(
|
|
@@ -389,13 +404,18 @@ process_markers <- function(markers, info, case) {
|
|
|
389
404
|
}
|
|
390
405
|
}
|
|
391
406
|
|
|
392
|
-
process_allmarkers <- function(markers, plotcases, casename, groupname) {
|
|
407
|
+
process_allmarkers <- function(markers, object, comparison_by, plotcases, casename, groupname, subset_by_group = TRUE) {
|
|
393
408
|
name <- paste0(casename, "::", paste0(groupname, " (All Markers)"))
|
|
394
409
|
info <- case_info(name, outdir, create = TRUE)
|
|
395
410
|
|
|
396
411
|
for (plotname in names(plotcases)) {
|
|
412
|
+
log$info(" {plotname} ...")
|
|
397
413
|
plotargs <- plotcases[[plotname]]
|
|
398
|
-
plotargs$
|
|
414
|
+
plotargs$markers <- markers
|
|
415
|
+
plotargs$object <- object
|
|
416
|
+
plotargs$comparison_by <- comparison_by
|
|
417
|
+
if (subset_by_group)
|
|
418
|
+
plotargs$subset_by <- groupname
|
|
399
419
|
plotargs$outprefix <- file.path(info$prefix, slugify(plotname))
|
|
400
420
|
do_call(VizDEGs, plotargs)
|
|
401
421
|
reporter$add2(
|
|
@@ -417,6 +437,7 @@ process_allenriches <- function(enriches, plotcases, casename, groupname) {
|
|
|
417
437
|
for (db in dbs) {
|
|
418
438
|
plots <- list()
|
|
419
439
|
for (plotname in names(plotcases)) {
|
|
440
|
+
log$info(" {plotname} ({db}) ...")
|
|
420
441
|
plotargs <- plotcases[[plotname]]
|
|
421
442
|
plotargs <- extract_vars(plotargs, "devpars")
|
|
422
443
|
plotargs$data <- enriches[enriches$Database == db, , drop = FALSE]
|
|
@@ -449,6 +470,7 @@ process_overlaps <- function(markers, ovcases, casename, groupname) {
|
|
|
449
470
|
info <- case_info(name, outdir, create = TRUE)
|
|
450
471
|
|
|
451
472
|
for (plotname in names(ovcases)) {
|
|
473
|
+
log$info(" {plotname} ...")
|
|
452
474
|
args <- extract_vars(
|
|
453
475
|
ovcases[[plotname]],
|
|
454
476
|
sigm = "sigmarkers", "more_formats", "save_code", "devpars", "plot_type",
|
|
@@ -513,6 +535,7 @@ run_case <- function(name) {
|
|
|
513
535
|
case,
|
|
514
536
|
"dbs", "sigmarkers", "allmarker_plots", "allenrich_plots", "marker_plots", "enrich_plots",
|
|
515
537
|
"overlaps", "original_case", "markers", "enriches", "each_name", "each", "enrich_style", "original_subset",
|
|
538
|
+
subset_ = "subset",
|
|
516
539
|
allow_nonexisting = TRUE
|
|
517
540
|
)
|
|
518
541
|
|
|
@@ -545,7 +568,16 @@ run_case <- function(name) {
|
|
|
545
568
|
attr(markers, "group_by") <- each
|
|
546
569
|
attr(markers, "ident_1") <- NULL
|
|
547
570
|
attr(markers, "ident_2") <- NULL
|
|
548
|
-
|
|
571
|
+
if (!is.null(markers) && nrow(markers) > 0) {
|
|
572
|
+
process_allmarkers(
|
|
573
|
+
markers,
|
|
574
|
+
object = if (is.null(original_subset)) srtobj else filter(srtobj, !!parse_expr(original_subset)),
|
|
575
|
+
comparison_by = group_by,
|
|
576
|
+
allmarker_plots,
|
|
577
|
+
name,
|
|
578
|
+
each
|
|
579
|
+
)
|
|
580
|
+
}
|
|
549
581
|
}
|
|
550
582
|
|
|
551
583
|
if (length(overlaps) > 0) {
|
|
@@ -555,7 +587,7 @@ run_case <- function(name) {
|
|
|
555
587
|
|
|
556
588
|
}
|
|
557
589
|
|
|
558
|
-
if (!is.null(enriches)) {
|
|
590
|
+
if (!is.null(enriches) && length(enriches) > 0) {
|
|
559
591
|
log$info("- Summarizing enrichments in subcases (by each: {each}) ...")
|
|
560
592
|
if (!is.data.frame(enriches)) {
|
|
561
593
|
each_levels <- names(enriches)
|
|
@@ -571,8 +603,30 @@ run_case <- function(name) {
|
|
|
571
603
|
enriches[[each]] <- factor(enriches[[each]], levels = each_levels)
|
|
572
604
|
}
|
|
573
605
|
|
|
574
|
-
if (length(allenrich_plots) > 0) {
|
|
606
|
+
if (length(allenrich_plots) > 0 && !is.null(enriches) && nrow(enriches) > 0) {
|
|
575
607
|
log$info("- Visualizing all enrichments together ...")
|
|
608
|
+
# add other metadata columns if any by mapping groupname
|
|
609
|
+
# only add the metadata columns from object if there is a single value mapped
|
|
610
|
+
metacols <- srtobj@meta.data %>% group_by(!!sym(each)) %>%
|
|
611
|
+
summarize(across(everything(), ~ n_distinct(.) == 1), .groups = "keep") %>%
|
|
612
|
+
select(where(~ all(. == TRUE))) %>%
|
|
613
|
+
colnames()
|
|
614
|
+
|
|
615
|
+
if (length(metacols) > 1) {
|
|
616
|
+
metadf <- srtobj@meta.data[, metacols, drop = FALSE] %>%
|
|
617
|
+
distinct(!!sym(each), .keep_all = TRUE)
|
|
618
|
+
|
|
619
|
+
for (col in setdiff(metacols, each)) {
|
|
620
|
+
if (col %in% colnames(enriches)) {
|
|
621
|
+
warning("Column name conflict: {col}, adding with suffix '_meta'", immediate. = TRUE)
|
|
622
|
+
metadf[[paste0(col, "_meta")]] <- metadf[[col]]
|
|
623
|
+
metadf[[col]] <- NULL
|
|
624
|
+
}
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
enriches <- left_join(enriches, metadf, by = each)
|
|
628
|
+
}
|
|
629
|
+
|
|
576
630
|
process_allenriches(enriches, allenrich_plots, name, each)
|
|
577
631
|
}
|
|
578
632
|
}
|
|
@@ -580,11 +634,15 @@ run_case <- function(name) {
|
|
|
580
634
|
return(invisible())
|
|
581
635
|
}
|
|
582
636
|
|
|
637
|
+
# Let RunSeuratDEAnalysis handle the subset
|
|
638
|
+
case$subset <- subset_
|
|
583
639
|
case$object <- srtobj
|
|
584
640
|
markers <- do_call(RunSeuratDEAnalysis, case)
|
|
585
|
-
case$object <- NULL
|
|
641
|
+
case$object <- NULL # Release memory
|
|
586
642
|
gc()
|
|
587
643
|
|
|
644
|
+
subobj <- if (is.null(subset_)) srtobj else filter(srtobj, !!parse_expr(subset_))
|
|
645
|
+
|
|
588
646
|
if (is.null(case$ident_1)) {
|
|
589
647
|
all_idents <- unique(as.character(markers[[case$group_by]]))
|
|
590
648
|
enriches <- list()
|
|
@@ -596,7 +654,9 @@ run_case <- function(name) {
|
|
|
596
654
|
|
|
597
655
|
attr(ident_markers, "ident_1") <- ident
|
|
598
656
|
enrich <- process_markers(ident_markers, info = info, case = list(
|
|
657
|
+
object = subobj,
|
|
599
658
|
dbs = dbs,
|
|
659
|
+
group_by = case$group_by,
|
|
600
660
|
sigmarkers = sigmarkers,
|
|
601
661
|
enrich_style = enrich_style,
|
|
602
662
|
marker_plots = marker_plots,
|
|
@@ -609,7 +669,14 @@ run_case <- function(name) {
|
|
|
609
669
|
|
|
610
670
|
if (length(allmarker_plots) > 0) {
|
|
611
671
|
log$info("- Visualizing all markers together ...")
|
|
612
|
-
process_allmarkers(
|
|
672
|
+
process_allmarkers(
|
|
673
|
+
markers,
|
|
674
|
+
object = subobj,
|
|
675
|
+
comparison_by = case$group_by,
|
|
676
|
+
plotcases = allmarker_plots,
|
|
677
|
+
casename = name,
|
|
678
|
+
groupname = case$group_by,
|
|
679
|
+
subset_by_group = FALSE)
|
|
613
680
|
}
|
|
614
681
|
|
|
615
682
|
if (length(overlaps) > 0) {
|
|
@@ -619,12 +686,41 @@ run_case <- function(name) {
|
|
|
619
686
|
|
|
620
687
|
if (length(allenrich_plots) > 0) {
|
|
621
688
|
log$info("- Visualizing all enrichments together ...")
|
|
689
|
+
# add other metadata columns if any by mapping groupname
|
|
690
|
+
# only add the metadata columns from object if there is a single value mapped
|
|
691
|
+
metacols <- subobj@meta.data %>% group_by(!!sym(case$group_by)) %>%
|
|
692
|
+
summarize(across(everything(), ~ n_distinct(.) == 1), .groups = "keep") %>%
|
|
693
|
+
select(where(~ all(. == TRUE))) %>%
|
|
694
|
+
colnames()
|
|
695
|
+
|
|
696
|
+
if (length(metacols) > 1) {
|
|
697
|
+
metadf <- subobj@meta.data[, metacols, drop = FALSE] %>%
|
|
698
|
+
distinct(!!sym(case$group_by), .keep_all = TRUE)
|
|
699
|
+
|
|
700
|
+
for (col in setdiff(metacols, case$group_by)) {
|
|
701
|
+
if (col %in% colnames(enriches[[1]])) {
|
|
702
|
+
warning("Column name conflict: {col}, adding with suffix '_meta'", immediate. = TRUE)
|
|
703
|
+
metadf[[paste0(col, "_meta")]] <- metadf[[col]]
|
|
704
|
+
metadf[[col]] <- NULL
|
|
705
|
+
}
|
|
706
|
+
}
|
|
707
|
+
|
|
708
|
+
for (ne in names(enriches)) {
|
|
709
|
+
if (!case$group_by %in% colnames(enriches[[ne]])) {
|
|
710
|
+
enriches[[ne]][[case$group_by]] <- ne
|
|
711
|
+
}
|
|
712
|
+
enriches[[ne]] <- left_join(enriches[[ne]], metadf, by = case$group_by)
|
|
713
|
+
}
|
|
714
|
+
}
|
|
715
|
+
enriches <- do_call(rbind, enriches)
|
|
622
716
|
process_allenriches(enriches, allenrich_plots, name, case$group_by)
|
|
623
717
|
}
|
|
624
718
|
} else {
|
|
625
719
|
info <- case_info(name, outdir, create = TRUE)
|
|
626
720
|
enrich <- process_markers(markers, info = info, case = list(
|
|
721
|
+
object = subobj,
|
|
627
722
|
dbs = dbs,
|
|
723
|
+
group_by = case$group_by,
|
|
628
724
|
sigmarkers = sigmarkers,
|
|
629
725
|
enrich_style = enrich_style,
|
|
630
726
|
marker_plots = marker_plots,
|
|
@@ -634,7 +730,9 @@ run_case <- function(name) {
|
|
|
634
730
|
))
|
|
635
731
|
|
|
636
732
|
if (!is.null(original_case) && !is.null(cases[[original_case]])) {
|
|
637
|
-
markers
|
|
733
|
+
if (nrow(markers) > 0) {
|
|
734
|
+
markers[[each_name]] <- each
|
|
735
|
+
}
|
|
638
736
|
cases[[original_case]]$markers[[each]] <<- markers
|
|
639
737
|
cases[[original_case]]$enriches[[each]] <<- enrich
|
|
640
738
|
}
|
|
@@ -1,11 +1,13 @@
|
|
|
1
|
-
library(
|
|
1
|
+
library(rlang)
|
|
2
2
|
library(dplyr)
|
|
3
|
+
library(Seurat)
|
|
3
4
|
library(biopipen.utils)
|
|
4
5
|
|
|
5
6
|
sobjfile <- {{in.srtobj | r}}
|
|
6
7
|
outfile <- {{out.rdsfile | r}}
|
|
7
8
|
defaults <- {{envs.defaults | r}}
|
|
8
9
|
modules <- {{envs.modules | r}}
|
|
10
|
+
post_mutaters <- {{envs.post_mutaters | r}}
|
|
9
11
|
|
|
10
12
|
log <- get_logger()
|
|
11
13
|
|
|
@@ -134,6 +136,12 @@ for (key in names(modules)) {
|
|
|
134
136
|
}
|
|
135
137
|
}
|
|
136
138
|
|
|
139
|
+
if (!is.null(post_mutaters) && length(post_mutaters) > 0) {
|
|
140
|
+
log$info("Applying post mutaters ...")
|
|
141
|
+
sobj@meta.data <- sobj@meta.data %>%
|
|
142
|
+
mutate(!!!lapply(post_mutaters, parse_expr))
|
|
143
|
+
}
|
|
144
|
+
|
|
137
145
|
# save seurat object
|
|
138
146
|
log$info("Saving Seurat object ...")
|
|
139
147
|
save_obj(sobj, outfile)
|