biopipen 0.29.2__py3-none-any.whl → 0.30.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +2 -0
- biopipen/core/filters.py +21 -0
- biopipen/ns/plot.py +55 -0
- biopipen/ns/scrna.py +49 -13
- biopipen/ns/web.py +87 -5
- biopipen/scripts/bam/CNAClinic.R +2 -1
- biopipen/scripts/cellranger/CellRangerCount.py +3 -3
- biopipen/scripts/cellranger/CellRangerSummary.R +2 -1
- biopipen/scripts/cnv/AneuploidyScore.R +1 -1
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +2 -2
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +3 -2
- biopipen/scripts/gene/GeneNameConversion.R +2 -2
- biopipen/scripts/gsea/Enrichr.R +3 -3
- biopipen/scripts/gsea/FGSEA.R +2 -2
- biopipen/scripts/gsea/GSEA.R +2 -2
- biopipen/scripts/gsea/PreRank.R +2 -2
- biopipen/scripts/plot/Heatmap.R +3 -3
- biopipen/scripts/plot/Manhattan.R +2 -1
- biopipen/scripts/plot/QQPlot.R +1 -1
- biopipen/scripts/plot/ROC.R +1 -1
- biopipen/scripts/plot/Scatter.R +112 -0
- biopipen/scripts/plot/VennDiagram.R +3 -3
- biopipen/scripts/regulatory/MotifAffinityTest.R +3 -7
- biopipen/scripts/rnaseq/Simulation.R +1 -1
- biopipen/scripts/rnaseq/UnitConversion.R +2 -1
- biopipen/scripts/scrna/AnnData2Seurat.R +1 -1
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +24 -8
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +10 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +9 -1
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -8
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +15 -2
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +38 -15
- biopipen/scripts/scrna/CellTypeAnnotation.R +3 -0
- biopipen/scripts/scrna/CellsDistribution.R +3 -2
- biopipen/scripts/scrna/DimPlots.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +1 -1
- biopipen/scripts/scrna/MarkersFinder.R +5 -5
- biopipen/scripts/scrna/MetaMarkers.R +4 -4
- biopipen/scripts/scrna/ModuleScoreCalculator.R +2 -1
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +4 -3
- biopipen/scripts/scrna/Seurat2AnnData.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +73 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +4 -3
- biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -5
- biopipen/scripts/scrna/SeuratClusterStats-hists.R +6 -5
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +4 -3
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -3
- biopipen/scripts/scrna/SeuratClusterStats.R +24 -8
- biopipen/scripts/scrna/SeuratClustering-common.R +213 -0
- biopipen/scripts/scrna/SeuratClustering.R +10 -170
- biopipen/scripts/scrna/SeuratMap2Ref.R +65 -31
- biopipen/scripts/scrna/SeuratMetadataMutater.R +2 -2
- biopipen/scripts/scrna/SeuratPreparing-common.R +452 -0
- biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +201 -0
- biopipen/scripts/scrna/SeuratPreparing.R +22 -562
- biopipen/scripts/scrna/SeuratSubClustering.R +24 -39
- biopipen/scripts/scrna/TopExpressingGenes.R +1 -1
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +2 -2
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +2 -2
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +3 -3
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +3 -3
- biopipen/scripts/snp/MatrixEQTL.R +1 -1
- biopipen/scripts/snp/PlinkCallRate.R +2 -2
- biopipen/scripts/snp/PlinkFreq.R +2 -2
- biopipen/scripts/snp/PlinkHWE.R +2 -2
- biopipen/scripts/snp/PlinkHet.R +2 -2
- biopipen/scripts/snp/PlinkIBD.R +2 -2
- biopipen/scripts/stats/ChowTest.R +1 -1
- biopipen/scripts/stats/DiffCoexpr.R +1 -1
- biopipen/scripts/stats/LiquidAssoc.R +1 -1
- biopipen/scripts/stats/Mediation.R +11 -9
- biopipen/scripts/stats/MetaPvalue.R +4 -1
- biopipen/scripts/stats/MetaPvalue1.R +4 -1
- biopipen/scripts/tcr/Attach2Seurat.R +1 -1
- biopipen/scripts/tcr/CDR3AAPhyschem.R +1 -1
- biopipen/scripts/tcr/CloneResidency.R +2 -2
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch-basic.R +0 -4
- biopipen/scripts/tcr/Immunarch-clonality.R +0 -4
- biopipen/scripts/tcr/Immunarch-diversity.R +2 -24
- biopipen/scripts/tcr/Immunarch-geneusage.R +0 -2
- biopipen/scripts/tcr/Immunarch-kmer.R +0 -2
- biopipen/scripts/tcr/Immunarch-overlap.R +0 -2
- biopipen/scripts/tcr/Immunarch-spectratyping.R +0 -2
- biopipen/scripts/tcr/Immunarch-tracking.R +0 -2
- biopipen/scripts/tcr/Immunarch-vjjunc.R +0 -2
- biopipen/scripts/tcr/Immunarch.R +43 -11
- biopipen/scripts/tcr/ImmunarchFilter.R +1 -1
- biopipen/scripts/tcr/ImmunarchLoading.R +2 -2
- biopipen/scripts/tcr/SampleDiversity.R +1 -1
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +2 -2
- biopipen/scripts/tcr/TESSA.R +2 -2
- biopipen/scripts/vcf/TruvariBenchSummary.R +2 -2
- biopipen/scripts/vcf/TruvariConsistency.R +1 -1
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
- biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
- biopipen/scripts/web/gcloud_common.py +49 -0
- {biopipen-0.29.2.dist-info → biopipen-0.30.0.dist-info}/METADATA +1 -1
- {biopipen-0.29.2.dist-info → biopipen-0.30.0.dist-info}/RECORD +105 -96
- {biopipen-0.29.2.dist-info → biopipen-0.30.0.dist-info}/WHEEL +0 -0
- {biopipen-0.29.2.dist-info → biopipen-0.30.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
# Loaded variables: srtfile, outdir, srtobj
|
|
2
2
|
|
|
3
|
-
hists_defaults <- {{envs.hists_defaults | r: todot="-"}}
|
|
4
|
-
hists <- {{envs.hists | r: todot="-", skip=1}}
|
|
3
|
+
# hists_defaults <- {{envs.hists_defaults | r: todot="-"}}
|
|
4
|
+
# hists <- {{envs.hists | r: todot="-", skip=1}}
|
|
5
|
+
log_info("hists:")
|
|
5
6
|
|
|
6
7
|
do_one_hists <- function(m, case, odir, h1, each = NULL) {
|
|
7
8
|
ofile <- file.path(odir, paste0(slugify(h1), ifelse(is.null(each), "", paste0("-", slugify(each))), ".png"))
|
|
@@ -57,7 +58,7 @@ do_one_hists <- function(m, case, odir, h1, each = NULL) {
|
|
|
57
58
|
}
|
|
58
59
|
|
|
59
60
|
if (is.null(hists) || length(hists) == 0) {
|
|
60
|
-
log_warn("
|
|
61
|
+
log_warn("- no cases specified, skipping ...")
|
|
61
62
|
} else {
|
|
62
63
|
|
|
63
64
|
for (name in names(hists)) {
|
|
@@ -112,12 +113,12 @@ if (is.null(hists) || length(hists) == 0) {
|
|
|
112
113
|
h1 = h1
|
|
113
114
|
)
|
|
114
115
|
for (each in eachs) {
|
|
115
|
-
log_info("
|
|
116
|
+
log_info("- Case: {h1} - {each} ...")
|
|
116
117
|
m <- meta %>% filter(!!sym(case$each) == each)
|
|
117
118
|
do_one_hists(m, case, odir, h1, each)
|
|
118
119
|
}
|
|
119
120
|
} else {
|
|
120
|
-
log_info("
|
|
121
|
+
log_info("- Case: {h1} ...")
|
|
121
122
|
add_report(
|
|
122
123
|
list(
|
|
123
124
|
kind = "descr",
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
# Loaded variables: srtfile, outdir, srtobj
|
|
2
2
|
|
|
3
|
-
ngenes_defaults <- {{envs.ngenes_defaults | r: todot="-"}}
|
|
4
|
-
ngenes <- {{envs.ngenes | r: todot="-", skip=1}}
|
|
3
|
+
# ngenes_defaults <- {{envs.ngenes_defaults | r: todot="-"}}
|
|
4
|
+
# ngenes <- {{envs.ngenes | r: todot="-", skip=1}}
|
|
5
|
+
log_info("ngenes:")
|
|
5
6
|
|
|
6
7
|
odir <- file.path(outdir, "ngenes")
|
|
7
8
|
dir.create(odir, recursive=TRUE, showWarnings=FALSE)
|
|
8
9
|
|
|
9
10
|
do_one_ngenes <- function(name) {
|
|
10
|
-
log_info("
|
|
11
|
+
log_info("- Case: {name}")
|
|
11
12
|
|
|
12
13
|
case <- list_update(ngenes_defaults, ngenes[[name]])
|
|
13
14
|
case$devpars <- list_update(ngenes_defaults$devpars, case$devpars)
|
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
# Loaded variables: srtfile, outdir, srtobj
|
|
2
2
|
library(circlize)
|
|
3
3
|
|
|
4
|
-
stats_defaults = {{envs.stats_defaults | r: todot="-"}}
|
|
5
|
-
stats = {{envs.stats | r: todot="-", skip=1}}
|
|
4
|
+
# stats_defaults = {{envs.stats_defaults | r: todot="-"}}
|
|
5
|
+
# stats = {{envs.stats | r: todot="-", skip=1}}
|
|
6
|
+
log_info("stats:")
|
|
6
7
|
|
|
7
8
|
odir = file.path(outdir, "stats")
|
|
8
9
|
dir.create(odir, recursive=TRUE, showWarnings=FALSE)
|
|
9
10
|
|
|
10
11
|
do_one_stats = function(name) {
|
|
11
|
-
log_info("
|
|
12
|
+
log_info("- Case: {name}")
|
|
12
13
|
|
|
13
14
|
case = list_update(stats_defaults, stats[[name]])
|
|
14
15
|
case$devpars = list_update(stats_defaults$devpars, case$devpars)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
+
{{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
|
|
3
|
+
{{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
|
|
4
|
+
|
|
4
5
|
library(Seurat)
|
|
5
6
|
library(rlang)
|
|
6
7
|
library(dplyr)
|
|
@@ -26,19 +27,34 @@ if (!is.null(mutaters) && length(mutaters) > 0) {
|
|
|
26
27
|
mutate(!!!lapply(mutaters, parse_expr))
|
|
27
28
|
}
|
|
28
29
|
|
|
30
|
+
############## clustree ##############
|
|
31
|
+
clustrees_defaults <- {{envs.clustrees_defaults | r}}
|
|
32
|
+
clustrees <- {{envs.clustrees | r}}
|
|
33
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-clustree.R" | source_r }}
|
|
34
|
+
|
|
29
35
|
############## stats ##############
|
|
30
|
-
{
|
|
36
|
+
stats_defaults = {{envs.stats_defaults | r: todot="-"}}
|
|
37
|
+
stats = {{envs.stats | r: todot="-", skip=1}}
|
|
38
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-stats.R" | source_r }}
|
|
31
39
|
|
|
32
40
|
############## hists ##############
|
|
33
|
-
{
|
|
41
|
+
hists_defaults <- {{envs.hists_defaults | r: todot="-"}}
|
|
42
|
+
hists <- {{envs.hists | r: todot="-", skip=1}}
|
|
43
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-hists.R" | source_r }}
|
|
34
44
|
|
|
35
45
|
############## ngenes ##############
|
|
36
|
-
{
|
|
46
|
+
ngenes_defaults <- {{envs.ngenes_defaults | r: todot="-"}}
|
|
47
|
+
ngenes <- {{envs.ngenes | r: todot="-", skip=1}}
|
|
48
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-ngenes.R" | source_r }}
|
|
37
49
|
|
|
38
50
|
############## features ##############
|
|
39
|
-
{
|
|
51
|
+
features_defaults = {{envs.features_defaults | r: todot="-"}}
|
|
52
|
+
features = {{envs.features | r: todot="-", skip=1}}
|
|
53
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-features.R" | source_r }}
|
|
40
54
|
|
|
41
55
|
############## dimplots ##############
|
|
42
|
-
{
|
|
56
|
+
dimplots_defaults = {{envs.dimplots_defaults | r: todot="-"}}
|
|
57
|
+
dimplots = {{envs.dimplots | r: todot="-", skip=1}}
|
|
58
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-dimplots.R" | source_r }}
|
|
43
59
|
|
|
44
60
|
save_report(joboutdir)
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
|
|
2
|
+
# Expand a scalar dimension count into a full index sequence.
#
# An argument such as `dims` may be given as a single number (e.g. 30),
# meaning "dimensions 1 to 30". This helper rewrites such a scalar to 1:30
# and leaves everything else (explicit vectors, NULL, non-numeric values)
# untouched.
#
# @param args A list of arguments; may be NULL or lack the `name` element
# @param name The name of the element of `args` to expand (default: "dims")
# @return `args`, with `args[[name]]` expanded when it was a single number
expand_dims <- function(args, name = "dims") {
    dims <- args[[name]]
    # The length test must be applied to the value itself. Testing
    # `length(dims == 1)` is truthy for ANY non-empty vector, which would
    # collapse an explicit vector such as c(1, 2, 5) or 1:30 down to 1:1.
    if (is.numeric(dims) && length(dims) == 1) {
        # seq_len(30) is identical to 1:30, but does not count backwards
        # for a count of 0
        args[[name]] <- seq_len(dims)
    }
    args
}
|
|
9
|
+
|
|
10
|
+
# Expand a resolution specification into a numeric vector of resolutions.
#
# Each element of `resolution` may be:
#   - a number, which is used as is;
#   - a string of comma-separated items, where each item is either a single
#     number ("0.8") or a range "start:end" / "start:end:step" (the step
#     defaults to 0.1) that is expanded with seq().
#
# The values are rounded to 2 decimals and de-duplicated keeping the LAST
# occurrence of each value, so that the last resolution given remains the
# last element of the result.
#
# @param resolution A numeric/character vector (or list) of specifications
# @return A numeric vector of resolutions
# Stops with an error naming the offending item when a range does not have
# 2 or 3 parts, or when an item cannot be converted to a number.
expand_resolution <- function(resolution) {
    # Convert strings to numbers, failing loudly instead of producing NAs
    to_number <- function(x, part) {
        value <- suppressWarnings(as.numeric(x))
        if (anyNA(value)) {
            stop(
                "Invalid resolution format: ", part,
                ". Expected numeric values.",
                call. = FALSE
            )
        }
        value
    }

    # Expand one comma-separated item: either "x" or "start:end[:step]"
    expand_part <- function(part) {
        if (!grepl(":", part)) {
            return(to_number(part, part))
        }
        ps <- trimws(unlist(strsplit(part, ":")))
        if (length(ps) == 2) { ps <- c(ps, "0.1") }
        if (length(ps) != 3) {
            # stop() does not interpolate "{part}"; build the message
            # explicitly so that the offending item is actually reported
            stop(
                "Invalid resolution format: ", part,
                ". Expected 2 or 3 parts separated by ':' for a range.",
                call. = FALSE
            )
        }
        ps <- to_number(ps, part)
        seq(ps[1], ps[2], by = ps[3])
    }

    expanded <- lapply(resolution, function(res) {
        if (is.numeric(res)) {
            return(res)
        }
        # is.character
        parts <- trimws(unlist(strsplit(res, ",")))
        unlist(lapply(parts, expand_part))
    })
    expanded <- as.numeric(unlist(expanded))

    # keep the last occurrence of each value, so that the last resolution
    # specified stays at the end
    rev(unique(rev(round(expanded, 2))))
}
|
|
36
|
+
|
|
37
|
+
# Relabel clusters from the 0-based ids 0, 1, 2, ... to c1, c2, c3, ...
#
# @param clusters A factor whose values/levels are integer-like ids
# @return A factor with the relabelled values; the levels are the
#   relabelled levels of `clusters`, in their original order
recode_clusters <- function(clusters) {
    to_label <- function(ids) {
        shifted <- as.integer(as.character(ids)) + 1
        paste0("c", shifted)
    }
    new_levels <- to_label(levels(clusters))
    factor(to_label(clusters), levels = new_levels)
}
|
|
43
|
+
|
|
44
|
+
# Run ScaleData or SCTransform on the object, using the cache if available.
#
# Exactly one of envs$ScaleData and envs$SCTransform may be non-empty:
#   - neither: a warning is logged and `sobj` is returned unchanged;
#   - both: an error is raised;
#   - ScaleData: the default assay must not be "SCT"; the RNA assay and the
#     command log are cached/restored;
#   - SCTransform: the default assay must be "SCT"; the transformed assay
#     (envs$SCTransform$new.assay.name, "SCT" by default) and the command
#     log are cached/restored.
#
# Reads the globals `envs` and `cache_dir`.
#
# @param sobj The Seurat object
# @return The transformed Seurat object
run_transformation <- function(sobj) {
    use_scaledata <- length(envs$ScaleData) > 0
    use_sctransform <- length(envs$SCTransform) > 0

    if (!use_scaledata && !use_sctransform) {
        log_warn("Skipping ScaleData/SCTransform (neither specified) ...")
        return(sobj)
    }
    if (use_scaledata && use_sctransform) {
        stop("Both envs.ScaleData and envs.SCTransform are specified. Please choose either.")
    }

    if (use_scaledata) {
        if (DefaultAssay(sobj) == "SCT") {
            stop("SCT assay detected, but envs.ScaleData is specified. Use envs.SCTransform instead.")
        }
        cached <- get_cached(envs$ScaleData, "ScaleData", cache_dir)
        if (is.null(cached$data)) {
            log_info("Running ScaleData ...")
            sobj <- do_call(ScaleData, c(list(object = sobj), envs$ScaleData))
            cached$data <- list(assay = sobj@assays$RNA, commands = sobj@commands)
            save_to_cache(cached, "ScaleData", cache_dir)
        } else {
            log_info("Loading cached ScaleData ...")
            sobj@assays$RNA <- cached$data$assay
            sobj@commands <- cached$data$commands
            DefaultAssay(sobj) <- "RNA"
        }
    } else {
        if (DefaultAssay(sobj) != "SCT") {
            stop("SCT assay not detected, but envs.SCTransform is specified. Use envs.ScaleData instead.")
        }
        cached <- get_cached(envs$SCTransform, "SCTransform", cache_dir)
        # Name of the assay holding the SCTransform result. This variable is
        # read by both branches below, so it must be spelled consistently.
        assay <- envs$SCTransform$new.assay.name %||% "SCT"
        if (is.null(cached$data)) {
            log_info("Running SCTransform ...")
            sobj <- do_call(SCTransform, c(list(object = sobj), envs$SCTransform))
            # Cache the assay under the same name it is restored to, so that
            # a custom new.assay.name round-trips through the cache
            cached$data <- list(assay = sobj@assays[[assay]], commands = sobj@commands)
            save_to_cache(cached, "SCTransform", cache_dir)
        } else {
            log_info("Loading cached SCTransform ...")
            sobj@assays[[assay]] <- cached$data$assay
            sobj@commands <- cached$data$commands
            DefaultAssay(sobj) <- assay
        }
    }
    sobj
}
|
|
88
|
+
|
|
89
|
+
# Compute the UMAP reduction, or restore it from the cache.
#
# The cache signature is built from the object together with envs$RunUMAP.
# On a cache miss, RunUMAP is called with envs$RunUMAP passed through
# list_setdefault() along with `object`, `dims = 1:30` and `reduction`
# (presumably only filling in entries that are missing -- see
# list_setdefault), and the resulting reduction and command log are cached.
# On a cache hit, only the reduction and the command log are restored.
#
# Reads the globals `envs` and `cache_dir`.
#
# @param sobj The Seurat object
# @return The Seurat object with the UMAP reduction attached
run_umap <- function(sobj) {
    umap_envs <- envs$RunUMAP
    cached <- get_cached(list(sobj = sobj, RunUMAP = umap_envs), "RunUMAP", cache_dir)
    reduc_name <- umap_envs$reduction.name %||% "umap"

    if (!is.null(cached$data)) {
        log_info("Loading cached RunUMAP ...")
        sobj@reductions[[reduc_name]] <- cached$data$reduc
        sobj@commands <- cached$data$commands
        return(sobj)
    }

    log_info("Running RunUMAP ...")
    default_reduction <- sobj@misc$integrated_new_reduction %||% "pca"
    umap_args <- list_setdefault(
        umap_envs,
        object = sobj,
        dims = 1:30,
        reduction = default_reduction
    )

    n_cells <- ncol(sobj)
    # Cap the highest dimension used at (number of cells - 1)
    top_dim <- min(max(umap_args$dims), n_cells - 1)
    umap_args$dims <- 1:top_dim

    uses_uwot <- (umap_envs$umap.method %||% "uwot") == "uwot"
    if (uses_uwot && is.null(umap_envs$n.neighbors)) {
        # https://github.com/satijalab/seurat/issues/4312
        umap_args$n.neighbors <- min(n_cells - 1, 30)
    }

    sobj <- do_call(RunUMAP, umap_args)
    cached$data <- list(
        reduc = sobj@reductions[[reduc_name]],
        commands = sobj@commands
    )
    save_to_cache(cached, "RunUMAP", cache_dir)
    sobj
}
|
|
122
|
+
|
|
123
|
+
# Build the neighbor graphs with FindNeighbors, or restore them from cache.
#
# The cache signature is built from the object together with
# envs$FindNeighbors. On a cache miss, FindNeighbors is called with
# envs$FindNeighbors plus `object` and `reduction` (the latter is always
# set to sobj@misc$integrated_new_reduction, falling back to "pca"), and
# the graphs and command log are cached. On a cache hit, the graphs and the
# command log of `sobj` are replaced by the cached ones.
#
# Reads the globals `envs` and `cache_dir`.
#
# @param sobj The Seurat object
# @return The Seurat object with the neighbor graphs attached
run_findneighbors <- function(sobj) {
    neighbor_args <- envs$FindNeighbors
    cached <- get_cached(
        list(sobj = sobj, FindNeighbors = neighbor_args),
        "FindNeighbors",
        cache_dir
    )

    if (!is.null(cached$data)) {
        log_info("Loading cached FindNeighbors ...")
        sobj@graphs <- cached$data$graphs
        sobj@commands <- cached$data$commands
        return(sobj)
    }

    log_info("Running FindNeighbors ...")
    neighbor_args$object <- sobj
    neighbor_args$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
    sobj <- do_call(FindNeighbors, neighbor_args)

    cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
    save_to_cache(cached, "FindNeighbors", cache_dir)
    sobj
}
|
|
144
|
+
|
|
145
|
+
# Cluster the cells with FindClusters, or restore the clusters from cache.
#
# The cache signature is built from the object together with
# envs$FindClusters. On a cache miss:
#   - random.seed defaults to 8525 and resolution to 0.8;
#   - the resolution is expanded with expand_resolution(), and one metadata
#     column "seurat_clusters.<resolution>" is requested per resolution;
#   - those columns and "seurat_clusters" are relabelled with
#     recode_clusters(), and "seurat_clusters" becomes the identity;
#   - the cluster columns and the command log are cached.
# On a cache hit, the cached cluster columns are added to the metadata, the
# identity is set to "seurat_clusters" and the command log is restored.
#
# Reads the globals `envs` and `cache_dir`.
#
# @param sobj The Seurat object
# @return The Seurat object with the cluster assignments
run_findclusters <- function(sobj) {
    cached <- get_cached(
        list(sobj = sobj, FindClusters = envs$FindClusters),
        "FindClusters",
        cache_dir
    )

    if (!is.null(cached$data)) {
        log_info("Loading cached FindClusters ...")

        sobj <- AddMetaData(sobj, metadata = cached$data$clusters)
        Idents(sobj) <- "seurat_clusters"
        sobj@commands <- cached$data$commands
        return(sobj)
    }

    fc_args <- envs$FindClusters
    fc_args$random.seed <- fc_args$random.seed %||% 8525
    # NOTE: `resolution` and `ident_table` are referenced by name in the
    # log messages below
    resolution <- expand_resolution(fc_args$resolution %||% 0.8)
    fc_args$resolution <- resolution
    log_info("Running FindClusters at resolution: {paste(resolution, collapse=',')} ...")

    per_res_cols <- paste0("seurat_clusters.", resolution)
    fc_args$object <- sobj
    fc_args$cluster.name <- per_res_cols
    sobj <- do_call(FindClusters, fc_args)

    # Relabel every per-resolution column, then the final clusters
    for (cluster_col in c(per_res_cols, "seurat_clusters")) {
        sobj@meta.data[[cluster_col]] <- recode_clusters(sobj@meta.data[[cluster_col]])
    }
    Idents(sobj) <- "seurat_clusters"

    ident_table <- table(Idents(sobj))
    log_info("- Found {length(ident_table)} clusters at resolution {resolution[length(resolution)]}")
    print(ident_table)
    cat("\n")

    kept_cols <- c(per_res_cols, "seurat_clusters")
    cached$data <- list(
        clusters = sobj@meta.data[, kept_cols, drop = FALSE],
        commands = sobj@commands
    )
    save_to_cache(cached, "FindClusters", cache_dir)
    sobj
}
|
|
187
|
+
|
|
188
|
+
# Run PrepSCTFindMarkers when the default assay is "SCT", using the cache
# if available. Objects with any other default assay are returned as is.
#
# On a cache miss, PrepSCTFindMarkers is run and a command entry named
# "PrepSCTFindMarkers" is added to sobj@commands, built by copying the
# FindClusters entry and overwriting its fields. The whole object is then
# cached. On a cache hit, the cached object is returned.
#
# Reads the global `cache_dir`.
#
# @param sobj The Seurat object
# @return The (possibly updated) Seurat object
run_prepsctfindmarkers <- function(sobj) {
    if (DefaultAssay(sobj) != "SCT") {
        return(sobj)
    }

    cached <- get_cached(list(sobj = sobj), "PrepSCTFindMarkers", cache_dir)
    if (!is.null(cached$data)) {
        log_info("Loading cached PrepSCTFindMarkers ...")
        return(cached$data)
    }

    # https://github.com/satijalab/seurat/issues/6968
    log_info("Running PrepSCTFindMarkers ...")
    sobj <- PrepSCTFindMarkers(sobj)

    # Record the step in sobj@commands, using the FindClusters entry as a
    # template for the new SeuratCommand
    prep_command <- sobj@commands$FindClusters
    prep_command@name <- "PrepSCTFindMarkers"
    prep_command@time.stamp <- Sys.time()
    prep_command@assay.used <- "SCT"
    prep_command@call.string <- "PrepSCTFindMarkers(object = sobj)"
    prep_command@params <- list()
    sobj@commands$PrepSCTFindMarkers <- prep_command

    cached$data <- sobj
    save_to_cache(cached, "PrepSCTFindMarkers", cache_dir)
    sobj
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
+
{{ biopipen_dir | joinpaths: "utils", "caching.R" | source_r }}
|
|
3
3
|
|
|
4
4
|
library(Seurat)
|
|
5
5
|
library(future)
|
|
@@ -7,7 +7,6 @@ library(rlang)
|
|
|
7
7
|
library(tidyr)
|
|
8
8
|
library(dplyr)
|
|
9
9
|
library(digest)
|
|
10
|
-
library(clustree)
|
|
11
10
|
|
|
12
11
|
set.seed(8525)
|
|
13
12
|
|
|
@@ -24,16 +23,10 @@ options(str = strOptions(vec.len = 5, digits.d = 5))
|
|
|
24
23
|
options(future.globals.maxSize = 80000 * 1024^2)
|
|
25
24
|
plan(strategy = "multicore", workers = envs$ncores)
|
|
26
25
|
|
|
27
|
-
|
|
28
|
-
# Expand dims from 30 to 1:30
|
|
29
|
-
if (is.numeric(args[[name]]) && length(args[[name]] == 1)) {
|
|
30
|
-
args[[name]] <- 1:args[[name]]
|
|
31
|
-
}
|
|
32
|
-
args
|
|
33
|
-
}
|
|
26
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClustering-common.R" | source_r }}
|
|
34
27
|
|
|
35
|
-
envs$RunUMAP <-
|
|
36
|
-
envs$FindNeighbors <-
|
|
28
|
+
envs$RunUMAP <- expand_dims(envs$RunUMAP)
|
|
29
|
+
envs$FindNeighbors <- expand_dims(envs$FindNeighbors)
|
|
37
30
|
|
|
38
31
|
log_info("Reading Seurat object ...")
|
|
39
32
|
sobj <- readRDS(srtfile)
|
|
@@ -53,164 +46,11 @@ if (is.character(envs$cache)) {
|
|
|
53
46
|
writeLines(sobj_sig, file.path(cache_dir, "signature.txt"))
|
|
54
47
|
}
|
|
55
48
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
if (is.null(cached$data)) {
|
|
62
|
-
log_info("Running ScaleData ...")
|
|
63
|
-
envs$ScaleData$object <- sobj
|
|
64
|
-
sobj <- do_call(ScaleData, envs$ScaleData)
|
|
65
|
-
cached$data <- list(assay = sobj@assays$RNA, commands = sobj@commands)
|
|
66
|
-
save_to_cache(cached, "ScaleData", cache_dir)
|
|
67
|
-
} else {
|
|
68
|
-
log_info("Loading cached ScaleData ...")
|
|
69
|
-
sobj@assays$RNA <- cached$data$assay
|
|
70
|
-
sobj@commands <- cached$data$commands
|
|
71
|
-
DefaultAssay(sobj) <- "RNA"
|
|
72
|
-
}
|
|
73
|
-
} else if (length(envs$SCTransform) > 0) {
|
|
74
|
-
if (DefaultAssay(sobj) != "SCT") {
|
|
75
|
-
stop("SCT assay not detected, but SCTransform is specified. Use ScaleData instead.")
|
|
76
|
-
}
|
|
77
|
-
cached <- get_cached(envs$SCTransform, "SCTransform", cache_dir)
|
|
78
|
-
asssay <- envs$SCTransform$new.assay.name %||% "SCT"
|
|
79
|
-
if (is.null(cached$data)) {
|
|
80
|
-
log_info("Running SCTransform ...")
|
|
81
|
-
envs$SCTransform$object <- sobj
|
|
82
|
-
sobj <- do_call(SCTransform, envs$SCTransform)
|
|
83
|
-
cached$data <- list(assay = sobj@assays$SCT, commands = sobj@commands)
|
|
84
|
-
save_to_cache(cached, "SCTransform", cache_dir)
|
|
85
|
-
} else {
|
|
86
|
-
log_info("Loading cached SCTransform ...")
|
|
87
|
-
sobj@assays[[assay]] <- cached$data$assay
|
|
88
|
-
sobj@commands <- cached$data$commands
|
|
89
|
-
DefaultAssay(sobj) <- assay
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
cached <- get_cached(envs$RunUMAP, "RunUMAP", cache_dir)
|
|
94
|
-
reduc_name <- envs$RunUMAP$reduction.name %||% "umap"
|
|
95
|
-
if (is.null(cached$data)) {
|
|
96
|
-
log_info("Running RunUMAP ...")
|
|
97
|
-
umap_args <- list_setdefault(
|
|
98
|
-
envs$RunUMAP,
|
|
99
|
-
object = sobj,
|
|
100
|
-
dims = 1:30,
|
|
101
|
-
reduction = sobj@misc$integrated_new_reduction %||% "pca"
|
|
102
|
-
)
|
|
103
|
-
ncells <- ncol(sobj)
|
|
104
|
-
umap_args$dims <- 1:min(max(umap_args$dims), ncells - 1)
|
|
105
|
-
umap_method <- envs$RunUMAP$umap.method %||% "uwot"
|
|
106
|
-
if (umap_method == "uwot" && is.null(envs$RunUMAP$n.neighbors)) {
|
|
107
|
-
# https://github.com/satijalab/seurat/issues/4312
|
|
108
|
-
umap_args$n.neighbors <- min(ncells - 1, 30)
|
|
109
|
-
}
|
|
110
|
-
sobj <- do_call(RunUMAP, umap_args)
|
|
111
|
-
cached$data <- list(reduc = sobj@reductions[[reduc_name]], commands = sobj@commands)
|
|
112
|
-
save_to_cache(cached, "RunUMAP", cache_dir)
|
|
113
|
-
} else {
|
|
114
|
-
log_info("Loading cached RunUMAP ...")
|
|
115
|
-
sobj@reductions[[reduc_name]] <- cached$data$reduc
|
|
116
|
-
sobj@commands <- cached$data$commands
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
cached <- get_cached(envs$FindNeighbors, "FindNeighbors", cache_dir)
|
|
120
|
-
if (is.null(cached$data)) {
|
|
121
|
-
log_info("Running FindNeighbors ...")
|
|
122
|
-
envs$FindNeighbors$object <- sobj
|
|
123
|
-
envs$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
|
|
124
|
-
sobj <- do_call(FindNeighbors, envs$FindNeighbors)
|
|
125
|
-
cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
|
|
126
|
-
save_to_cache(cached, "FindNeighbors", cache_dir)
|
|
127
|
-
} else {
|
|
128
|
-
log_info("Loading cached FindNeighbors ...")
|
|
129
|
-
sobj@graphs <- cached$data$graphs
|
|
130
|
-
sobj@commands <- cached$data$commands
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
envs$FindClusters$random.seed <- envs$FindClusters$random.seed %||% 8525
|
|
134
|
-
expand_resolution <- function(resolution) {
|
|
135
|
-
expanded_res <- c()
|
|
136
|
-
for (res in resolution) {
|
|
137
|
-
if (is.numeric(res)) {
|
|
138
|
-
expanded_res <- c(expanded_res, res)
|
|
139
|
-
} else {
|
|
140
|
-
# is.character
|
|
141
|
-
parts <- trimws(unlist(strsplit(res, ",")))
|
|
142
|
-
for (part in parts) {
|
|
143
|
-
if (grepl(":", part)) {
|
|
144
|
-
parts <- trimws(unlist(strsplit(part, ":")))
|
|
145
|
-
if (length(parts) == 2) { parts <- c(parts, 0.1) }
|
|
146
|
-
if (length(parts) != 3) {
|
|
147
|
-
stop("Invalid resolution format: {part}. Expected 2 or 3 parts separated by ':' for a range.")
|
|
148
|
-
}
|
|
149
|
-
parts <- as.numeric(parts)
|
|
150
|
-
expanded_res <- c(expanded_res, seq(parts[1], parts[2], by = parts[3]))
|
|
151
|
-
} else {
|
|
152
|
-
expanded_res <- c(expanded_res, as.numeric(part))
|
|
153
|
-
}
|
|
154
|
-
}
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
# keep the last resolution at last
|
|
158
|
-
rev(unique(rev(expanded_res)))
|
|
159
|
-
}
|
|
160
|
-
resolution <- envs$FindClusters$resolution <- expand_resolution(envs$FindClusters$resolution %||% 0.8)
|
|
161
|
-
log_info("Running FindClusters at resolution: {paste(resolution, collapse=',')} ...")
|
|
162
|
-
|
|
163
|
-
envs$FindClusters$object <- sobj
|
|
164
|
-
sobj <- do_call(FindClusters, envs$FindClusters)
|
|
165
|
-
|
|
166
|
-
# recode clusters from 0, 1, 2, ... to c1, c2, c3, ...
|
|
167
|
-
recode_clusters <- function(clusters) {
|
|
168
|
-
recode <- function(x) paste0("c", as.integer(as.character(x)) + 1)
|
|
169
|
-
clusters <- factor(recode(clusters), levels = recode(levels(clusters)))
|
|
170
|
-
clusters
|
|
171
|
-
}
|
|
172
|
-
|
|
173
|
-
graph_name <- envs$FindClusters$graph.name %||% paste0(DefaultAssay(sobj), "_snn_res.")
|
|
174
|
-
for (res in resolution) {
|
|
175
|
-
cluster_name <- paste0(graph_name, res)
|
|
176
|
-
new_cluster_name <- paste0("seurat_clusters.", res)
|
|
177
|
-
sobj@meta.data[[new_cluster_name]] <- recode_clusters(sobj@meta.data[[cluster_name]])
|
|
178
|
-
}
|
|
179
|
-
sobj@meta.data$seurat_clusters <- recode_clusters(sobj@meta.data$seurat_clusters)
|
|
180
|
-
Idents(sobj) <- "seurat_clusters"
|
|
181
|
-
|
|
182
|
-
ident_table <- table(Idents(sobj))
|
|
183
|
-
log_info("- Found {length(ident_table)} clusters at resolution {resolution[length(resolution)]}")
|
|
184
|
-
print(ident_table)
|
|
185
|
-
cat("\n")
|
|
186
|
-
|
|
187
|
-
# plot the tree
|
|
188
|
-
if (length(resolution) > 1) {
|
|
189
|
-
log_info("Plotting clustree ...")
|
|
190
|
-
png(
|
|
191
|
-
file.path(joboutdir, "clustree.png"),
|
|
192
|
-
res = envs$clustree_devpars$res,
|
|
193
|
-
width = envs$clustree_devpars$width,
|
|
194
|
-
height = envs$clustree_devpars$height
|
|
195
|
-
)
|
|
196
|
-
p <- clustree(sobj, prefix = "seurat_clusters.")
|
|
197
|
-
print(p)
|
|
198
|
-
dev.off()
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
if (DefaultAssay(sobj) == "SCT") {
|
|
202
|
-
# https://github.com/satijalab/seurat/issues/6968
|
|
203
|
-
log_info("Running PrepSCTFindMarkers ...")
|
|
204
|
-
sobj <- PrepSCTFindMarkers(sobj)
|
|
205
|
-
# compose a new SeuratCommand to record it to sobj@commands
|
|
206
|
-
scommand <- sobj@commands$FindClusters
|
|
207
|
-
scommand@name <- "PrepSCTFindMarkers"
|
|
208
|
-
scommand@time.stamp <- Sys.time()
|
|
209
|
-
scommand@assay.used <- "SCT"
|
|
210
|
-
scommand@call.string <- "PrepSCTFindMarkers(object = sobj)"
|
|
211
|
-
scommand@params <- list()
|
|
212
|
-
sobj@commands$PrepSCTFindMarkers <- scommand
|
|
213
|
-
}
|
|
49
|
+
sobj <- run_transformation(sobj)
|
|
50
|
+
sobj <- run_umap(sobj)
|
|
51
|
+
sobj <- run_findneighbors(sobj)
|
|
52
|
+
sobj <- run_findclusters(sobj)
|
|
53
|
+
sobj <- run_prepsctfindmarkers(sobj)
|
|
214
54
|
|
|
215
55
|
log_info("Saving results ...")
|
|
216
56
|
saveRDS(sobj, file = rdsfile)
|