biopipen 0.29.2__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +2 -0
- biopipen/core/filters.py +21 -0
- biopipen/ns/plot.py +55 -0
- biopipen/ns/scrna.py +110 -21
- biopipen/ns/web.py +87 -5
- biopipen/scripts/bam/CNAClinic.R +2 -1
- biopipen/scripts/cellranger/CellRangerCount.py +3 -3
- biopipen/scripts/cellranger/CellRangerSummary.R +2 -1
- biopipen/scripts/cnv/AneuploidyScore.R +1 -1
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +2 -2
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +3 -2
- biopipen/scripts/gene/GeneNameConversion.R +2 -2
- biopipen/scripts/gsea/Enrichr.R +3 -3
- biopipen/scripts/gsea/FGSEA.R +2 -2
- biopipen/scripts/gsea/GSEA.R +2 -2
- biopipen/scripts/gsea/PreRank.R +2 -2
- biopipen/scripts/plot/Heatmap.R +3 -3
- biopipen/scripts/plot/Manhattan.R +2 -1
- biopipen/scripts/plot/QQPlot.R +1 -1
- biopipen/scripts/plot/ROC.R +1 -1
- biopipen/scripts/plot/Scatter.R +112 -0
- biopipen/scripts/plot/VennDiagram.R +3 -3
- biopipen/scripts/regulatory/MotifAffinityTest.R +3 -7
- biopipen/scripts/rnaseq/Simulation.R +1 -1
- biopipen/scripts/rnaseq/UnitConversion.R +2 -1
- biopipen/scripts/scrna/AnnData2Seurat.R +1 -1
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +24 -8
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +10 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +9 -1
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -8
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +15 -2
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +38 -15
- biopipen/scripts/scrna/CellTypeAnnotation.R +3 -0
- biopipen/scripts/scrna/CellsDistribution.R +4 -3
- biopipen/scripts/scrna/DimPlots.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +1 -1
- biopipen/scripts/scrna/MarkersFinder.R +5 -5
- biopipen/scripts/scrna/MetaMarkers.R +4 -4
- biopipen/scripts/scrna/ModuleScoreCalculator.R +2 -1
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +4 -3
- biopipen/scripts/scrna/ScSimulation.R +64 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +73 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +4 -3
- biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -5
- biopipen/scripts/scrna/SeuratClusterStats-hists.R +6 -5
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +4 -3
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +20 -25
- biopipen/scripts/scrna/SeuratClusterStats.R +24 -8
- biopipen/scripts/scrna/SeuratClustering-common.R +213 -0
- biopipen/scripts/scrna/SeuratClustering.R +10 -170
- biopipen/scripts/scrna/SeuratMap2Ref.R +98 -54
- biopipen/scripts/scrna/SeuratMetadataMutater.R +2 -2
- biopipen/scripts/scrna/SeuratPreparing-common.R +452 -0
- biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +201 -0
- biopipen/scripts/scrna/SeuratPreparing.R +22 -562
- biopipen/scripts/scrna/SeuratSubClustering.R +24 -39
- biopipen/scripts/scrna/TopExpressingGenes.R +1 -1
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +2 -2
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +2 -2
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +3 -3
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +3 -3
- biopipen/scripts/snp/MatrixEQTL.R +1 -1
- biopipen/scripts/snp/PlinkCallRate.R +2 -2
- biopipen/scripts/snp/PlinkFreq.R +2 -2
- biopipen/scripts/snp/PlinkHWE.R +2 -2
- biopipen/scripts/snp/PlinkHet.R +2 -2
- biopipen/scripts/snp/PlinkIBD.R +2 -2
- biopipen/scripts/stats/ChowTest.R +1 -1
- biopipen/scripts/stats/DiffCoexpr.R +1 -1
- biopipen/scripts/stats/LiquidAssoc.R +1 -1
- biopipen/scripts/stats/Mediation.R +11 -9
- biopipen/scripts/stats/MetaPvalue.R +4 -1
- biopipen/scripts/stats/MetaPvalue1.R +4 -1
- biopipen/scripts/tcr/Attach2Seurat.R +1 -1
- biopipen/scripts/tcr/CDR3AAPhyschem.R +1 -1
- biopipen/scripts/tcr/CloneResidency.R +2 -2
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch-basic.R +0 -4
- biopipen/scripts/tcr/Immunarch-clonality.R +0 -4
- biopipen/scripts/tcr/Immunarch-diversity.R +2 -24
- biopipen/scripts/tcr/Immunarch-geneusage.R +0 -2
- biopipen/scripts/tcr/Immunarch-kmer.R +0 -2
- biopipen/scripts/tcr/Immunarch-overlap.R +0 -2
- biopipen/scripts/tcr/Immunarch-spectratyping.R +0 -2
- biopipen/scripts/tcr/Immunarch-tracking.R +0 -2
- biopipen/scripts/tcr/Immunarch-vjjunc.R +0 -2
- biopipen/scripts/tcr/Immunarch.R +43 -11
- biopipen/scripts/tcr/ImmunarchFilter.R +1 -1
- biopipen/scripts/tcr/ImmunarchLoading.R +2 -2
- biopipen/scripts/tcr/SampleDiversity.R +1 -1
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +2 -2
- biopipen/scripts/tcr/TESSA.R +2 -2
- biopipen/scripts/vcf/TruvariBenchSummary.R +2 -2
- biopipen/scripts/vcf/TruvariConsistency.R +1 -1
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
- biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
- biopipen/scripts/web/gcloud_common.py +49 -0
- {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/METADATA +7 -7
- {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/RECORD +106 -96
- {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/WHEEL +0 -0
- {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}

# Simulate a single-cell RNA-seq dataset with splatter and save it either as a
# SingleCellExperiment or as a Seurat object (RDS file).
#
# Steps:
#   1. Resolve the seed (a character seed is hashed to an integer and also
#      used as the project name).
#   2. Build a mock SCE, estimate splat parameters from it and overlay the
#      user-supplied parameters.
#   3. Run the simulation and write the result to `outfile`.

library(rlang)
library(splatter)
library(scater)

# Load template variables
seed <- {{ in.seed | r }}
outfile <- {{ out.outfile | r }}
ngenes <- {{ envs.ngenes | r }}
ncells <- {{ envs.ncells | r }}
nspikes <- {{ envs.nspikes | r }}
outtype <- {{ envs.outtype | r }}
method <- {{ envs.method | r }}
user_params <- {{ envs.params | r: todot="-" }}

log_info("Generating simulation parameters ...")

seed <- seed %||% 1
if (length(seed) > 1) {
    log_warn("- multiple seeds provided, using the first one")
    seed <- seed[1]
}
if (is.character(seed)) {
    # A character seed doubles as the project name; hash it to get an integer
    library(digest)
    proj <- seed
    seed <- digest2int(seed)
} else {
    proj <- paste0("S", seed)
}

set.seed(seed)

# Only forward the mockSCE arguments that were actually provided, so that
# mockSCE's own defaults apply to the rest
mock_sce_params <- list()
if (!is.null(ngenes)) mock_sce_params$ngenes <- ngenes
if (!is.null(ncells)) mock_sce_params$ncells <- ncells
if (!is.null(nspikes)) mock_sce_params$nspikes <- nspikes
sce <- do.call(mockSCE, mock_sce_params)

params <- splatEstimate(sce)
user_params$seed <- seed
user_params$object <- params
# setParams returns the updated Params object rather than modifying it in
# place, so the result must be captured; otherwise the seed and the user
# parameters are silently dropped.
params <- do_call(setParams, user_params)

log_info("Running simulation ...")

sim <- splatSimulate(params, method = method, verbose = TRUE)

outtype <- tolower(outtype)
if (outtype == "sce") outtype <- "singlecellexperiment"

if (outtype == "singlecellexperiment") {
    log_info("Saving simulation to file ...")
    saveRDS(sim, file = outfile)
} else {
    # Any other outtype is written as a Seurat object built from the counts
    log_info("Converting simulation to Seurat object ...")
    cnts <- SingleCellExperiment::counts(sim)
    sobj <- Seurat::CreateSeuratObject(counts = cnts, project = proj)
    # Free the intermediate objects before serializing
    rm(sim)
    rm(cnts)
    gc()

    log_info("Saving simulation to file ...")
    saveRDS(sobj, file = outfile)
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# Draws clustree plots for the clustering columns stored in the Seurat object
# metadata and registers them in the report.
#
# Variables expected from the including script:
#   srtobj, clustrees_defaults, clustrees
# Also reads `outdir` and calls helpers (log_info, log_warn, list_update,
# do_call, add_report) that are defined outside this file.
log_info("clustrees:")
if (
    (is.null(clustrees) || length(clustrees) == 0) &&
    (is.null(clustrees_defaults$prefix) || clustrees_defaults$prefix == "")) {
    # Neither explicit cases nor a default prefix: nothing was requested
    log_warn("- no cases, skipping intentionally ...")
} else { # clustrees set or prefix is not empty
    library(clustree)
    odir = file.path(outdir, "clustrees")
    dir.create(odir, recursive=TRUE, showWarnings=FALSE)

    # "_auto" mode: derive one case from each recorded FindClusters* command
    # that was run with more than one resolution
    if ((is.null(clustrees) || length(clustrees) == 0) && clustrees_defaults$prefix == "_auto") {
        clustrees <- list()
        for (key in names(srtobj@commands)) {
            if (startsWith(key, "FindClusters") && length(srtobj@commands[[key]]$resolution) > 1) {
                # Drop the leading "FindClusters." (13 characters); a bare
                # "FindClusters" key leaves an empty string
                pref <- substring(key, 14)
                if (pref == "") {
                    pref <- "seurat_clusters"
                }

                clustrees[[pref]] <- list(prefix = pref)
            }
        }
    }
    if (length(clustrees) == 0) {
        log_warn("- no cases found, skipping ...")
    } else {
        reports <- list()
        for (name in names(clustrees)) {
            # The prefix must come from the case itself; the default prefix is
            # not used as a fallback here
            if (is.null(clustrees[[name]]$prefix)) {
                stop(paste0("clustrees: prefix is required for case: ", name))
            }
            case <- list_update(clustrees_defaults, clustrees[[name]])

            # Resolve device parameters, then remove them from `case` so that
            # they are not passed on to clustree()
            devpars <- case$devpars
            devpars$width <- devpars$width %||% clustrees_defaults$devpars$width %||% 800
            devpars$height <- devpars$height %||% clustrees_defaults$devpars$height %||% 1000
            devpars$res <- devpars$res %||% clustrees_defaults$devpars$res %||% 100
            case$devpars <- NULL
            # Normalize the prefix so it ends with exactly one trailing dot
            prefix <- sub("\\.$", "", case$prefix)
            log_info("- Case: {name} ...")
            case$prefix <- paste0(prefix, ".")
            # Input for clustree: metadata columns starting with the prefix,
            # restricted to rows without missing values
            case$x <- srtobj@meta.data %>% select(starts_with(case$prefix))
            case$x <- case$x[complete.cases(case$x), , drop = FALSE]

            # Look up the command recorded for this prefix; for
            # "seurat_clusters" fall back to the plain FindClusters command
            command <- srtobj@commands[[paste0("FindClusters.", prefix)]] %||%
                (if(prefix == "seurat_clusters") srtobj@commands$FindClusters else NULL)

            clustree_file <- file.path(odir, paste0(prefix, ".clustree.png"))
            png(clustree_file, width = devpars$width, height = devpars$height, res = devpars$res)
            p <- do_call(clustree, case)
            print(p)
            dev.off()

            if (is.null(command)) {
                # No recorded command: take the resolutions from the column
                # name suffixes after the prefix
                resolution <- substring(colnames(case$x), nchar(case$prefix) + 1)
            } else {
                resolution <- command$resolution
            }
            # The last resolution is reported as the one in use
            resolution_used <- resolution[length(resolution)]

            reports[[length(reports) + 1]] <- list(
                kind = "table_image",
                src = clustree_file,
                name = name,
                descr = paste0("Resolutions: ", paste(resolution, collapse = ", "), "; resolution used: ", resolution_used)
            )
        }
        # The unnamed entries are the per-case images; h1 and ui are passed as
        # named arguments to add_report
        reports$h1 <- "Clustree plots"
        reports$ui <- "table_of_images"
        do.call(add_report, reports)
    }
}
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
# Loaded variables: srtfile, outdir, srtobj
|
|
2
2
|
|
|
3
|
-
dimplots_defaults = {{envs.dimplots_defaults | r: todot="-"}}
|
|
4
|
-
dimplots = {{envs.dimplots | r: todot="-", skip=1}}
|
|
3
|
+
# dimplots_defaults = {{envs.dimplots_defaults | r: todot="-"}}
|
|
4
|
+
# dimplots = {{envs.dimplots | r: todot="-", skip=1}}
|
|
5
|
+
log_info("dimplots:")
|
|
5
6
|
|
|
6
7
|
odir = file.path(outdir, "dimplots")
|
|
7
8
|
dir.create(odir, recursive=TRUE, showWarnings=FALSE)
|
|
8
9
|
|
|
9
10
|
do_one_dimplot = function(name) {
|
|
10
|
-
log_info(
|
|
11
|
+
log_info("- Case: {name}")
|
|
11
12
|
|
|
12
13
|
case = list_update(dimplots_defaults, dimplots[[name]])
|
|
13
14
|
case$devpars = list_update(dimplots_defaults$devpars, dimplots[[name]]$devpars)
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
# Loaded variables: srtfile, outdir, srtobj
|
|
2
2
|
|
|
3
|
-
features_defaults = {{envs.features_defaults | r: todot="-"}}
|
|
4
|
-
features = {{envs.features | r: todot="-", skip=1}}
|
|
3
|
+
# features_defaults = {{envs.features_defaults | r: todot="-"}}
|
|
4
|
+
# features = {{envs.features | r: todot="-", skip=1}}
|
|
5
|
+
log_info("features:")
|
|
5
6
|
|
|
6
7
|
odir = file.path(outdir, "features")
|
|
7
8
|
dir.create(odir, recursive=TRUE, showWarnings=FALSE)
|
|
@@ -36,7 +37,7 @@ dir.create(odir, recursive=TRUE, showWarnings=FALSE)
|
|
|
36
37
|
}
|
|
37
38
|
|
|
38
39
|
do_one_features = function(name) {
|
|
39
|
-
log_info("
|
|
40
|
+
log_info("- Case: {name}")
|
|
40
41
|
|
|
41
42
|
case = list_update(features_defaults, features[[name]])
|
|
42
43
|
case$devpars = list_update(features_defaults$devpars, features[[name]]$devpars)
|
|
@@ -105,7 +106,7 @@ do_one_features = function(name) {
|
|
|
105
106
|
if (is.null(ncol)) { ncol = 1 }
|
|
106
107
|
list(
|
|
107
108
|
width = 400 * ncol,
|
|
108
|
-
height = ceiling(length(features) / ncol
|
|
109
|
+
height = ceiling(length(features) / ncol) * (max_nchar_idents * .1 + 275),
|
|
109
110
|
res = 100
|
|
110
111
|
)
|
|
111
112
|
}
|
|
@@ -398,7 +399,7 @@ do_one_features = function(name) {
|
|
|
398
399
|
devpars = list_update(default_devpars(case$features, case$ncol), devpars)
|
|
399
400
|
if (kind == "heatmap") {
|
|
400
401
|
if (!exists("downsample") || is.null(downsample)) {
|
|
401
|
-
log_warn("
|
|
402
|
+
log_warn(" 'downsample' is not specified for `heatmap`, using `downsample=1000`")
|
|
402
403
|
downsample = 1000
|
|
403
404
|
}
|
|
404
405
|
if (is.numeric(downsample)) {
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
# Loaded variables: srtfile, outdir, srtobj
|
|
2
2
|
|
|
3
|
-
hists_defaults <- {{envs.hists_defaults | r: todot="-"}}
|
|
4
|
-
hists <- {{envs.hists | r: todot="-", skip=1}}
|
|
3
|
+
# hists_defaults <- {{envs.hists_defaults | r: todot="-"}}
|
|
4
|
+
# hists <- {{envs.hists | r: todot="-", skip=1}}
|
|
5
|
+
log_info("hists:")
|
|
5
6
|
|
|
6
7
|
do_one_hists <- function(m, case, odir, h1, each = NULL) {
|
|
7
8
|
ofile <- file.path(odir, paste0(slugify(h1), ifelse(is.null(each), "", paste0("-", slugify(each))), ".png"))
|
|
@@ -57,7 +58,7 @@ do_one_hists <- function(m, case, odir, h1, each = NULL) {
|
|
|
57
58
|
}
|
|
58
59
|
|
|
59
60
|
if (is.null(hists) || length(hists) == 0) {
|
|
60
|
-
log_warn("
|
|
61
|
+
log_warn("- no cases specified, skipping ...")
|
|
61
62
|
} else {
|
|
62
63
|
|
|
63
64
|
for (name in names(hists)) {
|
|
@@ -112,12 +113,12 @@ if (is.null(hists) || length(hists) == 0) {
|
|
|
112
113
|
h1 = h1
|
|
113
114
|
)
|
|
114
115
|
for (each in eachs) {
|
|
115
|
-
log_info("
|
|
116
|
+
log_info("- Case: {h1} - {each} ...")
|
|
116
117
|
m <- meta %>% filter(!!sym(case$each) == each)
|
|
117
118
|
do_one_hists(m, case, odir, h1, each)
|
|
118
119
|
}
|
|
119
120
|
} else {
|
|
120
|
-
log_info("
|
|
121
|
+
log_info("- Case: {h1} ...")
|
|
121
122
|
add_report(
|
|
122
123
|
list(
|
|
123
124
|
kind = "descr",
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
# Loaded variables: srtfile, outdir, srtobj
|
|
2
2
|
|
|
3
|
-
ngenes_defaults <- {{envs.ngenes_defaults | r: todot="-"}}
|
|
4
|
-
ngenes <- {{envs.ngenes | r: todot="-", skip=1}}
|
|
3
|
+
# ngenes_defaults <- {{envs.ngenes_defaults | r: todot="-"}}
|
|
4
|
+
# ngenes <- {{envs.ngenes | r: todot="-", skip=1}}
|
|
5
|
+
log_info("ngenes:")
|
|
5
6
|
|
|
6
7
|
odir <- file.path(outdir, "ngenes")
|
|
7
8
|
dir.create(odir, recursive=TRUE, showWarnings=FALSE)
|
|
8
9
|
|
|
9
10
|
do_one_ngenes <- function(name) {
|
|
10
|
-
log_info("
|
|
11
|
+
log_info("- Case: {name}")
|
|
11
12
|
|
|
12
13
|
case <- list_update(ngenes_defaults, ngenes[[name]])
|
|
13
14
|
case$devpars <- list_update(ngenes_defaults$devpars, case$devpars)
|
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
# Loaded variables: srtfile, outdir, srtobj
|
|
2
2
|
library(circlize)
|
|
3
3
|
|
|
4
|
-
stats_defaults = {{envs.stats_defaults | r: todot="-"}}
|
|
5
|
-
stats = {{envs.stats | r: todot="-", skip=1}}
|
|
4
|
+
# stats_defaults = {{envs.stats_defaults | r: todot="-"}}
|
|
5
|
+
# stats = {{envs.stats | r: todot="-", skip=1}}
|
|
6
|
+
log_info("stats:")
|
|
6
7
|
|
|
7
8
|
odir = file.path(outdir, "stats")
|
|
8
9
|
dir.create(odir, recursive=TRUE, showWarnings=FALSE)
|
|
9
10
|
|
|
10
11
|
do_one_stats = function(name) {
|
|
11
|
-
log_info("
|
|
12
|
+
log_info("- Case: {name}")
|
|
12
13
|
|
|
13
14
|
case = list_update(stats_defaults, stats[[name]])
|
|
14
15
|
case$devpars = list_update(stats_defaults$devpars, case$devpars)
|
|
@@ -17,12 +18,6 @@ do_one_stats = function(name) {
|
|
|
17
18
|
if (isTRUE(case$pie) && !is.null(case$group.by)) {
|
|
18
19
|
stop(paste0(name, ": pie charts are not supported for group-by"))
|
|
19
20
|
}
|
|
20
|
-
if (!isTRUE(case$frac) && isTRUE(case$frac_ofall)) {
|
|
21
|
-
stop(paste0(name, ": frac_ofall is only supported when frac is true"))
|
|
22
|
-
}
|
|
23
|
-
if (isTRUE(case$frac_ofall) && is.null(case$group.by)) {
|
|
24
|
-
stop(paste0(name, ": frac_ofall is only supported for group-by"))
|
|
25
|
-
}
|
|
26
21
|
if (isTRUE(case$transpose) && is.null(case$group.by)) {
|
|
27
22
|
stop(paste0(name, ": transpose is only supported for group-by"))
|
|
28
23
|
}
|
|
@@ -45,28 +40,28 @@ do_one_stats = function(name) {
|
|
|
45
40
|
!!!syms(case$split.by)
|
|
46
41
|
), function(df) {
|
|
47
42
|
out <- df %>% group_by(!!!syms(select_cols)) %>% summarise(.n = n(), .groups = "drop")
|
|
48
|
-
if (!is.null(case$group.by) &&
|
|
49
|
-
if (
|
|
43
|
+
if (!is.null(case$group.by) && case$frac != "none") {
|
|
44
|
+
if (case$frac == "all") {
|
|
50
45
|
out <- out %>% mutate(.frac = .n / sum(.n))
|
|
51
|
-
} else if (
|
|
52
|
-
out <- out %>% group_by(!!sym(case$ident)) %>% mutate(.frac = .n / sum(.n))
|
|
53
|
-
} else {
|
|
46
|
+
} else if (case$frac == "group") {
|
|
54
47
|
out <- out %>% group_by(!!sym(case$group.by)) %>% mutate(.frac = .n / sum(.n))
|
|
48
|
+
} else { # case$frac == "ident" or "cluster"
|
|
49
|
+
out <- out %>% group_by(!!sym(case$ident)) %>% mutate(.frac = .n / sum(.n))
|
|
55
50
|
}
|
|
56
51
|
}
|
|
57
52
|
out
|
|
58
53
|
}))
|
|
59
|
-
} else if (!is.null(case$group.by) &&
|
|
54
|
+
} else if (!is.null(case$group.by) && case$frac != "none") { # no split.by
|
|
60
55
|
plot_df <- df_cells %>%
|
|
61
56
|
select(all_of(select_cols)) %>%
|
|
62
57
|
group_by(!!!syms(select_cols)) %>%
|
|
63
58
|
summarise(.n = n(), .groups = "drop")
|
|
64
|
-
if (
|
|
59
|
+
if (case$frac == "all") {
|
|
65
60
|
plot_df = plot_df %>% mutate(.frac = .n / sum(.n))
|
|
66
|
-
} else {
|
|
67
|
-
plot_df = plot_df %>%
|
|
68
|
-
|
|
69
|
-
|
|
61
|
+
} else if (case$frac == "group") {
|
|
62
|
+
plot_df = plot_df %>% group_by(!!sym(case$group.by)) %>% mutate(.frac = .n / sum(.n))
|
|
63
|
+
} else { # case$frac == "ident" or "cluster"
|
|
64
|
+
plot_df = plot_df %>% group_by(!!sym(case$ident)) %>% mutate(.frac = .n / sum(.n))
|
|
70
65
|
}
|
|
71
66
|
} else {
|
|
72
67
|
plot_df <- df_cells %>%
|
|
@@ -74,7 +69,7 @@ do_one_stats = function(name) {
|
|
|
74
69
|
group_by(!!!syms(select_cols)) %>%
|
|
75
70
|
summarise(.n = n(), .groups = "drop")
|
|
76
71
|
|
|
77
|
-
if (
|
|
72
|
+
if (case$frac != "none") {
|
|
78
73
|
plot_df <- plot_df %>% mutate(.frac = .n / sum(.n))
|
|
79
74
|
}
|
|
80
75
|
}
|
|
@@ -87,13 +82,13 @@ do_one_stats = function(name) {
|
|
|
87
82
|
p = plot_df %>%
|
|
88
83
|
ggplot(aes(
|
|
89
84
|
x=!!sym(ifelse(case$transpose, case$group.by, case$ident)),
|
|
90
|
-
y=if (
|
|
85
|
+
y=if (case$frac != "none") .frac else .n,
|
|
91
86
|
fill=!!sym(ifelse(is.null(case$group.by) || isTRUE(case$transpose), case$ident, case$group.by))
|
|
92
87
|
)) +
|
|
93
88
|
geom_bar(stat="identity", position=bar_position, alpha=.8) +
|
|
94
89
|
theme_prism(axis_text_angle = 90) +
|
|
95
90
|
scale_fill_biopipen() +
|
|
96
|
-
ylab(ifelse(
|
|
91
|
+
ylab(ifelse(case$frac != "none", "Fraction of cells", "Number of cells"))
|
|
97
92
|
|
|
98
93
|
if (!is.null(case$split.by)) {
|
|
99
94
|
p = p + facet_wrap(case$split.by)
|
|
@@ -108,7 +103,7 @@ do_one_stats = function(name) {
|
|
|
108
103
|
kind = "descr",
|
|
109
104
|
content = paste0(
|
|
110
105
|
"Plots showing the ",
|
|
111
|
-
ifelse(
|
|
106
|
+
ifelse(case$frac != "none", "number/faction", "number"),
|
|
112
107
|
" of cells per cluster",
|
|
113
108
|
ifelse(
|
|
114
109
|
is.null(case$group.by),
|
|
@@ -149,7 +144,7 @@ do_one_stats = function(name) {
|
|
|
149
144
|
guides(fill = guide_legend(title = case$ident)) +
|
|
150
145
|
theme_void() +
|
|
151
146
|
geom_label(
|
|
152
|
-
if (
|
|
147
|
+
if (case$frac != "none")
|
|
153
148
|
aes(label=sprintf("%.1f%%", .frac * 100))
|
|
154
149
|
else
|
|
155
150
|
aes(label=.n),
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
+
{{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
|
|
3
|
+
{{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
|
|
4
|
+
|
|
4
5
|
library(Seurat)
|
|
5
6
|
library(rlang)
|
|
6
7
|
library(dplyr)
|
|
@@ -26,19 +27,34 @@ if (!is.null(mutaters) && length(mutaters) > 0) {
|
|
|
26
27
|
mutate(!!!lapply(mutaters, parse_expr))
|
|
27
28
|
}
|
|
28
29
|
|
|
30
|
+
############## clustree ##############
|
|
31
|
+
clustrees_defaults <- {{envs.clustrees_defaults | r}}
|
|
32
|
+
clustrees <- {{envs.clustrees | r}}
|
|
33
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-clustree.R" | source_r }}
|
|
34
|
+
|
|
29
35
|
############## stats ##############
|
|
30
|
-
{
|
|
36
|
+
stats_defaults = {{envs.stats_defaults | r: todot="-"}}
|
|
37
|
+
stats = {{envs.stats | r: todot="-", skip=1}}
|
|
38
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-stats.R" | source_r }}
|
|
31
39
|
|
|
32
40
|
############## hists ##############
|
|
33
|
-
{
|
|
41
|
+
hists_defaults <- {{envs.hists_defaults | r: todot="-"}}
|
|
42
|
+
hists <- {{envs.hists | r: todot="-", skip=1}}
|
|
43
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-hists.R" | source_r }}
|
|
34
44
|
|
|
35
45
|
############## ngenes ##############
|
|
36
|
-
{
|
|
46
|
+
ngenes_defaults <- {{envs.ngenes_defaults | r: todot="-"}}
|
|
47
|
+
ngenes <- {{envs.ngenes | r: todot="-", skip=1}}
|
|
48
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-ngenes.R" | source_r }}
|
|
37
49
|
|
|
38
50
|
############## features ##############
|
|
39
|
-
{
|
|
51
|
+
features_defaults = {{envs.features_defaults | r: todot="-"}}
|
|
52
|
+
features = {{envs.features | r: todot="-", skip=1}}
|
|
53
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-features.R" | source_r }}
|
|
40
54
|
|
|
41
55
|
############## dimplots ##############
|
|
42
|
-
{
|
|
56
|
+
dimplots_defaults = {{envs.dimplots_defaults | r: todot="-"}}
|
|
57
|
+
dimplots = {{envs.dimplots | r: todot="-", skip=1}}
|
|
58
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-dimplots.R" | source_r }}
|
|
43
59
|
|
|
44
60
|
save_report(joboutdir)
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
|
|
2
|
+
# Expand a scalar dimension count into a full index sequence.
#
# A single number `n` stored in `args[[name]]` is treated as shorthand for
# `1:n` (e.g. 30 becomes 1:30). Anything else (a vector of several
# dimensions, NULL, a non-numeric value) is left untouched.
#
# @param args A list of arguments.
# @param name Name of the element of `args` to expand. Defaults to "dims".
# @return `args`, with `args[[name]]` expanded when it was a single number.
expand_dims <- function(args, name = "dims") {
    dims <- args[[name]]
    # Check the length of the value itself, not of the comparison result, so
    # that explicit dimension vectors are not collapsed to 1:first
    if (is.numeric(dims) && length(dims) == 1) {
        args[[name]] <- 1:dims
    }
    args
}
|
|
9
|
+
|
|
10
|
+
# Expand a resolution specification into a numeric vector of resolutions.
#
# Each element of `resolution` may be:
#   - a number, used as is;
#   - a string of comma-separated items, where each item is either a number
#     or a range "start:end" / "start:end:step" (step defaults to 0.1).
#
# Values are rounded to 2 decimals and de-duplicated. When a value occurs
# more than once, its last occurrence is kept, so the final resolution given
# stays last in the result.
#
# @param resolution Numeric or character vector (or list) of resolutions.
# @return Numeric vector of unique resolutions.
expand_resolution <- function(resolution) {
    expanded_res <- c()
    for (res in resolution) {
        if (is.numeric(res)) {
            expanded_res <- c(expanded_res, res)
            next
        }
        # Character specification: split into comma-separated items
        parts <- trimws(unlist(strsplit(res, ",")))
        for (part in parts) {
            if (!grepl(":", part)) {
                expanded_res <- c(expanded_res, as.numeric(part))
                next
            }
            ps <- trimws(unlist(strsplit(part, ":")))
            if (length(ps) == 2) { ps <- c(ps, 0.1) }
            if (length(ps) != 3) {
                # stop() does not interpolate "{...}", so build the message
                # explicitly to show the offending item
                stop(paste0(
                    "Invalid resolution format: ", part,
                    ". Expected 2 or 3 parts separated by ':' for a range."
                ))
            }
            ps <- as.numeric(ps)
            expanded_res <- c(expanded_res, seq(ps[1], ps[2], by = ps[3]))
        }
    }
    # keep the last resolution at last
    rev(unique(rev(round(expanded_res, 2))))
}
|
|
36
|
+
|
|
37
|
+
# Relabel zero-based cluster ids (0, 1, 2, ...) as c1, c2, c3, ...
#
# @param clusters A factor whose values and levels are integer-like labels.
# @return A factor with each label k replaced by "c<k+1>"; the level order
#   follows the level order of the input.
recode_clusters <- function(clusters) {
    shift_label <- function(lbl) {
        paste0("c", as.integer(as.character(lbl)) + 1)
    }
    factor(shift_label(clusters), levels = shift_label(levels(clusters)))
}
|
|
43
|
+
|
|
44
|
+
# Run ScaleData or SCTransform on a Seurat object, with result caching.
#
# Which transformation runs is decided by the global `envs`:
#   - neither envs$ScaleData nor envs$SCTransform set: the object is returned
#     unchanged (with a warning);
#   - both set: error;
#   - envs$ScaleData set: requires a non-SCT default assay;
#   - envs$SCTransform set: requires "SCT" as the default assay.
#
# The transformed assay and the command log are cached through
# get_cached()/save_to_cache() (defined elsewhere; presumably `cached$data`
# is NULL on a cache miss) and restored from the cache when available.
#
# @param sobj A Seurat object.
# @return The Seurat object with the transformation applied.
run_transformation <- function(sobj) {
    if (length(envs$ScaleData) == 0 && length(envs$SCTransform) == 0) {
        log_warn("Skipping ScaleData/SCTransform (neither specified) ...")
        return(sobj)
    }
    if (length(envs$ScaleData) > 0 && length(envs$SCTransform) > 0) {
        stop("Both envs.ScaleData and envs.SCTransform are specified. Please choose either.")
    }
    if (length(envs$ScaleData) > 0) {
        if (DefaultAssay(sobj) == "SCT") {
            stop("SCT assay detected, but envs.ScaleData is specified. Use envs.SCTransform instead.")
        }
        cached <- get_cached(envs$ScaleData, "ScaleData", cache_dir)
        if (is.null(cached$data)) {
            log_info("Running ScaleData ...")
            sobj <- do_call(ScaleData, c(list(object = sobj), envs$ScaleData))
            cached$data <- list(assay = sobj@assays$RNA, commands = sobj@commands)
            save_to_cache(cached, "ScaleData", cache_dir)
        } else {
            log_info("Loading cached ScaleData ...")
            sobj@assays$RNA <- cached$data$assay
            sobj@commands <- cached$data$commands
            DefaultAssay(sobj) <- "RNA"
        }
    } else if (length(envs$SCTransform) > 0) {
        if (DefaultAssay(sobj) != "SCT") {
            stop("SCT assay not detected, but envs.SCTransform is specified. Use envs.ScaleData instead.")
        }
        cached <- get_cached(envs$SCTransform, "SCTransform", cache_dir)
        # Name of the assay SCTransform writes to; used both when caching the
        # result and when restoring it
        assay <- envs$SCTransform$new.assay.name %||% "SCT"
        if (is.null(cached$data)) {
            log_info("Running SCTransform ...")
            sobj <- do_call(SCTransform, c(list(object = sobj), envs$SCTransform))
            # Cache the assay that was actually produced, which differs from
            # "SCT" when new.assay.name is given
            cached$data <- list(assay = sobj@assays[[assay]], commands = sobj@commands)
            save_to_cache(cached, "SCTransform", cache_dir)
        } else {
            log_info("Loading cached SCTransform ...")
            sobj@assays[[assay]] <- cached$data$assay
            sobj@commands <- cached$data$commands
            DefaultAssay(sobj) <- assay
        }
    }
    sobj
}
|
|
88
|
+
|
|
89
|
+
# Run RunUMAP on a Seurat object, or restore the reduction from the cache.
#
# Arguments come from the global envs$RunUMAP, with defaults for `dims`
# (1:30) and `reduction` (sobj@misc$integrated_new_reduction, else "pca").
# `dims` is capped at ncells - 1, and for the "uwot" method `n.neighbors` is
# capped at min(ncells - 1, 30) unless the user set it.
#
# @param sobj A Seurat object.
# @return The Seurat object with the UMAP reduction and command log updated.
run_umap <- function(sobj) {
    umap_envs <- envs$RunUMAP
    cached <- get_cached(list(sobj = sobj, RunUMAP = umap_envs), "RunUMAP", cache_dir)
    reduc_name <- umap_envs$reduction.name %||% "umap"

    if (!is.null(cached$data)) {
        log_info("Loading cached RunUMAP ...")
        sobj@reductions[[reduc_name]] <- cached$data$reduc
        sobj@commands <- cached$data$commands
        return(sobj)
    }

    log_info("Running RunUMAP ...")
    n_cells <- ncol(sobj)
    default_reduction <- sobj@misc$integrated_new_reduction %||% "pca"
    umap_args <- list_setdefault(
        umap_envs,
        object = sobj,
        dims = 1:30,
        reduction = default_reduction
    )
    # Never ask for more dimensions than ncells - 1
    top_dim <- min(max(umap_args$dims), n_cells - 1)
    umap_args$dims <- 1:top_dim

    method_used <- umap_envs$umap.method %||% "uwot"
    if (method_used == "uwot" && is.null(umap_envs$n.neighbors)) {
        # https://github.com/satijalab/seurat/issues/4312
        umap_args$n.neighbors <- min(n_cells - 1, 30)
    }

    sobj <- do_call(RunUMAP, umap_args)
    cached$data <- list(
        reduc = sobj@reductions[[reduc_name]],
        commands = sobj@commands
    )
    save_to_cache(cached, "RunUMAP", cache_dir)

    sobj
}
|
|
122
|
+
|
|
123
|
+
# Run FindNeighbors on a Seurat object, or restore its graphs from the cache.
#
# Arguments come from the global envs$FindNeighbors; `object` and
# `reduction` (sobj@misc$integrated_new_reduction, else "pca") are always
# set by this function.
#
# @param sobj A Seurat object.
# @return The Seurat object with graphs and command log updated.
run_findneighbors <- function(sobj) {
    cached <- get_cached(
        list(sobj = sobj, FindNeighbors = envs$FindNeighbors),
        "FindNeighbors",
        cache_dir
    )

    if (!is.null(cached$data)) {
        log_info("Loading cached FindNeighbors ...")
        sobj@graphs <- cached$data$graphs
        sobj@commands <- cached$data$commands
        return(sobj)
    }

    log_info("Running FindNeighbors ...")
    neighbor_args <- envs$FindNeighbors
    neighbor_args$object <- sobj
    neighbor_args$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
    sobj <- do_call(FindNeighbors, neighbor_args)

    cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
    save_to_cache(cached, "FindNeighbors", cache_dir)

    sobj
}
|
|
144
|
+
|
|
145
|
+
# Run FindClusters at one or more resolutions, or restore the cluster
# assignments from the cache.
#
# Arguments come from the global envs$FindClusters. The resolution
# specification is expanded with expand_resolution() (default 0.8) and
# random.seed defaults to 8525. One metadata column
# "seurat_clusters.<resolution>" is requested per resolution; those columns
# and "seurat_clusters" are recoded to c1, c2, ..., and the identities are
# set to "seurat_clusters".
#
# @param sobj A Seurat object.
# @return The Seurat object with cluster metadata, identities and command
#   log updated.
run_findclusters <- function(sobj) {
    cached <- get_cached(
        list(sobj = sobj, FindClusters = envs$FindClusters),
        "FindClusters",
        cache_dir
    )

    if (!is.null(cached$data)) {
        log_info("Loading cached FindClusters ...")

        sobj <- AddMetaData(sobj, metadata = cached$data$clusters)
        Idents(sobj) <- "seurat_clusters"
        sobj@commands <- cached$data$commands
        return(sobj)
    }

    fc_args <- envs$FindClusters
    fc_args$random.seed <- fc_args$random.seed %||% 8525
    # NOTE: `resolution` and `ident_table` are referenced by name inside the
    # log message templates below
    resolution <- expand_resolution(fc_args$resolution %||% 0.8)
    fc_args$resolution <- resolution
    log_info("Running FindClusters at resolution: {paste(resolution, collapse=',')} ...")

    cluster_cols <- paste0("seurat_clusters.", resolution)
    fc_args$object <- sobj
    fc_args$cluster.name <- cluster_cols
    sobj <- do_call(FindClusters, fc_args)

    # Recode the per-resolution columns and the main cluster column alike
    for (col in c(cluster_cols, "seurat_clusters")) {
        sobj@meta.data[[col]] <- recode_clusters(sobj@meta.data[[col]])
    }
    Idents(sobj) <- "seurat_clusters"

    ident_table <- table(Idents(sobj))
    log_info("- Found {length(ident_table)} clusters at resolution {resolution[length(resolution)]}")
    print(ident_table)
    cat("\n")

    cached$data <- list(
        clusters = sobj@meta.data[, c(cluster_cols, "seurat_clusters"), drop = FALSE],
        commands = sobj@commands
    )
    save_to_cache(cached, "FindClusters", cache_dir)

    sobj
}
|
|
187
|
+
|
|
188
|
+
# Run PrepSCTFindMarkers when the default assay is "SCT", with caching.
#
# Objects whose default assay is not "SCT" are returned unchanged. After the
# run, a command record named "PrepSCTFindMarkers" is added to
# sobj@commands; it is built by copying the FindClusters record and
# overwriting its fields. The whole object is what gets cached.
#
# @param sobj A Seurat object.
# @return The (possibly updated) Seurat object.
run_prepsctfindmarkers <- function(sobj) {
    if (DefaultAssay(sobj) != "SCT") {
        return(sobj)
    }

    cached <- get_cached(list(sobj = sobj), "PrepSCTFindMarkers", cache_dir)
    if (!is.null(cached$data)) {
        log_info("Loading cached PrepSCTFindMarkers ...")
        return(cached$data)
    }

    # https://github.com/satijalab/seurat/issues/6968
    log_info("Running PrepSCTFindMarkers ...")
    sobj <- PrepSCTFindMarkers(sobj)

    # Reuse the FindClusters record as a template for the new command entry
    cmd_record <- sobj@commands$FindClusters
    cmd_record@name <- "PrepSCTFindMarkers"
    cmd_record@time.stamp <- Sys.time()
    cmd_record@assay.used <- "SCT"
    cmd_record@call.string <- "PrepSCTFindMarkers(object = sobj)"
    cmd_record@params <- list()
    sobj@commands$PrepSCTFindMarkers <- cmd_record

    cached$data <- sobj
    save_to_cache(cached, "PrepSCTFindMarkers", cache_dir)

    sobj
}
|