biopipen 0.29.2__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +2 -0
- biopipen/core/filters.py +21 -0
- biopipen/ns/plot.py +55 -0
- biopipen/ns/scrna.py +110 -21
- biopipen/ns/web.py +87 -5
- biopipen/scripts/bam/CNAClinic.R +2 -1
- biopipen/scripts/cellranger/CellRangerCount.py +3 -3
- biopipen/scripts/cellranger/CellRangerSummary.R +2 -1
- biopipen/scripts/cnv/AneuploidyScore.R +1 -1
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +2 -2
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +3 -2
- biopipen/scripts/gene/GeneNameConversion.R +2 -2
- biopipen/scripts/gsea/Enrichr.R +3 -3
- biopipen/scripts/gsea/FGSEA.R +2 -2
- biopipen/scripts/gsea/GSEA.R +2 -2
- biopipen/scripts/gsea/PreRank.R +2 -2
- biopipen/scripts/plot/Heatmap.R +3 -3
- biopipen/scripts/plot/Manhattan.R +2 -1
- biopipen/scripts/plot/QQPlot.R +1 -1
- biopipen/scripts/plot/ROC.R +1 -1
- biopipen/scripts/plot/Scatter.R +112 -0
- biopipen/scripts/plot/VennDiagram.R +3 -3
- biopipen/scripts/regulatory/MotifAffinityTest.R +3 -7
- biopipen/scripts/rnaseq/Simulation.R +1 -1
- biopipen/scripts/rnaseq/UnitConversion.R +2 -1
- biopipen/scripts/scrna/AnnData2Seurat.R +1 -1
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +24 -8
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +10 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +9 -1
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -8
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +15 -2
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +38 -15
- biopipen/scripts/scrna/CellTypeAnnotation.R +3 -0
- biopipen/scripts/scrna/CellsDistribution.R +4 -3
- biopipen/scripts/scrna/DimPlots.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +1 -1
- biopipen/scripts/scrna/MarkersFinder.R +5 -5
- biopipen/scripts/scrna/MetaMarkers.R +4 -4
- biopipen/scripts/scrna/ModuleScoreCalculator.R +2 -1
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +4 -3
- biopipen/scripts/scrna/ScSimulation.R +64 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +73 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +4 -3
- biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -5
- biopipen/scripts/scrna/SeuratClusterStats-hists.R +6 -5
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +4 -3
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +20 -25
- biopipen/scripts/scrna/SeuratClusterStats.R +24 -8
- biopipen/scripts/scrna/SeuratClustering-common.R +213 -0
- biopipen/scripts/scrna/SeuratClustering.R +10 -170
- biopipen/scripts/scrna/SeuratMap2Ref.R +98 -54
- biopipen/scripts/scrna/SeuratMetadataMutater.R +2 -2
- biopipen/scripts/scrna/SeuratPreparing-common.R +452 -0
- biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +201 -0
- biopipen/scripts/scrna/SeuratPreparing.R +22 -562
- biopipen/scripts/scrna/SeuratSubClustering.R +24 -39
- biopipen/scripts/scrna/TopExpressingGenes.R +1 -1
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +2 -2
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +2 -2
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +3 -3
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +3 -3
- biopipen/scripts/snp/MatrixEQTL.R +1 -1
- biopipen/scripts/snp/PlinkCallRate.R +2 -2
- biopipen/scripts/snp/PlinkFreq.R +2 -2
- biopipen/scripts/snp/PlinkHWE.R +2 -2
- biopipen/scripts/snp/PlinkHet.R +2 -2
- biopipen/scripts/snp/PlinkIBD.R +2 -2
- biopipen/scripts/stats/ChowTest.R +1 -1
- biopipen/scripts/stats/DiffCoexpr.R +1 -1
- biopipen/scripts/stats/LiquidAssoc.R +1 -1
- biopipen/scripts/stats/Mediation.R +11 -9
- biopipen/scripts/stats/MetaPvalue.R +4 -1
- biopipen/scripts/stats/MetaPvalue1.R +4 -1
- biopipen/scripts/tcr/Attach2Seurat.R +1 -1
- biopipen/scripts/tcr/CDR3AAPhyschem.R +1 -1
- biopipen/scripts/tcr/CloneResidency.R +2 -2
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch-basic.R +0 -4
- biopipen/scripts/tcr/Immunarch-clonality.R +0 -4
- biopipen/scripts/tcr/Immunarch-diversity.R +2 -24
- biopipen/scripts/tcr/Immunarch-geneusage.R +0 -2
- biopipen/scripts/tcr/Immunarch-kmer.R +0 -2
- biopipen/scripts/tcr/Immunarch-overlap.R +0 -2
- biopipen/scripts/tcr/Immunarch-spectratyping.R +0 -2
- biopipen/scripts/tcr/Immunarch-tracking.R +0 -2
- biopipen/scripts/tcr/Immunarch-vjjunc.R +0 -2
- biopipen/scripts/tcr/Immunarch.R +43 -11
- biopipen/scripts/tcr/ImmunarchFilter.R +1 -1
- biopipen/scripts/tcr/ImmunarchLoading.R +2 -2
- biopipen/scripts/tcr/SampleDiversity.R +1 -1
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +2 -2
- biopipen/scripts/tcr/TESSA.R +2 -2
- biopipen/scripts/vcf/TruvariBenchSummary.R +2 -2
- biopipen/scripts/vcf/TruvariConsistency.R +1 -1
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
- biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
- biopipen/scripts/web/gcloud_common.py +49 -0
- {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/METADATA +7 -7
- {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/RECORD +106 -96
- {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/WHEEL +0 -0
- {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
+
|
|
3
|
+
library(ggpmisc)
|
|
4
|
+
library(rlang)
|
|
5
|
+
library(ggplot2)
|
|
6
|
+
library(ggprism)
|
|
7
|
+
|
|
8
|
+
theme_set(theme_prism())
|
|
9
|
+
|
|
10
|
+
infile <- {{in.infile | r}}
|
|
11
|
+
outfile <- {{out.outfile | r}}
|
|
12
|
+
x_col <- {{envs.x_col | r}}
|
|
13
|
+
y_col <- {{envs.y_col | r}}
|
|
14
|
+
devpars <- {{envs.devpars | r}}
|
|
15
|
+
args <- {{envs.args | r}}
|
|
16
|
+
ggs <- {{envs.ggs | r}}
|
|
17
|
+
formula <- {{envs.formula | r}}
|
|
18
|
+
mapping <- {{envs.mapping | r}}
|
|
19
|
+
stats <- {{envs.stats | r}}
|
|
20
|
+
|
|
21
|
+
# Turn option values given as strings of R code into the R objects they denote.
#
# ex:        a character value holding R code, a list of such values, or
#            anything else (returned untouched).
# recursive: when TRUE, lists are walked element by element (nested lists too).
#
# A string of the form "-name" (a minus sign followed by a single identifier)
# is treated as "negate the result of function `name`": the identifier is
# evaluated and a wrapper `function(x) -name(x)` is returned.
# Any other string is parsed and evaluated as-is.
.ensure_r <- function(ex, recursive=TRUE) {
    # Non-character input: descend into lists on request, otherwise pass through
    if (!is.character(ex)) {
        if (is.list(ex) && recursive) {
            return(lapply(ex, .ensure_r, recursive=TRUE))
        }
        return(ex)
    }

    ex <- trimws(ex)

    # Plain expression: evaluate the code text directly
    if (!grepl("^-\\s*[a-zA-Z\\.][0-9a-zA-Z\\._]*$", ex)) {
        return(eval(parse(text = ex)))
    }

    # "-name": drop the leading minus, resolve the identifier, wrap with negation
    ex <- trimws(substring(ex, 2))
    ex <- eval(parse(text = ex))
    function(x) -ex(x)
}
|
|
37
|
+
|
|
38
|
+
# Combine two aesthetic mappings into one.
#
# If either side is NULL the other one is returned unchanged. Otherwise both
# are concatenated and rebuilt through `aes()`. When a key occurs more than
# once, the FIRST occurrence (i.e. the one from `aes1`) is kept and a warning
# is logged for each later occurrence; later values are not applied.
.merge_aes <- function(aes1, aes2) {
    if (is.null(aes1)) return(aes2)
    if (is.null(aes2)) return(aes1)

    combined <- c(aes1, aes2)  # concatenation yields a plain list
    kept <- list()
    for (nm in names(combined)) {
        if (!is.null(kept[[nm]])) {
            # Key already taken by an earlier entry: only report it
            log_warn(paste("Overwriting mapping key:", nm))
            next
        }
        kept[[nm]] <- combined[[nm]]
    }
    do.call(aes, kept)
}
|
|
56
|
+
|
|
57
|
+
# ---- Validate and normalise the options ------------------------------------
# A model formula is required; it is the default `formula` for every stat layer.
if (is.null(formula)) {
    stop("Formula must be provided")
}
if (!is.null(mapping)) {
    # Accept "(...)", "aes(...)" or a bare "key = value, ..." string and bring
    # all of them to the "aes(...)" form before evaluating it.
    if (startsWith(mapping, "(") && endsWith(mapping, ")")) {
        mapping <- paste0("aes", mapping)
    } else if (!startsWith(mapping, "aes(")) {
        mapping <- paste0("aes(", mapping, ")")
    }
    mapping <- .ensure_r(mapping)
}
formula <- as.formula(formula)

# ---- Load the data ---------------------------------------------------------
indata <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
# Columns may be given by position; translate them to names
if (is.numeric(x_col)) {
    x_col <- colnames(indata)[x_col]
}
if (is.numeric(y_col)) {
    y_col <- colnames(indata)[y_col]
}

# ---- Prepare layer arguments -----------------------------------------------
# String values in `args`/`stats` are R code and are evaluated here
args <- lapply(args, .ensure_r)
args$mapping <- .merge_aes(args$mapping, mapping)

if (!is.null(stats)) {
    stats <- lapply(stats, .ensure_r)
}

# ---- Build the plot --------------------------------------------------------
p <- ggplot(indata, aes(x = !!sym(x_col), y = !!sym(y_col))) +
    do.call(geom_point, args)

for (key in names(stats)) {
    # `key` is the name under which the arguments are stored in `stats` and
    # must stay untouched for the lookup below. The function name is derived
    # from a copy: an optional "stat_" prefix is dropped and anything from the
    # first "#" on is ignored (the "#..." part is not used in the function name).
    stat_name <- key
    if (startsWith(stat_name, "stat_")) {
        stat_name <- substring(stat_name, 6)
    }
    if (grepl("#", stat_name)) {
        stat_name <- strsplit(stat_name, "#")[[1]][1]
    }
    st <- paste0("stat_", stat_name)

    # Look the arguments up by the ORIGINAL key so that entries written with
    # the "stat_" prefix keep their user-supplied arguments.
    stat_args <- stats[[key]]
    stat_args$formula <- stat_args$formula %||% formula
    stat_args$mapping <- .merge_aes(stat_args$mapping, mapping)
    p <- p + do.call(st, stat_args)
}

if (!is.null(ggs)) {
    # NOTE(review): each entry is evaluated as R code; `envs.ggs` must come
    # from a trusted pipeline configuration.
    for (gg in ggs) {
        p <- p + eval(parse(text = gg))
    }
}

p <- p + scale_color_biopipen()

# ---- Render ----------------------------------------------------------------
png(outfile, width=devpars$width, height=devpars$height, res=devpars$res)
print(p)
dev.off()
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "io.R" | source_r }}
|
|
2
|
+
{{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
|
|
2
3
|
|
|
3
|
-
|
|
4
|
-
source("{{biopipen_dir}}/utils/plot.R")
|
|
4
|
+
library(dplyr)
|
|
5
5
|
|
|
6
6
|
infile = {{in.infile | quote}}
|
|
7
7
|
outfile = {{out.outfile | quote}}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Script for regulatory.MotifAffinityTest
|
|
2
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
3
|
|
|
3
|
-
source("{{biopipen_dir}}/utils/misc.R")
|
|
4
4
|
library(BiocParallel)
|
|
5
5
|
library(BSgenome)
|
|
6
6
|
library(universalmotif)
|
|
@@ -215,12 +215,8 @@ tool <- match.arg(tool, c("motifbreakr", "atsnp"))
|
|
|
215
215
|
|
|
216
216
|
if (tool == "motifbreakr") {
|
|
217
217
|
motifbreakr_args <- {{envs.motifbreakr_args | r}}
|
|
218
|
-
{
|
|
219
|
-
# {{ sourcefile | getmtime }}
|
|
220
|
-
source("{{sourcefile}}")
|
|
218
|
+
{{ biopipen_dir | joinpaths: "scripts", "regulatory", "MotifAffinityTest_MotifBreakR.R" | source_r }}
|
|
221
219
|
} else { # atsnp
|
|
222
220
|
atsnp_args <- {{envs.atsnp_args | r}}
|
|
223
|
-
{
|
|
224
|
-
# {{ sourcefile | getmtime }}
|
|
225
|
-
source("{{sourcefile}}")
|
|
221
|
+
{{ biopipen_dir | joinpaths: "scripts", "regulatory", "MotifAffinityTest_AtSNP.R" | source_r }}
|
|
226
222
|
}
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
-
|
|
3
1
|
library(rlang)
|
|
4
2
|
library(hdf5r)
|
|
5
3
|
library(dplyr)
|
|
@@ -8,6 +6,7 @@ library(Seurat)
|
|
|
8
6
|
sobjfile <- {{in.sobjfile | r}}
|
|
9
7
|
outfile <- {{out.outfile | r}}
|
|
10
8
|
newcol <- {{envs.newcol | r}}
|
|
9
|
+
merge_same_labels <- {{envs.merge | r}}
|
|
11
10
|
celltypist_args <- {{envs.celltypist_args | r}}
|
|
12
11
|
|
|
13
12
|
outdir <- dirname(outfile)
|
|
@@ -33,6 +32,7 @@ if (!file.exists(modelfile)) {
|
|
|
33
32
|
sobj <- NULL
|
|
34
33
|
outtype <- tolower(tools::file_ext(outfile)) # .rds, .h5ad, .h5seurat
|
|
35
34
|
if (!endsWith(sobjfile, ".h5ad")) {
|
|
35
|
+
log_info("Convert input to H5AD ...")
|
|
36
36
|
library(SeuratDisk)
|
|
37
37
|
|
|
38
38
|
assay <- celltypist_args$assay
|
|
@@ -123,8 +123,7 @@ if (file.exists(celltypist_outfile) &&
|
|
|
123
123
|
if (isTRUE(celltypist_args$majority_voting)) {
|
|
124
124
|
command <- paste(command, "-v")
|
|
125
125
|
}
|
|
126
|
-
|
|
127
|
-
print(command)
|
|
126
|
+
log_info("Running celltypist:")
|
|
128
127
|
log_debug("- {command}")
|
|
129
128
|
rc <- system(command)
|
|
130
129
|
if (rc != 0) {
|
|
@@ -135,11 +134,21 @@ if (file.exists(celltypist_outfile) &&
|
|
|
135
134
|
if (outtype == "h5ad") {
|
|
136
135
|
# log_info("Using H5AD from celltypist as output directly ...")
|
|
137
136
|
# file.rename(paste0(out_prefix, ".h5ad"), outfile)
|
|
137
|
+
if (merge_same_labels) {
|
|
138
|
+
log_warn("- Merging clusters with the same labels is not supported for h5ad outfile ...")
|
|
139
|
+
}
|
|
138
140
|
} else if (outtype == "h5seurat") {
|
|
139
141
|
log_info("Converting H5AD from celltypist to H5Seurat ...")
|
|
140
142
|
# outfile is cleaned by the pipeline anyway
|
|
141
143
|
Convert(
|
|
142
|
-
celltypist_outfile,
|
|
144
|
+
celltypist_outfile,
|
|
145
|
+
assay = assay %||% 'RNA',
|
|
146
|
+
dest = outfile,
|
|
147
|
+
overwrite = TRUE
|
|
148
|
+
)
|
|
149
|
+
if (merge_same_labels) {
|
|
150
|
+
log_warn("- Merging clusters with the same labels is not supported for h5seurat outfile ...")
|
|
151
|
+
}
|
|
143
152
|
} else if (outtype == "rds") {
|
|
144
153
|
if (is.null(sobj)) {
|
|
145
154
|
log_info("Converting H5AD from celltypist to RDS ...")
|
|
@@ -178,7 +187,10 @@ if (outtype == "h5ad") {
|
|
|
178
187
|
# end
|
|
179
188
|
|
|
180
189
|
sobj <- LoadH5Seurat(h5seurat_file)
|
|
181
|
-
|
|
190
|
+
if (merge_same_labels) {
|
|
191
|
+
log_info("Merging clusters with the same labels ...")
|
|
192
|
+
sobj <- merge_clusters_with_same_labels(sobj, newcol)
|
|
193
|
+
}
|
|
182
194
|
} else {
|
|
183
195
|
log_info("Attaching celltypist results to Seurat object ...")
|
|
184
196
|
|
|
@@ -228,9 +240,13 @@ if (outtype == "h5ad") {
|
|
|
228
240
|
} else if (!is.null(newcol)) {
|
|
229
241
|
sobj@meta.data[[newcol]] <- sobj@meta.data[["predicted_labels"]]
|
|
230
242
|
}
|
|
231
|
-
|
|
232
|
-
|
|
243
|
+
if (merge_same_labels) {
|
|
244
|
+
log_info("Merging clusters with the same labels ...")
|
|
245
|
+
sobj <- merge_clusters_with_same_labels(sobj, newcol)
|
|
246
|
+
}
|
|
233
247
|
}
|
|
248
|
+
log_info("Saving Seurat object in RDS ...")
|
|
249
|
+
saveRDS(sobj, outfile)
|
|
234
250
|
} else {
|
|
235
251
|
stop(paste0("Unknown output type: ", outtype))
|
|
236
252
|
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Collapse labels that differ only by a trailing ".<digits>" suffix.
#
# sobj:   object with a `meta.data` slot holding the label column.
# newcol: name of the metadata column to clean, or NULL to clean
#         `seurat_clusters` instead.
#
# With `newcol = NULL` the identities of `sobj` are additionally set to
# "seurat_clusters". Returns the modified object.
merge_clusters_with_same_labels <- function(sobj, newcol) {
    if (!is.null(newcol)) {
        # Only the requested column is rewritten; identities stay as they are
        sobj@meta.data[[newcol]] <- sub("\\.\\d+$", "", sobj@meta.data[[newcol]])
        return(sobj)
    }

    sobj@meta.data$seurat_clusters <- sub("\\.\\d+$", "", sobj@meta.data$seurat_clusters)
    Idents(sobj) <- "seurat_clusters"
    sobj
}
|
|
@@ -1,14 +1,17 @@
|
|
|
1
|
-
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
1
|
library(Seurat)
|
|
3
2
|
|
|
4
3
|
sobjfile <- {{in.sobjfile | r}}
|
|
5
4
|
outfile <- {{out.outfile | r}}
|
|
6
5
|
celltypes <- {{envs.cell_types | r}}
|
|
7
6
|
newcol <- {{envs.newcol | r}}
|
|
7
|
+
merge_same_labels <- {{envs.merge | r}}
|
|
8
8
|
|
|
9
9
|
if (is.null(celltypes) || length(celltypes) == 0) {
|
|
10
10
|
log_warn("No cell types are given!")
|
|
11
11
|
|
|
12
|
+
if (merge_same_labels) {
|
|
13
|
+
log_warn("Ignoring 'envs.merge' because no cell types are given!")
|
|
14
|
+
}
|
|
12
15
|
# create a symbolic link to the input file
|
|
13
16
|
file.symlink(sobjfile, outfile)
|
|
14
17
|
} else {
|
|
@@ -55,5 +58,10 @@ if (is.null(celltypes) || length(celltypes) == 0) {
|
|
|
55
58
|
Idents(sobj) <- "seurat_clusters"
|
|
56
59
|
}
|
|
57
60
|
|
|
61
|
+
if (merge_same_labels) {
|
|
62
|
+
log_info("Merging clusters with the same labels ...")
|
|
63
|
+
sobj <- merge_clusters_with_same_labels(sobj, newcol)
|
|
64
|
+
}
|
|
65
|
+
|
|
58
66
|
saveRDS(sobj, outfile)
|
|
59
67
|
}
|
|
@@ -2,21 +2,20 @@ library(Seurat)
|
|
|
2
2
|
library(dplyr)
|
|
3
3
|
library(hitype)
|
|
4
4
|
|
|
5
|
-
source("{{biopipen_dir}}/utils/misc.R")
|
|
6
|
-
|
|
7
5
|
sobjfile = {{in.sobjfile | r}}
|
|
8
6
|
outfile = {{out.outfile | r}}
|
|
9
7
|
tissue = {{envs.hitype_tissue | r}}
|
|
10
8
|
db = {{envs.hitype_db | r}}
|
|
11
9
|
newcol = {{envs.newcol | r}}
|
|
10
|
+
merge_same_labels = {{envs.merge | r}}
|
|
12
11
|
|
|
13
12
|
if (is.null(db)) { stop("`envs.hitype_db` is not set") }
|
|
14
13
|
|
|
15
|
-
|
|
14
|
+
log_info("Reading Seurat object...")
|
|
16
15
|
sobj = readRDS(sobjfile)
|
|
17
16
|
|
|
18
17
|
# prepare gene sets
|
|
19
|
-
|
|
18
|
+
log_info("Preparing gene sets...")
|
|
20
19
|
if (startsWith(db, "hitypedb_") && !grepl(".", db, fixed = TRUE)) {
|
|
21
20
|
gs_list = gs_prepare(eval(as.symbol(db)), tissue)
|
|
22
21
|
} else {
|
|
@@ -24,10 +23,10 @@ if (startsWith(db, "hitypedb_") && !grepl(".", db, fixed = TRUE)) {
|
|
|
24
23
|
}
|
|
25
24
|
|
|
26
25
|
# run RunHitype
|
|
27
|
-
|
|
26
|
+
log_info("Running RunHitype...")
|
|
28
27
|
sobj = RunHitype(sobj, gs_list, threshold = 0.0, make_unique = TRUE)
|
|
29
28
|
|
|
30
|
-
|
|
29
|
+
log_info("Renaming cell types...")
|
|
31
30
|
hitype_levels = sobj@meta.data %>%
|
|
32
31
|
select(seurat_clusters, hitype) %>%
|
|
33
32
|
distinct(seurat_clusters, .keep_all = TRUE) %>%
|
|
@@ -42,10 +41,15 @@ if (is.null(newcol)) {
|
|
|
42
41
|
sobj[[newcol]] = factor(sobj$hitype, levels = hitype_levels)
|
|
43
42
|
}
|
|
44
43
|
|
|
45
|
-
|
|
44
|
+
if (merge_same_labels) {
|
|
45
|
+
log_info("Merging clusters with the same labels...")
|
|
46
|
+
sobj = merge_clusters_with_same_labels(sobj, newcol)
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
log_info("Saving Seurat object...")
|
|
46
50
|
saveRDS(sobj, outfile)
|
|
47
51
|
|
|
48
|
-
|
|
52
|
+
log_info("Saving the mappings ...")
|
|
49
53
|
if (is.null(newcol)) {
|
|
50
54
|
celltypes = sobj@meta.data %>%
|
|
51
55
|
group_by(seurat_clusters_id) %>%
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
1
|
library(scCATCH)
|
|
3
2
|
library(Seurat)
|
|
4
3
|
|
|
@@ -6,6 +5,7 @@ sobjfile = {{in.sobjfile | r}}
|
|
|
6
5
|
outfile = {{out.outfile | r}}
|
|
7
6
|
sccatch_args = {{envs.sccatch_args | r}}
|
|
8
7
|
newcol = {{envs.newcol | r}}
|
|
8
|
+
merge_same_labels = {{envs.merge | r}}
|
|
9
9
|
|
|
10
10
|
if (!is.null(sccatch_args$marker)) {
|
|
11
11
|
cellmatch = readRDS(sccatch_args$marker)
|
|
@@ -17,14 +17,20 @@ if (is.integer(sccatch_args$use_method)) {
|
|
|
17
17
|
sccatch_args$use_method = as.character(sccatch_args$use_method)
|
|
18
18
|
}
|
|
19
19
|
|
|
20
|
+
log_info("Reading Seurat object...")
|
|
20
21
|
sobj = readRDS(sobjfile)
|
|
21
22
|
|
|
23
|
+
log_info("Running createscCATCH ...")
|
|
22
24
|
obj = createscCATCH(data = GetAssayData(sobj), cluster = as.character(Idents(sobj)))
|
|
23
25
|
sccatch_args$object = obj
|
|
24
26
|
|
|
27
|
+
log_info("Running findmarkergene ...")
|
|
25
28
|
obj = do_call(findmarkergene, sccatch_args)
|
|
29
|
+
|
|
30
|
+
log_info("Running findcelltype ...")
|
|
26
31
|
obj = findcelltype(object = obj)
|
|
27
32
|
|
|
33
|
+
log_info("Saving the mappings ...")
|
|
28
34
|
write.table(
|
|
29
35
|
obj@celltype,
|
|
30
36
|
file = file.path(dirname(outfile), "cluster2celltype.tsv"),
|
|
@@ -36,7 +42,7 @@ celltypes = as.list(obj@celltype$cell_type)
|
|
|
36
42
|
names(celltypes) = obj@celltype$cluster
|
|
37
43
|
|
|
38
44
|
if (length(celltypes) == 0) {
|
|
39
|
-
|
|
45
|
+
log_warn("- No cell types annotated from the database!")
|
|
40
46
|
} else {
|
|
41
47
|
if (is.null(newcol)) {
|
|
42
48
|
sobj$seurat_clusters_id = Idents(sobj)
|
|
@@ -49,5 +55,12 @@ if (length(celltypes) == 0) {
|
|
|
49
55
|
sobj[[newcol]] = Idents(sobj)
|
|
50
56
|
Idents(sobj) = "seurat_clusters"
|
|
51
57
|
}
|
|
58
|
+
|
|
59
|
+
if (merge_same_labels) {
|
|
60
|
+
log_info("Merging clusters with the same labels ...")
|
|
61
|
+
sobj = merge_clusters_with_same_labels(sobj, newcol)
|
|
62
|
+
}
|
|
52
63
|
}
|
|
64
|
+
|
|
65
|
+
log_info("Saving Seurat object ...")
|
|
53
66
|
saveRDS(sobj, outfile)
|
|
@@ -1,34 +1,37 @@
|
|
|
1
1
|
library(dplyr)
|
|
2
2
|
library(HGNChelper)
|
|
3
3
|
library(Seurat)
|
|
4
|
+
library(rlang)
|
|
4
5
|
|
|
5
|
-
|
|
6
|
-
source("{{biopipen_dir}}/scripts/scrna/sctype.R")
|
|
6
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "sctype.R" | source_r }}
|
|
7
7
|
|
|
8
8
|
sobjfile = {{in.sobjfile | r}}
|
|
9
9
|
outfile = {{out.outfile | r}}
|
|
10
10
|
tissue = {{envs.sctype_tissue | r}}
|
|
11
11
|
db = {{envs.sctype_db | r}}
|
|
12
12
|
newcol = {{envs.newcol | r}}
|
|
13
|
+
merge_same_labels = {{envs.merge | r}}
|
|
13
14
|
|
|
14
15
|
if (is.null(db)) { stop("`envs.sctype_args.db` is not set") }
|
|
15
16
|
|
|
16
|
-
|
|
17
|
+
log_info("Reading Seurat object...")
|
|
17
18
|
sobj = readRDS(sobjfile)
|
|
18
19
|
|
|
19
20
|
# prepare gene sets
|
|
20
|
-
|
|
21
|
+
log_info("Preparing gene sets...")
|
|
21
22
|
gs_list = gene_sets_prepare(db, tissue)
|
|
22
23
|
|
|
23
24
|
scRNAseqData = GetAssayData(sobj, layer = "scale.data")
|
|
24
25
|
idents = as.character(unique(Idents(sobj)))
|
|
25
26
|
idents = idents[order(as.numeric(idents))]
|
|
26
27
|
|
|
28
|
+
log_info("Working on different levels of cell type labels ...")
|
|
27
29
|
cell_types_list = list()
|
|
28
30
|
for (i in seq_along(gs_list)) {
|
|
31
|
+
log_info("- Working on level {i} ...")
|
|
29
32
|
if (is.null(gs_list[[i]])) next
|
|
30
33
|
|
|
31
|
-
|
|
34
|
+
log_info(" Calculating cell-type scores ...")
|
|
32
35
|
es.max = sctype_score(
|
|
33
36
|
scRNAseqData = scRNAseqData,
|
|
34
37
|
scaled = TRUE,
|
|
@@ -36,7 +39,7 @@ for (i in seq_along(gs_list)) {
|
|
|
36
39
|
gs2 = gs_list[[i]]$gs_negative
|
|
37
40
|
)
|
|
38
41
|
|
|
39
|
-
|
|
42
|
+
log_info(" Merging cell-type scores by cluster ...")
|
|
40
43
|
cl_resutls = do_call(
|
|
41
44
|
"rbind",
|
|
42
45
|
lapply(
|
|
@@ -59,12 +62,12 @@ for (i in seq_along(gs_list)) {
|
|
|
59
62
|
write("\n####### sctype_scores_count ########", stderr())
|
|
60
63
|
write(capture.output(sctype_scores_count), stderr())
|
|
61
64
|
write("\n####################################", stderr())
|
|
62
|
-
|
|
65
|
+
log_info(" Scores tied in the above clusters.", immediate. = TRUE)
|
|
63
66
|
}
|
|
64
67
|
|
|
65
68
|
if (length(gs_list) == 1 || i > 1) {
|
|
66
69
|
# set low-confident (low ScType score) clusters to "unknown"
|
|
67
|
-
|
|
70
|
+
log_info(" Setting low-confident clusters to 'Unknown'...")
|
|
68
71
|
sctype_scores$type[as.numeric(as.character(sctype_scores$scores)) < sctype_scores$ncells/4] = "Unknown"
|
|
69
72
|
}
|
|
70
73
|
|
|
@@ -82,7 +85,7 @@ for (i in seq_along(gs_list)) {
|
|
|
82
85
|
if (length(cell_types_list) == 1) {
|
|
83
86
|
celltypes = cell_types_list[[1]]
|
|
84
87
|
} else {
|
|
85
|
-
|
|
88
|
+
log_info("Merging cell types at all levels ...")
|
|
86
89
|
celltypes = list()
|
|
87
90
|
|
|
88
91
|
for (i in idents) {
|
|
@@ -97,7 +100,18 @@ if (length(cell_types_list) == 1) {
|
|
|
97
100
|
}
|
|
98
101
|
|
|
99
102
|
|
|
100
|
-
|
|
103
|
+
log_info("Renaming cell types...")
|
|
104
|
+
ct_numbering = list()
|
|
105
|
+
for (key in names(celltypes)) {
|
|
106
|
+
ct = celltypes[[key]]
|
|
107
|
+
ct_numbering[[ct]] = ct_numbering[[ct]] %||% 0
|
|
108
|
+
if (ct_numbering[[ct]] > 0) {
|
|
109
|
+
celltypes[[key]] = paste0(ct, ".", ct_numbering[[ct]])
|
|
110
|
+
}
|
|
111
|
+
ct_numbering[[ct]] = ct_numbering[[ct]] + 1
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
celltypes = as.list(celltypes)
|
|
101
115
|
if (is.null(newcol)) {
|
|
102
116
|
sobj$seurat_clusters_id = sobj$seurat_clusters
|
|
103
117
|
celltypes$object = sobj
|
|
@@ -109,12 +123,18 @@ if (is.null(newcol)) {
|
|
|
109
123
|
sobj[[newcol]] = Idents(sobj)
|
|
110
124
|
Idents(sobj) = "seurat_clusters"
|
|
111
125
|
}
|
|
112
|
-
|
|
113
|
-
print("- Saving Seurat object...")
|
|
114
|
-
saveRDS(sobj, outfile)
|
|
115
|
-
|
|
116
|
-
print("- Saving the mappings ...")
|
|
117
126
|
celltypes$object = NULL
|
|
127
|
+
gc()
|
|
128
|
+
|
|
129
|
+
if (merge_same_labels) {
|
|
130
|
+
log_info("Merging clusters with the same labels...")
|
|
131
|
+
sobj <- merge_clusters_with_same_labels(sobj, newcol)
|
|
132
|
+
celltypes <- lapply(celltypes, function(ct) {
|
|
133
|
+
sub("\\.\\d+$", "", ct)
|
|
134
|
+
})
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
log_info("Saving the mappings ...")
|
|
118
138
|
write.table(
|
|
119
139
|
data.frame(
|
|
120
140
|
Cluster = names(celltypes),
|
|
@@ -126,3 +146,6 @@ write.table(
|
|
|
126
146
|
quote = FALSE,
|
|
127
147
|
row.names = FALSE
|
|
128
148
|
)
|
|
149
|
+
|
|
150
|
+
log_info("Saving Seurat object...")
|
|
151
|
+
saveRDS(sobj, outfile)
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
set.seed(8525)
|
|
2
2
|
|
|
3
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
4
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "CellTypeAnnotation-common.R" | source_r }}
|
|
5
|
+
|
|
3
6
|
{% if envs.tool == "hitype" %}
|
|
4
7
|
{% include biopipen_dir + "/scripts/scrna/CellTypeAnnotation-hitype.R" %}
|
|
5
8
|
{% elif envs.tool == "sctype" %}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
+
{{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
|
|
3
|
+
|
|
3
4
|
library(Seurat)
|
|
4
5
|
library(rlang)
|
|
5
6
|
library(tidyr)
|
|
@@ -324,7 +325,7 @@ do_case <- function(name, case) {
|
|
|
324
325
|
geom_col(width=.01, position="fill", color = "#888888") +
|
|
325
326
|
geom_bar(stat = "identity", position = position_fill(reverse = TRUE)) +
|
|
326
327
|
coord_polar("y", start = 0) +
|
|
327
|
-
|
|
328
|
+
scale_fill_manual(name = "Cluster", values = pal_biopipen()(length(levels(all_clusters)))) +
|
|
328
329
|
theme_void() +
|
|
329
330
|
theme(
|
|
330
331
|
plot.margin = plot.margin,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
+
{{ biopipen_dir | joinpaths: "utils", "caching.R" | source_r }}
|
|
3
|
+
{{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
|
|
4
4
|
|
|
5
5
|
library(rlang)
|
|
6
6
|
library(dplyr)
|
|
@@ -70,8 +70,8 @@ if (defassay == "SCT" && !"PrepSCTFindMarkers" %in% names(srtobj@commands)) {
|
|
|
70
70
|
|
|
71
71
|
srtobj <- PrepSCTFindMarkers(srtobj)
|
|
72
72
|
# compose a new SeuratCommand to record it to srtobj@commands
|
|
73
|
-
commands <- names(
|
|
74
|
-
scommand <-
|
|
73
|
+
commands <- names(pbmc_small@commands)
|
|
74
|
+
scommand <- pbmc_small@commands[[commands[length(commands)]]]
|
|
75
75
|
scommand@name <- "PrepSCTFindMarkers"
|
|
76
76
|
scommand@time.stamp <- Sys.time()
|
|
77
77
|
scommand@assay.used <- "SCT"
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
+
{{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
|
|
3
3
|
|
|
4
4
|
library(rlang)
|
|
5
5
|
library(dplyr)
|
|
@@ -41,8 +41,8 @@ if (DefaultAssay(srtobj) == "SCT" && !"PrepSCTFindMarkers" %in% names(srtobj@com
|
|
|
41
41
|
|
|
42
42
|
srtobj <- PrepSCTFindMarkers(srtobj)
|
|
43
43
|
# compose a new SeuratCommand to record it to srtobj@commands
|
|
44
|
-
commands <- names(
|
|
45
|
-
scommand <-
|
|
44
|
+
commands <- names(pbmc_small@commands)
|
|
45
|
+
scommand <- pbmc_small@commands[[commands[length(commands)]]]
|
|
46
46
|
scommand@name <- "PrepSCTFindMarkers"
|
|
47
47
|
scommand@time.stamp <- Sys.time()
|
|
48
48
|
scommand@assay.used <- "SCT"
|
biopipen/scripts/scrna/ScFGSEA.R
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
+
{{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
|
|
3
|
+
{{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
|
|
4
|
+
|
|
4
5
|
library(rlang)
|
|
5
6
|
library(Seurat)
|
|
6
7
|
library(tidyseurat)
|