biopipen 0.29.1__py3-none-any.whl → 0.30.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +2 -0
- biopipen/core/filters.py +21 -0
- biopipen/ns/plot.py +55 -0
- biopipen/ns/scrna.py +49 -13
- biopipen/ns/web.py +87 -5
- biopipen/scripts/bam/CNAClinic.R +2 -1
- biopipen/scripts/cellranger/CellRangerCount.py +3 -3
- biopipen/scripts/cellranger/CellRangerSummary.R +2 -1
- biopipen/scripts/cnv/AneuploidyScore.R +1 -1
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +2 -2
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +3 -2
- biopipen/scripts/gene/GeneNameConversion.R +2 -2
- biopipen/scripts/gsea/Enrichr.R +3 -3
- biopipen/scripts/gsea/FGSEA.R +2 -2
- biopipen/scripts/gsea/GSEA.R +2 -2
- biopipen/scripts/gsea/PreRank.R +2 -2
- biopipen/scripts/plot/Heatmap.R +3 -3
- biopipen/scripts/plot/Manhattan.R +2 -1
- biopipen/scripts/plot/QQPlot.R +1 -1
- biopipen/scripts/plot/ROC.R +1 -1
- biopipen/scripts/plot/Scatter.R +112 -0
- biopipen/scripts/plot/VennDiagram.R +3 -3
- biopipen/scripts/regulatory/MotifAffinityTest.R +3 -7
- biopipen/scripts/rnaseq/Simulation.R +1 -1
- biopipen/scripts/rnaseq/UnitConversion.R +2 -1
- biopipen/scripts/scrna/AnnData2Seurat.R +1 -1
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +24 -8
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +10 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +9 -1
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -8
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +15 -2
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +38 -15
- biopipen/scripts/scrna/CellTypeAnnotation.R +3 -0
- biopipen/scripts/scrna/CellsDistribution.R +3 -2
- biopipen/scripts/scrna/DimPlots.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +1 -1
- biopipen/scripts/scrna/MarkersFinder.R +5 -4
- biopipen/scripts/scrna/MetaMarkers.R +22 -4
- biopipen/scripts/scrna/ModuleScoreCalculator.R +2 -1
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +4 -3
- biopipen/scripts/scrna/Seurat2AnnData.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +73 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +4 -3
- biopipen/scripts/scrna/SeuratClusterStats-features.R +8 -5
- biopipen/scripts/scrna/SeuratClusterStats-hists.R +6 -5
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +4 -3
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -3
- biopipen/scripts/scrna/SeuratClusterStats.R +24 -8
- biopipen/scripts/scrna/SeuratClustering-common.R +213 -0
- biopipen/scripts/scrna/SeuratClustering.R +10 -170
- biopipen/scripts/scrna/SeuratMap2Ref.R +65 -31
- biopipen/scripts/scrna/SeuratMetadataMutater.R +2 -2
- biopipen/scripts/scrna/SeuratPreparing-common.R +452 -0
- biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +201 -0
- biopipen/scripts/scrna/SeuratPreparing.R +22 -562
- biopipen/scripts/scrna/SeuratSubClustering.R +24 -39
- biopipen/scripts/scrna/TopExpressingGenes.R +1 -1
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +7 -4
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +7 -3
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +3 -3
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +3 -3
- biopipen/scripts/snp/MatrixEQTL.R +1 -1
- biopipen/scripts/snp/PlinkCallRate.R +2 -2
- biopipen/scripts/snp/PlinkFreq.R +2 -2
- biopipen/scripts/snp/PlinkHWE.R +2 -2
- biopipen/scripts/snp/PlinkHet.R +2 -2
- biopipen/scripts/snp/PlinkIBD.R +2 -2
- biopipen/scripts/stats/ChowTest.R +1 -1
- biopipen/scripts/stats/DiffCoexpr.R +1 -1
- biopipen/scripts/stats/LiquidAssoc.R +1 -1
- biopipen/scripts/stats/Mediation.R +26 -12
- biopipen/scripts/stats/MetaPvalue.R +4 -1
- biopipen/scripts/stats/MetaPvalue1.R +4 -1
- biopipen/scripts/tcr/Attach2Seurat.R +1 -1
- biopipen/scripts/tcr/CDR3AAPhyschem.R +1 -1
- biopipen/scripts/tcr/CloneResidency.R +2 -2
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch-basic.R +0 -4
- biopipen/scripts/tcr/Immunarch-clonality.R +0 -4
- biopipen/scripts/tcr/Immunarch-diversity.R +2 -24
- biopipen/scripts/tcr/Immunarch-geneusage.R +0 -2
- biopipen/scripts/tcr/Immunarch-kmer.R +0 -2
- biopipen/scripts/tcr/Immunarch-overlap.R +0 -2
- biopipen/scripts/tcr/Immunarch-spectratyping.R +0 -2
- biopipen/scripts/tcr/Immunarch-tracking.R +0 -2
- biopipen/scripts/tcr/Immunarch-vjjunc.R +0 -2
- biopipen/scripts/tcr/Immunarch.R +43 -11
- biopipen/scripts/tcr/ImmunarchFilter.R +1 -1
- biopipen/scripts/tcr/ImmunarchLoading.R +2 -2
- biopipen/scripts/tcr/SampleDiversity.R +1 -1
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +2 -2
- biopipen/scripts/tcr/TESSA.R +2 -2
- biopipen/scripts/vcf/TruvariBenchSummary.R +2 -2
- biopipen/scripts/vcf/TruvariConsistency.R +1 -1
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
- biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
- biopipen/scripts/web/gcloud_common.py +49 -0
- {biopipen-0.29.1.dist-info → biopipen-0.30.0.dist-info}/METADATA +1 -1
- {biopipen-0.29.1.dist-info → biopipen-0.30.0.dist-info}/RECORD +105 -96
- {biopipen-0.29.1.dist-info → biopipen-0.30.0.dist-info}/WHEEL +0 -0
- {biopipen-0.29.1.dist-info → biopipen-0.30.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Script for regulatory.MotifAffinityTest
|
|
2
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
3
|
|
|
3
|
-
source("{{biopipen_dir}}/utils/misc.R")
|
|
4
4
|
library(BiocParallel)
|
|
5
5
|
library(BSgenome)
|
|
6
6
|
library(universalmotif)
|
|
@@ -215,12 +215,8 @@ tool <- match.arg(tool, c("motifbreakr", "atsnp"))
|
|
|
215
215
|
|
|
216
216
|
if (tool == "motifbreakr") {
|
|
217
217
|
motifbreakr_args <- {{envs.motifbreakr_args | r}}
|
|
218
|
-
{
|
|
219
|
-
# {{ sourcefile | getmtime }}
|
|
220
|
-
source("{{sourcefile}}")
|
|
218
|
+
{{ biopipen_dir | joinpaths: "scripts", "regulatory", "MotifAffinityTest_MotifBreakR.R" | source_r }}
|
|
221
219
|
} else { # atsnp
|
|
222
220
|
atsnp_args <- {{envs.atsnp_args | r}}
|
|
223
|
-
{
|
|
224
|
-
# {{ sourcefile | getmtime }}
|
|
225
|
-
source("{{sourcefile}}")
|
|
221
|
+
{{ biopipen_dir | joinpaths: "scripts", "regulatory", "MotifAffinityTest_AtSNP.R" | source_r }}
|
|
226
222
|
}
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
-
|
|
3
1
|
library(rlang)
|
|
4
2
|
library(hdf5r)
|
|
5
3
|
library(dplyr)
|
|
@@ -8,6 +6,7 @@ library(Seurat)
|
|
|
8
6
|
sobjfile <- {{in.sobjfile | r}}
|
|
9
7
|
outfile <- {{out.outfile | r}}
|
|
10
8
|
newcol <- {{envs.newcol | r}}
|
|
9
|
+
merge_same_labels <- {{envs.merge | r}}
|
|
11
10
|
celltypist_args <- {{envs.celltypist_args | r}}
|
|
12
11
|
|
|
13
12
|
outdir <- dirname(outfile)
|
|
@@ -33,6 +32,7 @@ if (!file.exists(modelfile)) {
|
|
|
33
32
|
sobj <- NULL
|
|
34
33
|
outtype <- tolower(tools::file_ext(outfile)) # .rds, .h5ad, .h5seurat
|
|
35
34
|
if (!endsWith(sobjfile, ".h5ad")) {
|
|
35
|
+
log_info("Convert input to H5AD ...")
|
|
36
36
|
library(SeuratDisk)
|
|
37
37
|
|
|
38
38
|
assay <- celltypist_args$assay
|
|
@@ -123,8 +123,7 @@ if (file.exists(celltypist_outfile) &&
|
|
|
123
123
|
if (isTRUE(celltypist_args$majority_voting)) {
|
|
124
124
|
command <- paste(command, "-v")
|
|
125
125
|
}
|
|
126
|
-
|
|
127
|
-
print(command)
|
|
126
|
+
log_info("Running celltypist:")
|
|
128
127
|
log_debug("- {command}")
|
|
129
128
|
rc <- system(command)
|
|
130
129
|
if (rc != 0) {
|
|
@@ -135,11 +134,21 @@ if (file.exists(celltypist_outfile) &&
|
|
|
135
134
|
if (outtype == "h5ad") {
|
|
136
135
|
# log_info("Using H5AD from celltypist as output directly ...")
|
|
137
136
|
# file.rename(paste0(out_prefix, ".h5ad"), outfile)
|
|
137
|
+
if (merge_same_labels) {
|
|
138
|
+
log_warn("- Merging clusters with the same labels is not supported for h5ad outfile ...")
|
|
139
|
+
}
|
|
138
140
|
} else if (outtype == "h5seurat") {
|
|
139
141
|
log_info("Converting H5AD from celltypist to H5Seurat ...")
|
|
140
142
|
# outfile is cleaned by the pipeline anyway
|
|
141
143
|
Convert(
|
|
142
|
-
celltypist_outfile,
|
|
144
|
+
celltypist_outfile,
|
|
145
|
+
assay = assay %||% 'RNA',
|
|
146
|
+
dest = outfile,
|
|
147
|
+
overwrite = TRUE
|
|
148
|
+
)
|
|
149
|
+
if (merge_same_labels) {
|
|
150
|
+
log_warn("- Merging clusters with the same labels is not supported for h5seurat outfile ...")
|
|
151
|
+
}
|
|
143
152
|
} else if (outtype == "rds") {
|
|
144
153
|
if (is.null(sobj)) {
|
|
145
154
|
log_info("Converting H5AD from celltypist to RDS ...")
|
|
@@ -178,7 +187,10 @@ if (outtype == "h5ad") {
|
|
|
178
187
|
# end
|
|
179
188
|
|
|
180
189
|
sobj <- LoadH5Seurat(h5seurat_file)
|
|
181
|
-
|
|
190
|
+
if (merge_same_labels) {
|
|
191
|
+
log_info("Merging clusters with the same labels ...")
|
|
192
|
+
sobj <- merge_clusters_with_same_labels(sobj, newcol)
|
|
193
|
+
}
|
|
182
194
|
} else {
|
|
183
195
|
log_info("Attaching celltypist results to Seurat object ...")
|
|
184
196
|
|
|
@@ -228,9 +240,13 @@ if (outtype == "h5ad") {
|
|
|
228
240
|
} else if (!is.null(newcol)) {
|
|
229
241
|
sobj@meta.data[[newcol]] <- sobj@meta.data[["predicted_labels"]]
|
|
230
242
|
}
|
|
231
|
-
|
|
232
|
-
|
|
243
|
+
if (merge_same_labels) {
|
|
244
|
+
log_info("Merging clusters with the same labels ...")
|
|
245
|
+
sobj <- merge_clusters_with_same_labels(sobj, newcol)
|
|
246
|
+
}
|
|
233
247
|
}
|
|
248
|
+
log_info("Saving Seurat object in RDS ...")
|
|
249
|
+
saveRDS(sobj, outfile)
|
|
234
250
|
} else {
|
|
235
251
|
stop(paste0("Unknown output type: ", outtype))
|
|
236
252
|
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
merge_clusters_with_same_labels <- function(sobj, newcol) {
|
|
2
|
+
if (is.null(newcol)) {
|
|
3
|
+
sobj@meta.data$seurat_clusters <- sub("\\.\\d+$", "", sobj@meta.data$seurat_clusters)
|
|
4
|
+
Idents(sobj) <- "seurat_clusters"
|
|
5
|
+
} else {
|
|
6
|
+
sobj@meta.data[[newcol]] <- sub("\\.\\d+$", "", sobj@meta.data[[newcol]])
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
sobj
|
|
10
|
+
}
|
|
@@ -1,14 +1,17 @@
|
|
|
1
|
-
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
1
|
library(Seurat)
|
|
3
2
|
|
|
4
3
|
sobjfile <- {{in.sobjfile | r}}
|
|
5
4
|
outfile <- {{out.outfile | r}}
|
|
6
5
|
celltypes <- {{envs.cell_types | r}}
|
|
7
6
|
newcol <- {{envs.newcol | r}}
|
|
7
|
+
merge_same_labels <- {{envs.merge | r}}
|
|
8
8
|
|
|
9
9
|
if (is.null(celltypes) || length(celltypes) == 0) {
|
|
10
10
|
log_warn("No cell types are given!")
|
|
11
11
|
|
|
12
|
+
if (merge_same_labels) {
|
|
13
|
+
log_warn("Ignoring 'envs.merge' because no cell types are given!")
|
|
14
|
+
}
|
|
12
15
|
# create a symbolic link to the input file
|
|
13
16
|
file.symlink(sobjfile, outfile)
|
|
14
17
|
} else {
|
|
@@ -55,5 +58,10 @@ if (is.null(celltypes) || length(celltypes) == 0) {
|
|
|
55
58
|
Idents(sobj) <- "seurat_clusters"
|
|
56
59
|
}
|
|
57
60
|
|
|
61
|
+
if (merge_same_labels) {
|
|
62
|
+
log_info("Merging clusters with the same labels ...")
|
|
63
|
+
sobj <- merge_clusters_with_same_labels(sobj, newcol)
|
|
64
|
+
}
|
|
65
|
+
|
|
58
66
|
saveRDS(sobj, outfile)
|
|
59
67
|
}
|
|
@@ -2,21 +2,20 @@ library(Seurat)
|
|
|
2
2
|
library(dplyr)
|
|
3
3
|
library(hitype)
|
|
4
4
|
|
|
5
|
-
source("{{biopipen_dir}}/utils/misc.R")
|
|
6
|
-
|
|
7
5
|
sobjfile = {{in.sobjfile | r}}
|
|
8
6
|
outfile = {{out.outfile | r}}
|
|
9
7
|
tissue = {{envs.hitype_tissue | r}}
|
|
10
8
|
db = {{envs.hitype_db | r}}
|
|
11
9
|
newcol = {{envs.newcol | r}}
|
|
10
|
+
merge_same_labels = {{envs.merge | r}}
|
|
12
11
|
|
|
13
12
|
if (is.null(db)) { stop("`envs.hitype_db` is not set") }
|
|
14
13
|
|
|
15
|
-
|
|
14
|
+
log_info("Reading Seurat object...")
|
|
16
15
|
sobj = readRDS(sobjfile)
|
|
17
16
|
|
|
18
17
|
# prepare gene sets
|
|
19
|
-
|
|
18
|
+
log_info("Preparing gene sets...")
|
|
20
19
|
if (startsWith(db, "hitypedb_") && !grepl(".", db, fixed = TRUE)) {
|
|
21
20
|
gs_list = gs_prepare(eval(as.symbol(db)), tissue)
|
|
22
21
|
} else {
|
|
@@ -24,10 +23,10 @@ if (startsWith(db, "hitypedb_") && !grepl(".", db, fixed = TRUE)) {
|
|
|
24
23
|
}
|
|
25
24
|
|
|
26
25
|
# run RunHitype
|
|
27
|
-
|
|
26
|
+
log_info("Running RunHitype...")
|
|
28
27
|
sobj = RunHitype(sobj, gs_list, threshold = 0.0, make_unique = TRUE)
|
|
29
28
|
|
|
30
|
-
|
|
29
|
+
log_info("Renaming cell types...")
|
|
31
30
|
hitype_levels = sobj@meta.data %>%
|
|
32
31
|
select(seurat_clusters, hitype) %>%
|
|
33
32
|
distinct(seurat_clusters, .keep_all = TRUE) %>%
|
|
@@ -42,10 +41,15 @@ if (is.null(newcol)) {
|
|
|
42
41
|
sobj[[newcol]] = factor(sobj$hitype, levels = hitype_levels)
|
|
43
42
|
}
|
|
44
43
|
|
|
45
|
-
|
|
44
|
+
if (merge_same_labels) {
|
|
45
|
+
log_info("Merging clusters with the same labels...")
|
|
46
|
+
sobj = merge_clusters_with_same_labels(sobj, newcol)
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
log_info("Saving Seurat object...")
|
|
46
50
|
saveRDS(sobj, outfile)
|
|
47
51
|
|
|
48
|
-
|
|
52
|
+
log_info("Saving the mappings ...")
|
|
49
53
|
if (is.null(newcol)) {
|
|
50
54
|
celltypes = sobj@meta.data %>%
|
|
51
55
|
group_by(seurat_clusters_id) %>%
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
1
|
library(scCATCH)
|
|
3
2
|
library(Seurat)
|
|
4
3
|
|
|
@@ -6,6 +5,7 @@ sobjfile = {{in.sobjfile | r}}
|
|
|
6
5
|
outfile = {{out.outfile | r}}
|
|
7
6
|
sccatch_args = {{envs.sccatch_args | r}}
|
|
8
7
|
newcol = {{envs.newcol | r}}
|
|
8
|
+
merge_same_labels = {{envs.merge | r}}
|
|
9
9
|
|
|
10
10
|
if (!is.null(sccatch_args$marker)) {
|
|
11
11
|
cellmatch = readRDS(sccatch_args$marker)
|
|
@@ -17,14 +17,20 @@ if (is.integer(sccatch_args$use_method)) {
|
|
|
17
17
|
sccatch_args$use_method = as.character(sccatch_args$use_method)
|
|
18
18
|
}
|
|
19
19
|
|
|
20
|
+
log_info("Reading Seurat object...")
|
|
20
21
|
sobj = readRDS(sobjfile)
|
|
21
22
|
|
|
23
|
+
log_info("Running createscCATCH ...")
|
|
22
24
|
obj = createscCATCH(data = GetAssayData(sobj), cluster = as.character(Idents(sobj)))
|
|
23
25
|
sccatch_args$object = obj
|
|
24
26
|
|
|
27
|
+
log_info("Running findmarkergene ...")
|
|
25
28
|
obj = do_call(findmarkergene, sccatch_args)
|
|
29
|
+
|
|
30
|
+
log_info("Running findcelltype ...")
|
|
26
31
|
obj = findcelltype(object = obj)
|
|
27
32
|
|
|
33
|
+
log_info("Saving the mappings ...")
|
|
28
34
|
write.table(
|
|
29
35
|
obj@celltype,
|
|
30
36
|
file = file.path(dirname(outfile), "cluster2celltype.tsv"),
|
|
@@ -36,7 +42,7 @@ celltypes = as.list(obj@celltype$cell_type)
|
|
|
36
42
|
names(celltypes) = obj@celltype$cluster
|
|
37
43
|
|
|
38
44
|
if (length(celltypes) == 0) {
|
|
39
|
-
|
|
45
|
+
log_warn("- No cell types annotated from the database!")
|
|
40
46
|
} else {
|
|
41
47
|
if (is.null(newcol)) {
|
|
42
48
|
sobj$seurat_clusters_id = Idents(sobj)
|
|
@@ -49,5 +55,12 @@ if (length(celltypes) == 0) {
|
|
|
49
55
|
sobj[[newcol]] = Idents(sobj)
|
|
50
56
|
Idents(sobj) = "seurat_clusters"
|
|
51
57
|
}
|
|
58
|
+
|
|
59
|
+
if (merge_same_labels) {
|
|
60
|
+
log_info("Merging clusters with the same labels ...")
|
|
61
|
+
sobj = merge_clusters_with_same_labels(sobj, newcol)
|
|
62
|
+
}
|
|
52
63
|
}
|
|
64
|
+
|
|
65
|
+
log_info("Saving Seurat object ...")
|
|
53
66
|
saveRDS(sobj, outfile)
|
|
@@ -1,34 +1,37 @@
|
|
|
1
1
|
library(dplyr)
|
|
2
2
|
library(HGNChelper)
|
|
3
3
|
library(Seurat)
|
|
4
|
+
library(rlang)
|
|
4
5
|
|
|
5
|
-
|
|
6
|
-
source("{{biopipen_dir}}/scripts/scrna/sctype.R")
|
|
6
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "sctype.R" | source_r }}
|
|
7
7
|
|
|
8
8
|
sobjfile = {{in.sobjfile | r}}
|
|
9
9
|
outfile = {{out.outfile | r}}
|
|
10
10
|
tissue = {{envs.sctype_tissue | r}}
|
|
11
11
|
db = {{envs.sctype_db | r}}
|
|
12
12
|
newcol = {{envs.newcol | r}}
|
|
13
|
+
merge_same_labels = {{envs.merge | r}}
|
|
13
14
|
|
|
14
15
|
if (is.null(db)) { stop("`envs.sctype_args.db` is not set") }
|
|
15
16
|
|
|
16
|
-
|
|
17
|
+
log_info("Reading Seurat object...")
|
|
17
18
|
sobj = readRDS(sobjfile)
|
|
18
19
|
|
|
19
20
|
# prepare gene sets
|
|
20
|
-
|
|
21
|
+
log_info("Preparing gene sets...")
|
|
21
22
|
gs_list = gene_sets_prepare(db, tissue)
|
|
22
23
|
|
|
23
24
|
scRNAseqData = GetAssayData(sobj, layer = "scale.data")
|
|
24
25
|
idents = as.character(unique(Idents(sobj)))
|
|
25
26
|
idents = idents[order(as.numeric(idents))]
|
|
26
27
|
|
|
28
|
+
log_info("Working on different levels of cell type labels ...")
|
|
27
29
|
cell_types_list = list()
|
|
28
30
|
for (i in seq_along(gs_list)) {
|
|
31
|
+
log_info("- Working on level {i} ...")
|
|
29
32
|
if (is.null(gs_list[[i]])) next
|
|
30
33
|
|
|
31
|
-
|
|
34
|
+
log_info(" Calculating cell-type scores ...")
|
|
32
35
|
es.max = sctype_score(
|
|
33
36
|
scRNAseqData = scRNAseqData,
|
|
34
37
|
scaled = TRUE,
|
|
@@ -36,7 +39,7 @@ for (i in seq_along(gs_list)) {
|
|
|
36
39
|
gs2 = gs_list[[i]]$gs_negative
|
|
37
40
|
)
|
|
38
41
|
|
|
39
|
-
|
|
42
|
+
log_info(" Merging cell-type scores by cluster ...")
|
|
40
43
|
cl_resutls = do_call(
|
|
41
44
|
"rbind",
|
|
42
45
|
lapply(
|
|
@@ -59,12 +62,12 @@ for (i in seq_along(gs_list)) {
|
|
|
59
62
|
write("\n####### sctype_scores_count ########", stderr())
|
|
60
63
|
write(capture.output(sctype_scores_count), stderr())
|
|
61
64
|
write("\n####################################", stderr())
|
|
62
|
-
|
|
65
|
+
log_info(" Scores tied in the above clusters.", immediate. = TRUE)
|
|
63
66
|
}
|
|
64
67
|
|
|
65
68
|
if (length(gs_list) == 1 || i > 1) {
|
|
66
69
|
# set low-confident (low ScType score) clusters to "unknown"
|
|
67
|
-
|
|
70
|
+
log_info(" Setting low-confident clusters to 'Unknown'...")
|
|
68
71
|
sctype_scores$type[as.numeric(as.character(sctype_scores$scores)) < sctype_scores$ncells/4] = "Unknown"
|
|
69
72
|
}
|
|
70
73
|
|
|
@@ -82,7 +85,7 @@ for (i in seq_along(gs_list)) {
|
|
|
82
85
|
if (length(cell_types_list) == 1) {
|
|
83
86
|
celltypes = cell_types_list[[1]]
|
|
84
87
|
} else {
|
|
85
|
-
|
|
88
|
+
log_info("Merging cell types at all levels ...")
|
|
86
89
|
celltypes = list()
|
|
87
90
|
|
|
88
91
|
for (i in idents) {
|
|
@@ -97,7 +100,18 @@ if (length(cell_types_list) == 1) {
|
|
|
97
100
|
}
|
|
98
101
|
|
|
99
102
|
|
|
100
|
-
|
|
103
|
+
log_info("Renaming cell types...")
|
|
104
|
+
ct_numbering = list()
|
|
105
|
+
for (key in names(celltypes)) {
|
|
106
|
+
ct = celltypes[[key]]
|
|
107
|
+
ct_numbering[[ct]] = ct_numbering[[ct]] %||% 0
|
|
108
|
+
if (ct_numbering[[ct]] > 0) {
|
|
109
|
+
celltypes[[key]] = paste0(ct, ".", ct_numbering[[ct]])
|
|
110
|
+
}
|
|
111
|
+
ct_numbering[[ct]] = ct_numbering[[ct]] + 1
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
celltypes = as.list(celltypes)
|
|
101
115
|
if (is.null(newcol)) {
|
|
102
116
|
sobj$seurat_clusters_id = sobj$seurat_clusters
|
|
103
117
|
celltypes$object = sobj
|
|
@@ -109,12 +123,18 @@ if (is.null(newcol)) {
|
|
|
109
123
|
sobj[[newcol]] = Idents(sobj)
|
|
110
124
|
Idents(sobj) = "seurat_clusters"
|
|
111
125
|
}
|
|
112
|
-
|
|
113
|
-
print("- Saving Seurat object...")
|
|
114
|
-
saveRDS(sobj, outfile)
|
|
115
|
-
|
|
116
|
-
print("- Saving the mappings ...")
|
|
117
126
|
celltypes$object = NULL
|
|
127
|
+
gc()
|
|
128
|
+
|
|
129
|
+
if (merge_same_labels) {
|
|
130
|
+
log_info("Merging clusters with the same labels...")
|
|
131
|
+
sobj <- merge_clusters_with_same_labels(sobj, newcol)
|
|
132
|
+
celltypes <- lapply(celltypes, function(ct) {
|
|
133
|
+
sub("\\.\\d+$", "", ct)
|
|
134
|
+
})
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
log_info("Saving the mappings ...")
|
|
118
138
|
write.table(
|
|
119
139
|
data.frame(
|
|
120
140
|
Cluster = names(celltypes),
|
|
@@ -126,3 +146,6 @@ write.table(
|
|
|
126
146
|
quote = FALSE,
|
|
127
147
|
row.names = FALSE
|
|
128
148
|
)
|
|
149
|
+
|
|
150
|
+
log_info("Saving Seurat object...")
|
|
151
|
+
saveRDS(sobj, outfile)
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
set.seed(8525)
|
|
2
2
|
|
|
3
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
4
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "CellTypeAnnotation-common.R" | source_r }}
|
|
5
|
+
|
|
3
6
|
{% if envs.tool == "hitype" %}
|
|
4
7
|
{% include biopipen_dir + "/scripts/scrna/CellTypeAnnotation-hitype.R" %}
|
|
5
8
|
{% elif envs.tool == "sctype" %}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
+
{{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
|
|
3
|
+
|
|
3
4
|
library(Seurat)
|
|
4
5
|
library(rlang)
|
|
5
6
|
library(tidyr)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
+
{{ biopipen_dir | joinpaths: "utils", "caching.R" | source_r }}
|
|
3
|
+
{{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
|
|
4
4
|
|
|
5
5
|
library(rlang)
|
|
6
6
|
library(dplyr)
|
|
@@ -70,7 +70,8 @@ if (defassay == "SCT" && !"PrepSCTFindMarkers" %in% names(srtobj@commands)) {
|
|
|
70
70
|
|
|
71
71
|
srtobj <- PrepSCTFindMarkers(srtobj)
|
|
72
72
|
# compose a new SeuratCommand to record it to srtobj@commands
|
|
73
|
-
|
|
73
|
+
commands <- names(pbmc_small@commands)
|
|
74
|
+
scommand <- pbmc_small@commands[[commands[length(commands)]]]
|
|
74
75
|
scommand@name <- "PrepSCTFindMarkers"
|
|
75
76
|
scommand@time.stamp <- Sys.time()
|
|
76
77
|
scommand@assay.used <- "SCT"
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
+
{{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
|
|
3
3
|
|
|
4
4
|
library(rlang)
|
|
5
5
|
library(dplyr)
|
|
@@ -36,6 +36,20 @@ set.seed(8525)
|
|
|
36
36
|
|
|
37
37
|
log_info("- Reading Seurat object ...")
|
|
38
38
|
srtobj <- readRDS(srtfile)
|
|
39
|
+
if (DefaultAssay(srtobj) == "SCT" && !"PrepSCTFindMarkers" %in% names(srtobj@commands)) {
|
|
40
|
+
log_warn("- SCTransform used but PrepSCTFindMarkers not applied, running ...")
|
|
41
|
+
|
|
42
|
+
srtobj <- PrepSCTFindMarkers(srtobj)
|
|
43
|
+
# compose a new SeuratCommand to record it to srtobj@commands
|
|
44
|
+
commands <- names(pbmc_small@commands)
|
|
45
|
+
scommand <- pbmc_small@commands[[commands[length(commands)]]]
|
|
46
|
+
scommand@name <- "PrepSCTFindMarkers"
|
|
47
|
+
scommand@time.stamp <- Sys.time()
|
|
48
|
+
scommand@assay.used <- "SCT"
|
|
49
|
+
scommand@call.string <- "PrepSCTFindMarkers(object = srtobj)"
|
|
50
|
+
scommand@params <- list()
|
|
51
|
+
srtobj@commands$PrepSCTFindMarkers <- scommand
|
|
52
|
+
}
|
|
39
53
|
|
|
40
54
|
log_info("- Mutate meta data if needed ...")
|
|
41
55
|
if (!is.null(mutaters) && length(mutaters)) {
|
|
@@ -79,13 +93,13 @@ expand_each <- function(name, case) {
|
|
|
79
93
|
by = make.names(paste0("..", name, "_", case$each, "_", each))
|
|
80
94
|
idents <- case$idents
|
|
81
95
|
if (is.null(idents) || length(idents) == 0) {
|
|
82
|
-
srtobj@meta.data
|
|
96
|
+
srtobj@meta.data <<- srtobj@meta.data %>%
|
|
83
97
|
mutate(
|
|
84
98
|
!!sym(by) := if_else(!!sym(case$each) == each, !!sym(case$group_by), NA)
|
|
85
99
|
)
|
|
86
100
|
idents <- srtobj@meta.data %>% pull(case$group_by) %>% unique() %>% na.omit()
|
|
87
101
|
} else {
|
|
88
|
-
srtobj@meta.data
|
|
102
|
+
srtobj@meta.data <<- srtobj@meta.data %>%
|
|
89
103
|
mutate(
|
|
90
104
|
!!sym(by) := if_else(
|
|
91
105
|
!!sym(case$each) == each & !!sym(case$group_by) %in% case$idents,
|
|
@@ -204,6 +218,10 @@ do_case <- function(casename) {
|
|
|
204
218
|
if (is.null(df)) {
|
|
205
219
|
msg <- "No markers found. May be due to too few cells or features."
|
|
206
220
|
} else {
|
|
221
|
+
df <- df[
|
|
222
|
+
apply(df, 1, function(x) !all(is.na(x)) && !all(x == x[1])), ,
|
|
223
|
+
drop = FALSE
|
|
224
|
+
]
|
|
207
225
|
genes <- rownames(df)
|
|
208
226
|
# rows: cells, cols: genes
|
|
209
227
|
df <- cbind(as.data.frame(scale(Matrix::t(df))), sobj@meta.data[, case$group_by])
|
biopipen/scripts/scrna/ScFGSEA.R
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
+
{{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
|
|
3
|
+
{{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
|
|
4
|
+
|
|
4
5
|
library(rlang)
|
|
5
6
|
library(Seurat)
|
|
6
7
|
library(tidyseurat)
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# srtobj, clustrees_defaults, clustrees
|
|
2
|
+
log_info("clustrees:")
|
|
3
|
+
if (
|
|
4
|
+
(is.null(clustrees) || length(clustrees) == 0) &&
|
|
5
|
+
(is.null(clustrees_defaults$prefix) || clustrees_defaults$prefix == "")) {
|
|
6
|
+
log_warn("- no cases, skipping intentionally ...")
|
|
7
|
+
} else { # clustrees set or prefix is not empty
|
|
8
|
+
library(clustree)
|
|
9
|
+
odir = file.path(outdir, "clustrees")
|
|
10
|
+
dir.create(odir, recursive=TRUE, showWarnings=FALSE)
|
|
11
|
+
|
|
12
|
+
if ((is.null(clustrees) || length(clustrees) == 0) && clustrees_defaults$prefix == "_auto") {
|
|
13
|
+
clustrees <- list()
|
|
14
|
+
for (key in names(srtobj@commands)) {
|
|
15
|
+
if (startsWith(key, "FindClusters") && length(srtobj@commands[[key]]$resolution) > 1) {
|
|
16
|
+
pref <- substring(key, 14)
|
|
17
|
+
if (pref == "") {
|
|
18
|
+
pref <- "seurat_clusters"
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
clustrees[[pref]] <- list(prefix = pref)
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
if (length(clustrees) == 0) {
|
|
26
|
+
log_warn("- no cases found, skipping ...")
|
|
27
|
+
} else {
|
|
28
|
+
reports <- list()
|
|
29
|
+
for (name in names(clustrees)) {
|
|
30
|
+
if (is.null(clustrees[[name]]$prefix)) {
|
|
31
|
+
stop(paste0("clustrees: prefix is required for case: ", name))
|
|
32
|
+
}
|
|
33
|
+
case <- list_update(clustrees_defaults, clustrees[[name]])
|
|
34
|
+
|
|
35
|
+
devpars <- case$devpars
|
|
36
|
+
devpars$width <- devpars$width %||% clustrees_defaults$devpars$width %||% 800
|
|
37
|
+
devpars$height <- devpars$height %||% clustrees_defaults$devpars$height %||% 1000
|
|
38
|
+
devpars$res <- devpars$res %||% clustrees_defaults$devpars$res %||% 100
|
|
39
|
+
case$devpars <- NULL
|
|
40
|
+
prefix <- sub("\\.$", "", case$prefix)
|
|
41
|
+
log_info("- Case: {name} ...")
|
|
42
|
+
case$prefix <- paste0(prefix, ".")
|
|
43
|
+
case$x <- srtobj@meta.data %>% select(starts_with(case$prefix))
|
|
44
|
+
case$x <- case$x[complete.cases(case$x), , drop = FALSE]
|
|
45
|
+
|
|
46
|
+
command <- srtobj@commands[[paste0("FindClusters.", prefix)]] %||%
|
|
47
|
+
(if(prefix == "seurat_clusters") srtobj@commands$FindClusters else NULL)
|
|
48
|
+
|
|
49
|
+
clustree_file <- file.path(odir, paste0(prefix, ".clustree.png"))
|
|
50
|
+
png(clustree_file, width = devpars$width, height = devpars$height, res = devpars$res)
|
|
51
|
+
p <- do_call(clustree, case)
|
|
52
|
+
print(p)
|
|
53
|
+
dev.off()
|
|
54
|
+
|
|
55
|
+
if (is.null(command)) {
|
|
56
|
+
resolution <- substring(colnames(case$x), nchar(case$prefix) + 1)
|
|
57
|
+
} else {
|
|
58
|
+
resolution <- command$resolution
|
|
59
|
+
}
|
|
60
|
+
resolution_used <- resolution[length(resolution)]
|
|
61
|
+
|
|
62
|
+
reports[[length(reports) + 1]] <- list(
|
|
63
|
+
kind = "table_image",
|
|
64
|
+
src = clustree_file,
|
|
65
|
+
name = name,
|
|
66
|
+
descr = paste0("Resolutions: ", paste(resolution, collapse = ", "), "; resolution used: ", resolution_used)
|
|
67
|
+
)
|
|
68
|
+
}
|
|
69
|
+
reports$h1 <- "Clustree plots"
|
|
70
|
+
reports$ui <- "table_of_images"
|
|
71
|
+
do.call(add_report, reports)
|
|
72
|
+
}
|
|
73
|
+
}
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
# Loaded variables: srtfile, outdir, srtobj
|
|
2
2
|
|
|
3
|
-
dimplots_defaults = {{envs.dimplots_defaults | r: todot="-"}}
|
|
4
|
-
dimplots = {{envs.dimplots | r: todot="-", skip=1}}
|
|
3
|
+
# dimplots_defaults = {{envs.dimplots_defaults | r: todot="-"}}
|
|
4
|
+
# dimplots = {{envs.dimplots | r: todot="-", skip=1}}
|
|
5
|
+
log_info("dimplots:")
|
|
5
6
|
|
|
6
7
|
odir = file.path(outdir, "dimplots")
|
|
7
8
|
dir.create(odir, recursive=TRUE, showWarnings=FALSE)
|
|
8
9
|
|
|
9
10
|
do_one_dimplot = function(name) {
|
|
10
|
-
log_info(
|
|
11
|
+
log_info("- Case: {name}")
|
|
11
12
|
|
|
12
13
|
case = list_update(dimplots_defaults, dimplots[[name]])
|
|
13
14
|
case$devpars = list_update(dimplots_defaults$devpars, dimplots[[name]]$devpars)
|