biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +10 -183
- biopipen/core/proc.py +5 -3
- biopipen/core/testing.py +8 -1
- biopipen/ns/bam.py +40 -4
- biopipen/ns/cnv.py +1 -1
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/delim.py +1 -1
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +38 -0
- biopipen/ns/plot.py +8 -0
- biopipen/ns/scrna.py +290 -288
- biopipen/ns/scrna_metabolic_landscape.py +207 -366
- biopipen/ns/tcr.py +165 -97
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
- biopipen/reports/snp/PlinkCallRate.svelte +2 -2
- biopipen/reports/snp/PlinkFreq.svelte +1 -1
- biopipen/reports/snp/PlinkHWE.svelte +1 -1
- biopipen/reports/snp/PlinkHet.svelte +1 -1
- biopipen/reports/snp/PlinkIBD.svelte +1 -1
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
- biopipen/scripts/bam/CNAClinic.R +41 -6
- biopipen/scripts/bam/CNVpytor.py +2 -1
- biopipen/scripts/bam/ControlFREEC.py +2 -3
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/cnv/AneuploidyScore.R +25 -13
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
- biopipen/scripts/cnv/TMADScore.R +4 -4
- biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
- biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +4 -1
- biopipen/scripts/gene/GeneNameConversion.R +14 -12
- biopipen/scripts/gsea/Enrichr.R +2 -2
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/PreRank.R +3 -3
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/plot/VennDiagram.R +2 -2
- biopipen/scripts/protein/ProdigySummary.R +34 -27
- biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
- biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
- biopipen/scripts/regulatory/motifs-common.R +10 -9
- biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
- biopipen/scripts/rnaseq/Simulation.R +0 -2
- biopipen/scripts/rnaseq/UnitConversion.R +6 -5
- biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
- biopipen/scripts/scrna/CellCellCommunication.py +1 -1
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
- biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
- biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
- biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
- biopipen/scripts/scrna/MarkersFinder.R +348 -217
- biopipen/scripts/scrna/MetaMarkers.R +3 -3
- biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +157 -75
- biopipen/scripts/scrna/ScSimulation.R +11 -10
- biopipen/scripts/scrna/ScVelo.py +605 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
- biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
- biopipen/scripts/scrna/SeuratClustering.R +31 -48
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
- biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
- biopipen/scripts/scrna/SeuratPreparing.R +76 -24
- biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
- biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
- biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
- biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
- biopipen/scripts/snp/MatrixEQTL.R +39 -20
- biopipen/scripts/snp/PlinkCallRate.R +43 -34
- biopipen/scripts/snp/PlinkFreq.R +34 -41
- biopipen/scripts/snp/PlinkHWE.R +23 -18
- biopipen/scripts/snp/PlinkHet.R +26 -22
- biopipen/scripts/snp/PlinkIBD.R +30 -34
- biopipen/scripts/stats/ChowTest.R +9 -8
- biopipen/scripts/stats/DiffCoexpr.R +13 -11
- biopipen/scripts/stats/LiquidAssoc.R +7 -8
- biopipen/scripts/stats/Mediation.R +8 -8
- biopipen/scripts/stats/MetaPvalue.R +11 -13
- biopipen/scripts/stats/MetaPvalue1.R +6 -5
- biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
- biopipen/scripts/tcr/ClonalStats.R +5 -4
- biopipen/scripts/tcr/CloneResidency.R +3 -3
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
- biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
- biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
- biopipen/scripts/tcr/ScRepLoading.R +114 -92
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +86 -97
- biopipen/scripts/tcr/TESSA.R +65 -115
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
- biopipen/utils/common_docstrs.py +66 -63
- biopipen/utils/reporter.py +177 -0
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
- biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
- biopipen/utils/caching.R +0 -44
- biopipen/utils/gene.R +0 -95
- biopipen/utils/gsea.R +0 -329
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -602
- biopipen/utils/mutate_helpers.R +0 -581
- biopipen/utils/plot.R +0 -209
- biopipen/utils/repr.R +0 -146
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -207
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
|
@@ -2,16 +2,23 @@ library(rlang)
|
|
|
2
2
|
library(hdf5r)
|
|
3
3
|
library(dplyr)
|
|
4
4
|
library(Seurat)
|
|
5
|
+
library(biopipen.utils)
|
|
5
6
|
|
|
6
7
|
sobjfile <- {{in.sobjfile | r}}
|
|
7
8
|
outfile <- {{out.outfile | r}}
|
|
8
9
|
newcol <- {{envs.newcol | r}}
|
|
9
10
|
merge_same_labels <- {{envs.merge | r}}
|
|
10
11
|
celltypist_args <- {{envs.celltypist_args | r}}
|
|
12
|
+
outtype <- {{envs.outtype | r }}
|
|
13
|
+
if (identical(outtype, "input")) {
|
|
14
|
+
outtype <- tolower(tools::file_ext(outfile)) # rds, h5ad, qs/qs2
|
|
15
|
+
}
|
|
11
16
|
|
|
12
17
|
outdir <- dirname(outfile)
|
|
13
18
|
outprefix <- file.path(outdir, tools::file_path_sans_ext(basename(outfile)))
|
|
14
19
|
|
|
20
|
+
log <- get_logger()
|
|
21
|
+
|
|
15
22
|
if (is.null(celltypist_args$model)) {
|
|
16
23
|
stop("Please specify a model for celltypist (envs.celltypist_args.model)")
|
|
17
24
|
} else if (!file.exists(celltypist_args$model)) {
|
|
@@ -30,74 +37,61 @@ if (!file.exists(modelfile)) {
|
|
|
30
37
|
}
|
|
31
38
|
|
|
32
39
|
sobj <- NULL
|
|
33
|
-
outtype <- tolower(tools::file_ext(outfile)) # .rds, .h5ad, .h5seurat
|
|
34
40
|
if (!endsWith(sobjfile, ".h5ad")) {
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
if (!file.exists(h5s_file)) {
|
|
45
|
-
log_info("Reading RDS file ...")
|
|
46
|
-
sobj <- readRDS(sobjfile)
|
|
47
|
-
assay <- assay %||% DefaultAssay(sobj)
|
|
48
|
-
# In order to convert to h5ad
|
|
49
|
-
# https://github.com/satijalab/seurat/issues/8220#issuecomment-1871874649
|
|
50
|
-
sobj_v3 <- sobj
|
|
51
|
-
sobj_v3$RNAv3 <- as(object = sobj[[assay]], Class = "Assay")
|
|
52
|
-
DefaultAssay(sobj_v3) <- "RNAv3"
|
|
53
|
-
sobj_v3$RNA <- NULL
|
|
54
|
-
sobj_v3 <- RenameAssays(sobj_v3, RNAv3 = "RNA")
|
|
55
|
-
|
|
56
|
-
log_info("Saving to H5Seurat file ...")
|
|
57
|
-
SaveH5Seurat(sobj_v3, h5s_file)
|
|
58
|
-
rm(sobj_v3)
|
|
59
|
-
} else if (outtype == "rds") {
|
|
60
|
-
log_info("Reading RDS file ...")
|
|
61
|
-
sobj <- readRDS(sobjfile)
|
|
62
|
-
assay <- assay %||% DefaultAssay(sobj)
|
|
63
|
-
log_info("Using existing H5Seurat file ...")
|
|
64
|
-
} else {
|
|
65
|
-
log_info("Using existing H5Seurat file ...")
|
|
41
|
+
sobj <- read_obj(sobjfile)
|
|
42
|
+
if (is.null(celltypist_args$over_clustering)) {
|
|
43
|
+
# find the default ident name in meta.data
|
|
44
|
+
for (col in colnames(sobj@meta.data)) {
|
|
45
|
+
if (!is.factor(sobj@meta.data[[col]])) { next }
|
|
46
|
+
if (isTRUE(all.equal(Idents(sobj), sobj@meta.data[[col]]))) {
|
|
47
|
+
celltypist_args$over_clustering <- col
|
|
48
|
+
break
|
|
49
|
+
}
|
|
66
50
|
}
|
|
67
|
-
sobjfile <- h5s_file
|
|
68
51
|
}
|
|
69
|
-
if (
|
|
70
|
-
|
|
71
|
-
tools::file_ext(sobjfile),
|
|
72
|
-
". Supported formats: .rds, .RDS, .h5ad, .h5seurat"))
|
|
52
|
+
if (is.null(celltypist_args$over_clustering)) {
|
|
53
|
+
celltypist_args$over_clustering <- FALSE
|
|
73
54
|
}
|
|
74
|
-
if (!
|
|
55
|
+
if (!isFALSE(celltypist_args$over_clustering)) {
|
|
56
|
+
destfile <- paste0(outprefix, ".", celltypist_args$over_clustering, ".h5ad")
|
|
57
|
+
} else {
|
|
75
58
|
destfile <- paste0(outprefix, ".h5ad")
|
|
76
|
-
if (file.exists(destfile) && (file.mtime(destfile) < file.mtime(sobjfile))) {
|
|
77
|
-
file.remove(destfile)
|
|
78
|
-
}
|
|
79
|
-
if (file.exists(destfile)) {
|
|
80
|
-
log_info("Using existing H5AD file ...")
|
|
81
|
-
} else {
|
|
82
|
-
log_info("Converting to H5AD file ...")
|
|
83
|
-
Convert(sobjfile, dest = destfile, assay = assay %||% "RNA")
|
|
84
|
-
}
|
|
85
|
-
sobjfile <- destfile
|
|
86
59
|
}
|
|
60
|
+
|
|
61
|
+
if (file.exists(destfile) && (file.mtime(destfile) < file.mtime(sobjfile))) {
|
|
62
|
+
file.remove(destfile)
|
|
63
|
+
}
|
|
64
|
+
if (file.exists(destfile)) {
|
|
65
|
+
log$warn("Using existing H5AD file: {destfile} ...")
|
|
66
|
+
} else {
|
|
67
|
+
log$info("Converting to H5AD file ...")
|
|
68
|
+
ConvertSeuratToAnnData(
|
|
69
|
+
sobj,
|
|
70
|
+
outfile = destfile,
|
|
71
|
+
assay = celltypist_args$assay %||% "RNA",
|
|
72
|
+
log = log
|
|
73
|
+
)
|
|
74
|
+
}
|
|
75
|
+
sobjfile <- destfile
|
|
87
76
|
}
|
|
88
77
|
|
|
89
78
|
# sobjfile h5ad ensured
|
|
90
79
|
# use celltypist to annotate
|
|
91
|
-
|
|
80
|
+
log$info("Annotating cell types using celltypist ...")
|
|
81
|
+
# celltypist_script <- file.path(
|
|
82
|
+
# "{ {biopipen_dir} }", "scripts", "scrna", "celltypist-wrapper.py"
|
|
83
|
+
# )
|
|
84
|
+
# In case this script is running in the cloud and <biopipen_dir> cannot be found there
|
|
85
|
+
# Instead, we use the python command, which is associated with the cloud environment,
|
|
86
|
+
# to get the biopipen directory
|
|
87
|
+
biopipen_dir <- get_biopipen_dir(celltypist_args$python)
|
|
92
88
|
celltypist_script <- file.path(
|
|
93
|
-
|
|
89
|
+
biopipen_dir, "scripts", "scrna", "celltypist-wrapper.py"
|
|
94
90
|
)
|
|
95
91
|
|
|
96
92
|
if (outtype == "h5ad") {
|
|
97
93
|
celltypist_outfile <- outfile
|
|
98
|
-
} else if (outtype == "
|
|
99
|
-
celltypist_outfile <- paste0(outprefix, ".celltypist.h5ad")
|
|
100
|
-
} else if (outtype == "rds") {
|
|
94
|
+
} else if (outtype == "rds" || outtype == "qs" || outtype == "qs2") {
|
|
101
95
|
ext <- if (is.null(sobj)) ".h5ad" else ".txt"
|
|
102
96
|
celltypist_outfile <- paste0(outprefix, ".celltypist", ext)
|
|
103
97
|
} else {
|
|
@@ -106,7 +100,7 @@ if (outtype == "h5ad") {
|
|
|
106
100
|
|
|
107
101
|
if (file.exists(celltypist_outfile) &&
|
|
108
102
|
(file.mtime(celltypist_outfile) > file.mtime(sobjfile))) {
|
|
109
|
-
|
|
103
|
+
log$warn("Using existing celltypist results: {celltypist_outfile} ...")
|
|
110
104
|
} else {
|
|
111
105
|
command <- paste(
|
|
112
106
|
paste0("CELLTYPIST_FOLDER='", outdir, "'"),
|
|
@@ -123,76 +117,29 @@ if (file.exists(celltypist_outfile) &&
|
|
|
123
117
|
if (isTRUE(celltypist_args$majority_voting)) {
|
|
124
118
|
command <- paste(command, "-v")
|
|
125
119
|
}
|
|
126
|
-
|
|
127
|
-
|
|
120
|
+
log$info("Running celltypist:")
|
|
121
|
+
log$debug("- {command}")
|
|
128
122
|
rc <- system(command)
|
|
129
123
|
if (rc != 0) {
|
|
130
|
-
stop("Failed to run celltypist")
|
|
124
|
+
stop("Failed to run celltypist. Check the job.stderr file to see the error message.")
|
|
131
125
|
}
|
|
132
126
|
}
|
|
133
127
|
|
|
134
128
|
if (outtype == "h5ad") {
|
|
135
|
-
# log_info("Using H5AD from celltypist as output directly ...")
|
|
136
|
-
# file.rename(paste0(out_prefix, ".h5ad"), outfile)
|
|
137
|
-
if (merge_same_labels) {
|
|
138
|
-
log_warn("- Merging clusters with the same labels is not supported for h5ad outfile ...")
|
|
139
|
-
}
|
|
140
|
-
} else if (outtype == "h5seurat") {
|
|
141
|
-
log_info("Converting H5AD from celltypist to H5Seurat ...")
|
|
142
|
-
# outfile is cleaned by the pipeline anyway
|
|
143
|
-
Convert(
|
|
144
|
-
celltypist_outfile,
|
|
145
|
-
assay = assay %||% 'RNA',
|
|
146
|
-
dest = outfile,
|
|
147
|
-
overwrite = TRUE
|
|
148
|
-
)
|
|
149
129
|
if (merge_same_labels) {
|
|
150
|
-
|
|
130
|
+
log$warn("- Merging clusters with the same labels is not supported and is ignored for h5ad outfile ...")
|
|
151
131
|
}
|
|
152
|
-
} else if (outtype == "rds") {
|
|
132
|
+
} else if (outtype == "rds" || outtype == "qs" || outtype == "qs2") {
|
|
153
133
|
if (is.null(sobj)) {
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
Convert(
|
|
162
|
-
celltypist_outfile,
|
|
163
|
-
assay = assay %||% 'RNA', dest = h5seurat_file, overwrite = TRUE)
|
|
164
|
-
}
|
|
165
|
-
log_info("- Converting to RDS ...")
|
|
166
|
-
# Fix Missing required datasets 'levels' and 'values'
|
|
167
|
-
# https://github.com/mojaveazure/seurat-disk/issues/109#issuecomment-1722394184
|
|
168
|
-
f <- H5File$new(h5seurat_file, "r+")
|
|
169
|
-
groups <- f$ls(recursive = TRUE)
|
|
170
|
-
|
|
171
|
-
for (name in groups$name[grepl("categories", groups$name)]) {
|
|
172
|
-
names <- strsplit(name, "/")[[1]]
|
|
173
|
-
names <- c(names[1:length(names) - 1], "levels")
|
|
174
|
-
new_name <- paste(names, collapse = "/")
|
|
175
|
-
f[[new_name]] <- f[[name]]
|
|
176
|
-
}
|
|
177
|
-
|
|
178
|
-
for (name in groups$name[grepl("codes", groups$name)]) {
|
|
179
|
-
names <- strsplit(name, "/")[[1]]
|
|
180
|
-
names <- c(names[1:length(names) - 1], "values")
|
|
181
|
-
new_name <- paste(names, collapse = "/")
|
|
182
|
-
f[[new_name]] <- f[[name]]
|
|
183
|
-
grp <- f[[new_name]]
|
|
184
|
-
grp$write(args = list(1:grp$dims), value = grp$read() + 1)
|
|
185
|
-
}
|
|
186
|
-
f$close_all()
|
|
187
|
-
# end
|
|
188
|
-
|
|
189
|
-
sobj <- LoadH5Seurat(h5seurat_file)
|
|
190
|
-
if (merge_same_labels) {
|
|
191
|
-
log_info("Merging clusters with the same labels ...")
|
|
192
|
-
sobj <- merge_clusters_with_same_labels(sobj, newcol)
|
|
193
|
-
}
|
|
134
|
+
log$info("Reading H5AD from celltypist ...")
|
|
135
|
+
sobj <- ConvertAnnDataToSeurat(
|
|
136
|
+
infile = celltypist_outfile,
|
|
137
|
+
outfile = NULL,
|
|
138
|
+
assay = celltypist_args$assay %||% "RNA",
|
|
139
|
+
log = log
|
|
140
|
+
)
|
|
194
141
|
} else {
|
|
195
|
-
|
|
142
|
+
log$info("Attaching celltypist results to Seurat object ...")
|
|
196
143
|
|
|
197
144
|
celltypist_out <- read.table(
|
|
198
145
|
celltypist_outfile, sep = "\t", header = TRUE, row.names = 1)
|
|
@@ -205,48 +152,50 @@ if (outtype == "h5ad") {
|
|
|
205
152
|
drop = FALSE
|
|
206
153
|
]
|
|
207
154
|
)
|
|
155
|
+
}
|
|
208
156
|
|
|
209
|
-
|
|
210
|
-
|
|
157
|
+
if (celltypist_args$majority_voting) {
|
|
158
|
+
prediction <- "majority_voting"
|
|
211
159
|
|
|
212
|
-
|
|
213
|
-
|
|
160
|
+
if (!is.null(newcol)) {
|
|
161
|
+
sobj@meta.data[[newcol]] <- sobj@meta.data[[prediction]]
|
|
162
|
+
} else {
|
|
163
|
+
over_clustering <- celltypist_args$over_clustering
|
|
164
|
+
if (over_clustering %in% colnames(sobj@meta.data)) {
|
|
165
|
+
sobj@meta.data$seurat_clusters_id <- sobj@meta.data[[over_clustering]]
|
|
214
166
|
} else {
|
|
215
|
-
over_clustering <-
|
|
216
|
-
if (over_clustering %in% colnames(sobj@meta.data)) {
|
|
217
|
-
sobj@meta.data$seurat_clusters_id <- sobj@meta.data[[over_clustering]]
|
|
218
|
-
} else {
|
|
219
|
-
over_clustering <- "over_clustering"
|
|
220
|
-
}
|
|
221
|
-
|
|
222
|
-
# make a map of original cluster id to new cluster id
|
|
223
|
-
cluster_map <- data.frame(
|
|
224
|
-
seurat_clusters_id = sobj@meta.data[[over_clustering]],
|
|
225
|
-
seurat_clusters = sobj@meta.data[[prediction]]
|
|
226
|
-
) %>%
|
|
227
|
-
group_by(seurat_clusters_id) %>%
|
|
228
|
-
summarise(seurat_clusters = first(seurat_clusters), .groups = "drop") %>%
|
|
229
|
-
mutate(seurat_clusters = make.unique(seurat_clusters))
|
|
230
|
-
cluster_map <- split(cluster_map$seurat_clusters, cluster_map$seurat_clusters_id)
|
|
231
|
-
if (over_clustering != "seurat_clusters") {
|
|
232
|
-
sobj@meta.data$seurat_clusters <- sobj@meta.data[[over_clustering]]
|
|
233
|
-
}
|
|
234
|
-
Idents(sobj) <- "seurat_clusters"
|
|
235
|
-
cluster_map$object <- sobj
|
|
236
|
-
log_info("Renaming clusters ...")
|
|
237
|
-
sobj <- do_call(RenameIdents, cluster_map)
|
|
238
|
-
sobj@meta.data$seurat_clusters <- Idents(sobj)
|
|
167
|
+
over_clustering <- "over_clustering"
|
|
239
168
|
}
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
169
|
+
|
|
170
|
+
# make a map of original cluster id to new cluster id
|
|
171
|
+
cluster_map <- data.frame(
|
|
172
|
+
seurat_clusters_id = sobj@meta.data[[over_clustering]],
|
|
173
|
+
seurat_clusters = sobj@meta.data[[prediction]]
|
|
174
|
+
) %>%
|
|
175
|
+
group_by(seurat_clusters_id) %>%
|
|
176
|
+
summarise(seurat_clusters = first(seurat_clusters), .groups = "drop") %>%
|
|
177
|
+
mutate(seurat_clusters = make.unique(seurat_clusters))
|
|
178
|
+
cluster_map <- split(cluster_map$seurat_clusters, cluster_map$seurat_clusters_id)
|
|
179
|
+
if (over_clustering != "seurat_clusters") {
|
|
180
|
+
sobj@meta.data$seurat_clusters <- sobj@meta.data[[over_clustering]]
|
|
181
|
+
}
|
|
182
|
+
Idents(sobj) <- "seurat_clusters"
|
|
183
|
+
cluster_map$object <- sobj
|
|
184
|
+
log$info("Renaming clusters ...")
|
|
185
|
+
sobj <- do_call(RenameIdents, cluster_map)
|
|
186
|
+
sobj@meta.data$seurat_clusters <- Idents(sobj)
|
|
246
187
|
}
|
|
188
|
+
} else if (!is.null(newcol)) {
|
|
189
|
+
sobj@meta.data[[newcol]] <- sobj@meta.data[["predicted_labels"]]
|
|
247
190
|
}
|
|
248
|
-
|
|
249
|
-
|
|
191
|
+
|
|
192
|
+
if (merge_same_labels) {
|
|
193
|
+
log$info("Merging clusters with the same labels ...")
|
|
194
|
+
sobj <- merge_clusters_with_same_labels(sobj, newcol)
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
log$info("Saving the object ...")
|
|
198
|
+
save_obj(sobj, outfile)
|
|
250
199
|
} else {
|
|
251
200
|
stop(paste0("Unknown output type: ", outtype))
|
|
252
201
|
}
|
|
@@ -6,17 +6,19 @@ celltypes <- {{envs.cell_types | r}}
|
|
|
6
6
|
newcol <- {{envs.newcol | r}}
|
|
7
7
|
merge_same_labels <- {{envs.merge | r}}
|
|
8
8
|
|
|
9
|
+
log <- biopipen.utils::get_logger()
|
|
10
|
+
|
|
9
11
|
if (is.null(celltypes) || length(celltypes) == 0) {
|
|
10
|
-
|
|
12
|
+
log$warn("No cell types are given!")
|
|
11
13
|
|
|
12
14
|
if (merge_same_labels) {
|
|
13
|
-
|
|
15
|
+
log$warn("Ignoring 'envs.merge' because no cell types are given!")
|
|
14
16
|
}
|
|
15
17
|
# create a symbolic link to the input file
|
|
16
18
|
file.symlink(sobjfile, outfile)
|
|
17
19
|
} else {
|
|
18
|
-
|
|
19
|
-
sobj <-
|
|
20
|
+
log$info("Loading Seurat object ...")
|
|
21
|
+
sobj <- biopipen.utils::read_obj(sobjfile)
|
|
20
22
|
idents <- Idents(sobj)
|
|
21
23
|
if (is.factor(idents)) {
|
|
22
24
|
idents <- levels(idents)
|
|
@@ -28,7 +30,7 @@ if (is.null(celltypes) || length(celltypes) == 0) {
|
|
|
28
30
|
celltypes <- c(celltypes, idents[(length(celltypes) + 1):length(idents)])
|
|
29
31
|
} else if (length(celltypes) > length(idents)) {
|
|
30
32
|
celltypes <- celltypes[1:length(idents)]
|
|
31
|
-
|
|
33
|
+
log$warn("The length of cell types is longer than the number of clusters!")
|
|
32
34
|
}
|
|
33
35
|
for (i in seq_along(celltypes)) {
|
|
34
36
|
if (celltypes[i] == "-" || celltypes[i] == "") {
|
|
@@ -37,7 +39,7 @@ if (is.null(celltypes) || length(celltypes) == 0) {
|
|
|
37
39
|
}
|
|
38
40
|
names(celltypes) <- idents
|
|
39
41
|
|
|
40
|
-
|
|
42
|
+
log$info("Renaming cell types ...")
|
|
41
43
|
if (is.null(newcol)) {
|
|
42
44
|
has_na <- "NA" %in% unlist(celltypes) || anyNA(unlist(celltypes))
|
|
43
45
|
sobj$seurat_clusters_id <- Idents(sobj)
|
|
@@ -45,7 +47,7 @@ if (is.null(celltypes) || length(celltypes) == 0) {
|
|
|
45
47
|
sobj <- do_call(RenameIdents, celltypes)
|
|
46
48
|
sobj$seurat_clusters <- Idents(sobj)
|
|
47
49
|
if (has_na) {
|
|
48
|
-
|
|
50
|
+
log$info("Filtering clusters if NA ...")
|
|
49
51
|
sobj <- subset(
|
|
50
52
|
sobj,
|
|
51
53
|
subset = seurat_clusters != "NA" & !is.na(seurat_clusters)
|
|
@@ -59,9 +61,9 @@ if (is.null(celltypes) || length(celltypes) == 0) {
|
|
|
59
61
|
}
|
|
60
62
|
|
|
61
63
|
if (merge_same_labels) {
|
|
62
|
-
|
|
64
|
+
log$info("Merging clusters with the same labels ...")
|
|
63
65
|
sobj <- merge_clusters_with_same_labels(sobj, newcol)
|
|
64
66
|
}
|
|
65
67
|
|
|
66
|
-
|
|
68
|
+
biopipen.utils::save_obj(sobj, outfile)
|
|
67
69
|
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
library(Seurat)
|
|
2
2
|
library(dplyr)
|
|
3
3
|
library(hitype)
|
|
4
|
+
library(biopipen.utils)
|
|
4
5
|
|
|
5
6
|
sobjfile = {{in.sobjfile | r}}
|
|
6
7
|
outfile = {{out.outfile | r}}
|
|
@@ -11,11 +12,13 @@ merge_same_labels = {{envs.merge | r}}
|
|
|
11
12
|
|
|
12
13
|
if (is.null(db)) { stop("`envs.hitype_db` is not set") }
|
|
13
14
|
|
|
14
|
-
|
|
15
|
-
|
|
15
|
+
log <- get_logger()
|
|
16
|
+
|
|
17
|
+
log$info("Reading Seurat object...")
|
|
18
|
+
sobj = biopipen.utils::read_obj(sobjfile)
|
|
16
19
|
|
|
17
20
|
# prepare gene sets
|
|
18
|
-
|
|
21
|
+
log$info("Preparing gene sets...")
|
|
19
22
|
if (startsWith(db, "hitypedb_") && !grepl(".", db, fixed = TRUE)) {
|
|
20
23
|
gs_list = gs_prepare(eval(as.symbol(db)), tissue)
|
|
21
24
|
} else {
|
|
@@ -23,10 +26,10 @@ if (startsWith(db, "hitypedb_") && !grepl(".", db, fixed = TRUE)) {
|
|
|
23
26
|
}
|
|
24
27
|
|
|
25
28
|
# run RunHitype
|
|
26
|
-
|
|
29
|
+
log$info("Running RunHitype...")
|
|
27
30
|
sobj = RunHitype(sobj, gs_list, threshold = 0.0, make_unique = TRUE)
|
|
28
31
|
|
|
29
|
-
|
|
32
|
+
log$info("Renaming cell types...")
|
|
30
33
|
hitype_levels = sobj@meta.data %>%
|
|
31
34
|
select(seurat_clusters, hitype) %>%
|
|
32
35
|
distinct(seurat_clusters, .keep_all = TRUE) %>%
|
|
@@ -42,14 +45,14 @@ if (is.null(newcol)) {
|
|
|
42
45
|
}
|
|
43
46
|
|
|
44
47
|
if (merge_same_labels) {
|
|
45
|
-
|
|
48
|
+
log$info("Merging clusters with the same labels...")
|
|
46
49
|
sobj = merge_clusters_with_same_labels(sobj, newcol)
|
|
47
50
|
}
|
|
48
51
|
|
|
49
|
-
|
|
50
|
-
|
|
52
|
+
log$info("Saving Seurat object...")
|
|
53
|
+
biopipen.utils::save_obj(sobj, outfile)
|
|
51
54
|
|
|
52
|
-
|
|
55
|
+
log$info("Saving the mappings ...")
|
|
53
56
|
if (is.null(newcol)) {
|
|
54
57
|
celltypes = sobj@meta.data %>%
|
|
55
58
|
group_by(seurat_clusters_id) %>%
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
library(scCATCH)
|
|
2
2
|
library(Seurat)
|
|
3
|
+
library(biopipen.utils)
|
|
3
4
|
|
|
4
5
|
sobjfile = {{in.sobjfile | r}}
|
|
5
6
|
outfile = {{out.outfile | r}}
|
|
@@ -7,8 +8,10 @@ sccatch_args = {{envs.sccatch_args | r}}
|
|
|
7
8
|
newcol = {{envs.newcol | r}}
|
|
8
9
|
merge_same_labels = {{envs.merge | r}}
|
|
9
10
|
|
|
11
|
+
log <- get_logger()
|
|
12
|
+
|
|
10
13
|
if (!is.null(sccatch_args$marker)) {
|
|
11
|
-
cellmatch =
|
|
14
|
+
cellmatch = read_obj(sccatch_args$marker)
|
|
12
15
|
sccatch_args$if_use_custom_marker = TRUE
|
|
13
16
|
}
|
|
14
17
|
sccatch_args$marker = cellmatch
|
|
@@ -17,20 +20,20 @@ if (is.integer(sccatch_args$use_method)) {
|
|
|
17
20
|
sccatch_args$use_method = as.character(sccatch_args$use_method)
|
|
18
21
|
}
|
|
19
22
|
|
|
20
|
-
|
|
21
|
-
sobj =
|
|
23
|
+
log$info("Reading Seurat object...")
|
|
24
|
+
sobj = read_obj(sobjfile)
|
|
22
25
|
|
|
23
|
-
|
|
26
|
+
log$info("Running createscCATCH ...")
|
|
24
27
|
obj = createscCATCH(data = GetAssayData(sobj), cluster = as.character(Idents(sobj)))
|
|
25
28
|
sccatch_args$object = obj
|
|
26
29
|
|
|
27
|
-
|
|
30
|
+
log$info("Running findmarkergene ...")
|
|
28
31
|
obj = do_call(findmarkergene, sccatch_args)
|
|
29
32
|
|
|
30
|
-
|
|
33
|
+
log$info("Running findcelltype ...")
|
|
31
34
|
obj = findcelltype(object = obj)
|
|
32
35
|
|
|
33
|
-
|
|
36
|
+
log$info("Saving the mappings ...")
|
|
34
37
|
write.table(
|
|
35
38
|
obj@celltype,
|
|
36
39
|
file = file.path(dirname(outfile), "cluster2celltype.tsv"),
|
|
@@ -42,7 +45,7 @@ celltypes = as.list(obj@celltype$cell_type)
|
|
|
42
45
|
names(celltypes) = obj@celltype$cluster
|
|
43
46
|
|
|
44
47
|
if (length(celltypes) == 0) {
|
|
45
|
-
|
|
48
|
+
log$warn("- No cell types annotated from the database!")
|
|
46
49
|
} else {
|
|
47
50
|
if (is.null(newcol)) {
|
|
48
51
|
sobj$seurat_clusters_id = Idents(sobj)
|
|
@@ -57,10 +60,10 @@ if (length(celltypes) == 0) {
|
|
|
57
60
|
}
|
|
58
61
|
|
|
59
62
|
if (merge_same_labels) {
|
|
60
|
-
|
|
63
|
+
log$info("Merging clusters with the same labels ...")
|
|
61
64
|
sobj = merge_clusters_with_same_labels(sobj, newcol)
|
|
62
65
|
}
|
|
63
66
|
}
|
|
64
67
|
|
|
65
|
-
|
|
66
|
-
|
|
68
|
+
log$info("Saving Seurat object ...")
|
|
69
|
+
save_obj(sobj, outfile)
|
|
@@ -2,8 +2,9 @@ library(dplyr)
|
|
|
2
2
|
library(HGNChelper)
|
|
3
3
|
library(Seurat)
|
|
4
4
|
library(rlang)
|
|
5
|
+
library(biopipen.utils)
|
|
5
6
|
|
|
6
|
-
{
|
|
7
|
+
{% include biopipen_dir + "/scripts/scrna/sctype.R" %}
|
|
7
8
|
|
|
8
9
|
sobjfile = {{in.sobjfile | r}}
|
|
9
10
|
outfile = {{out.outfile | r}}
|
|
@@ -14,24 +15,26 @@ merge_same_labels = {{envs.merge | r}}
|
|
|
14
15
|
|
|
15
16
|
if (is.null(db)) { stop("`envs.sctype_args.db` is not set") }
|
|
16
17
|
|
|
17
|
-
|
|
18
|
-
|
|
18
|
+
log <- get_logger()
|
|
19
|
+
|
|
20
|
+
log$info("Reading Seurat object...")
|
|
21
|
+
sobj = biopipen.utils::read_obj(sobjfile)
|
|
19
22
|
|
|
20
23
|
# prepare gene sets
|
|
21
|
-
|
|
24
|
+
log$info("Preparing gene sets...")
|
|
22
25
|
gs_list = gene_sets_prepare(db, tissue)
|
|
23
26
|
|
|
24
27
|
scRNAseqData = GetAssayData(sobj, layer = "scale.data")
|
|
25
28
|
idents = as.character(unique(Idents(sobj)))
|
|
26
29
|
idents = idents[order(as.numeric(idents))]
|
|
27
30
|
|
|
28
|
-
|
|
31
|
+
log$info("Working on different levels of cell type labels ...")
|
|
29
32
|
cell_types_list = list()
|
|
30
33
|
for (i in seq_along(gs_list)) {
|
|
31
|
-
|
|
34
|
+
log$info("- Working on level {i} ...")
|
|
32
35
|
if (is.null(gs_list[[i]])) next
|
|
33
36
|
|
|
34
|
-
|
|
37
|
+
log$info(" Calculating cell-type scores ...")
|
|
35
38
|
es.max = sctype_score(
|
|
36
39
|
scRNAseqData = scRNAseqData,
|
|
37
40
|
scaled = TRUE,
|
|
@@ -39,7 +42,7 @@ for (i in seq_along(gs_list)) {
|
|
|
39
42
|
gs2 = gs_list[[i]]$gs_negative
|
|
40
43
|
)
|
|
41
44
|
|
|
42
|
-
|
|
45
|
+
log$info(" Merging cell-type scores by cluster ...")
|
|
43
46
|
cl_resutls = do_call(
|
|
44
47
|
"rbind",
|
|
45
48
|
lapply(
|
|
@@ -62,12 +65,12 @@ for (i in seq_along(gs_list)) {
|
|
|
62
65
|
write("\n####### sctype_scores_count ########", stderr())
|
|
63
66
|
write(capture.output(sctype_scores_count), stderr())
|
|
64
67
|
write("\n####################################", stderr())
|
|
65
|
-
|
|
68
|
+
log$info(" Scores tied in the above clusters.", immediate. = TRUE)
|
|
66
69
|
}
|
|
67
70
|
|
|
68
71
|
if (length(gs_list) == 1 || i > 1) {
|
|
69
72
|
# set low-confident (low ScType score) clusters to "unknown"
|
|
70
|
-
|
|
73
|
+
log$info(" Setting low-confident clusters to 'Unknown'...")
|
|
71
74
|
sctype_scores$type[as.numeric(as.character(sctype_scores$scores)) < sctype_scores$ncells/4] = "Unknown"
|
|
72
75
|
}
|
|
73
76
|
|
|
@@ -85,7 +88,7 @@ for (i in seq_along(gs_list)) {
|
|
|
85
88
|
if (length(cell_types_list) == 1) {
|
|
86
89
|
celltypes = cell_types_list[[1]]
|
|
87
90
|
} else {
|
|
88
|
-
|
|
91
|
+
log$info("Merging cell types at all levels ...")
|
|
89
92
|
celltypes = list()
|
|
90
93
|
|
|
91
94
|
for (i in idents) {
|
|
@@ -100,7 +103,7 @@ if (length(cell_types_list) == 1) {
|
|
|
100
103
|
}
|
|
101
104
|
|
|
102
105
|
|
|
103
|
-
|
|
106
|
+
log$info("Renaming cell types...")
|
|
104
107
|
ct_numbering = list()
|
|
105
108
|
for (key in names(celltypes)) {
|
|
106
109
|
ct = celltypes[[key]]
|
|
@@ -127,14 +130,14 @@ celltypes$object = NULL
|
|
|
127
130
|
gc()
|
|
128
131
|
|
|
129
132
|
if (merge_same_labels) {
|
|
130
|
-
|
|
133
|
+
log$info("Merging clusters with the same labels...")
|
|
131
134
|
sobj <- merge_clusters_with_same_labels(sobj, newcol)
|
|
132
135
|
celltypes <- lapply(celltypes, function(ct) {
|
|
133
136
|
sub("\\.\\d+$", "", ct)
|
|
134
137
|
})
|
|
135
138
|
}
|
|
136
139
|
|
|
137
|
-
|
|
140
|
+
log$info("Saving the mappings ...")
|
|
138
141
|
write.table(
|
|
139
142
|
data.frame(
|
|
140
143
|
Cluster = names(celltypes),
|
|
@@ -147,5 +150,5 @@ write.table(
|
|
|
147
150
|
row.names = FALSE
|
|
148
151
|
)
|
|
149
152
|
|
|
150
|
-
|
|
151
|
-
|
|
153
|
+
log$info("Saving Seurat object...")
|
|
154
|
+
biopipen.utils::save_obj(sobj, outfile)
|
|
@@ -1,7 +1,15 @@
|
|
|
1
1
|
set.seed(8525)
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
merge_clusters_with_same_labels <- function(sobj, newcol) {
|
|
4
|
+
if (is.null(newcol)) {
|
|
5
|
+
sobj@meta.data$seurat_clusters <- sub("\\.\\d+$", "", sobj@meta.data$seurat_clusters)
|
|
6
|
+
Idents(sobj) <- "seurat_clusters"
|
|
7
|
+
} else {
|
|
8
|
+
sobj@meta.data[[newcol]] <- sub("\\.\\d+$", "", sobj@meta.data[[newcol]])
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
sobj
|
|
12
|
+
}
|
|
5
13
|
|
|
6
14
|
{% if envs.tool == "hitype" %}
|
|
7
15
|
{% include biopipen_dir + "/scripts/scrna/CellTypeAnnotation-hitype.R" %}
|
|
@@ -37,7 +37,7 @@ cases <- {{envs.cases | r}} # nolint
|
|
|
37
37
|
overlap <- overlap %||% c()
|
|
38
38
|
overlaps <- list()
|
|
39
39
|
log_info("- Loading seurat object ...")
|
|
40
|
-
srtobj <-
|
|
40
|
+
srtobj <- biopipen.utils::read_obj(srtfile)
|
|
41
41
|
|
|
42
42
|
if (!is.null(mutaters) && length(mutaters) > 0) {
|
|
43
43
|
log_info("- Mutating seurat object ...")
|