biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +10 -183
- biopipen/core/proc.py +5 -3
- biopipen/core/testing.py +8 -1
- biopipen/ns/bam.py +40 -4
- biopipen/ns/cnv.py +1 -1
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/delim.py +1 -1
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +38 -0
- biopipen/ns/plot.py +8 -0
- biopipen/ns/scrna.py +307 -288
- biopipen/ns/scrna_metabolic_landscape.py +207 -366
- biopipen/ns/tcr.py +165 -97
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
- biopipen/reports/snp/PlinkCallRate.svelte +2 -2
- biopipen/reports/snp/PlinkFreq.svelte +1 -1
- biopipen/reports/snp/PlinkHWE.svelte +1 -1
- biopipen/reports/snp/PlinkHet.svelte +1 -1
- biopipen/reports/snp/PlinkIBD.svelte +1 -1
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
- biopipen/scripts/bam/CNAClinic.R +41 -6
- biopipen/scripts/bam/CNVpytor.py +2 -1
- biopipen/scripts/bam/ControlFREEC.py +2 -3
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/cnv/AneuploidyScore.R +25 -13
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
- biopipen/scripts/cnv/TMADScore.R +4 -4
- biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
- biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +14 -2
- biopipen/scripts/gene/GeneNameConversion.R +14 -12
- biopipen/scripts/gsea/Enrichr.R +2 -2
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/PreRank.R +3 -3
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/plot/VennDiagram.R +2 -2
- biopipen/scripts/protein/ProdigySummary.R +34 -27
- biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
- biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
- biopipen/scripts/regulatory/motifs-common.R +10 -9
- biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
- biopipen/scripts/rnaseq/Simulation.R +0 -2
- biopipen/scripts/rnaseq/UnitConversion.R +6 -5
- biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
- biopipen/scripts/scrna/CellCellCommunication.py +1 -1
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
- biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
- biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
- biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
- biopipen/scripts/scrna/LoomTo10X.R +51 -0
- biopipen/scripts/scrna/MarkersFinder.R +348 -217
- biopipen/scripts/scrna/MetaMarkers.R +3 -3
- biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +157 -75
- biopipen/scripts/scrna/ScSimulation.R +11 -10
- biopipen/scripts/scrna/ScVelo.py +605 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
- biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
- biopipen/scripts/scrna/SeuratClustering.R +31 -48
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
- biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
- biopipen/scripts/scrna/SeuratPreparing.R +76 -24
- biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
- biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
- biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
- biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
- biopipen/scripts/snp/MatrixEQTL.R +39 -20
- biopipen/scripts/snp/PlinkCallRate.R +43 -34
- biopipen/scripts/snp/PlinkFreq.R +34 -41
- biopipen/scripts/snp/PlinkHWE.R +23 -18
- biopipen/scripts/snp/PlinkHet.R +26 -22
- biopipen/scripts/snp/PlinkIBD.R +30 -34
- biopipen/scripts/stats/ChowTest.R +9 -8
- biopipen/scripts/stats/DiffCoexpr.R +13 -11
- biopipen/scripts/stats/LiquidAssoc.R +7 -8
- biopipen/scripts/stats/Mediation.R +8 -8
- biopipen/scripts/stats/MetaPvalue.R +11 -13
- biopipen/scripts/stats/MetaPvalue1.R +6 -5
- biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
- biopipen/scripts/tcr/ClonalStats.R +5 -4
- biopipen/scripts/tcr/CloneResidency.R +3 -3
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
- biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
- biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
- biopipen/scripts/tcr/ScRepLoading.R +114 -92
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +86 -97
- biopipen/scripts/tcr/TESSA.R +65 -115
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
- biopipen/utils/common_docstrs.py +66 -63
- biopipen/utils/reporter.py +177 -0
- {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
- {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/RECORD +131 -144
- {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
- biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
- biopipen/utils/caching.R +0 -44
- biopipen/utils/gene.R +0 -95
- biopipen/utils/gsea.R +0 -329
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -602
- biopipen/utils/mutate_helpers.R +0 -581
- biopipen/utils/plot.R +0 -209
- biopipen/utils/repr.R +0 -146
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -207
- {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,19 +1,12 @@
|
|
|
1
|
-
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
-
|
|
3
|
-
library(parallel)
|
|
4
1
|
library(Seurat)
|
|
5
|
-
library(SeuratDisk)
|
|
6
2
|
library(rlang)
|
|
7
|
-
library(
|
|
8
|
-
library(tidyr)
|
|
9
|
-
library(ggplot2)
|
|
10
|
-
library(ggprism)
|
|
3
|
+
library(biopipen.utils)
|
|
11
4
|
|
|
12
5
|
set.seed(8525)
|
|
13
|
-
theme_set(theme_prism())
|
|
14
6
|
|
|
15
7
|
sobjfile = {{in.sobjfile | r}}
|
|
16
8
|
outfile = {{out.outfile | r}}
|
|
9
|
+
joboutdir = {{job.outdir | r}}
|
|
17
10
|
use = {{envs.use | r}}
|
|
18
11
|
ident = {{envs.ident | r}}
|
|
19
12
|
ref = {{envs.ref | r}}
|
|
@@ -25,8 +18,16 @@ skip_if_normalized = {{envs.skip_if_normalized | r}}
|
|
|
25
18
|
sctransform_args = {{envs.SCTransform | r: todot="-"}}
|
|
26
19
|
normalizedata_args = {{envs.NormalizeData | r: todot="-"}}
|
|
27
20
|
findtransferanchors_args = {{envs.FindTransferAnchors | r: todot="-"}}
|
|
28
|
-
mappingscore_args = {{envs.MappingScore | r: todot="-"}}
|
|
29
21
|
mapquery_args = {{envs.MapQuery | r: todot="-"}}
|
|
22
|
+
cache = {{envs.cache | r}}
|
|
23
|
+
plots = {{envs.plots | r}}
|
|
24
|
+
|
|
25
|
+
log <- get_logger()
|
|
26
|
+
reporter <- get_reporter()
|
|
27
|
+
|
|
28
|
+
options(future.globals.maxSize = 8 * 1024 ^ 4)
|
|
29
|
+
options(future.rng.onMisuse="ignore")
|
|
30
|
+
options(Seurat.object.assay.version = "v5")
|
|
30
31
|
|
|
31
32
|
# See if we have a reference
|
|
32
33
|
if (is.null(ref)) {
|
|
@@ -37,376 +38,74 @@ if (is.null(use)) {
|
|
|
37
38
|
stop("No use provided (envs.use), don't know which column to transfer as cluster")
|
|
38
39
|
}
|
|
39
40
|
|
|
40
|
-
if (is.null(mapquery_args$refdata) || length(mapquery_args$refdata) == 0) {
|
|
41
|
-
mapquery_args$refdata = list()
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
mapquery_args$refdata[[use]] = use
|
|
45
|
-
|
|
46
41
|
outdir = dirname(outfile)
|
|
42
|
+
if (isTRUE(cache)) {
|
|
43
|
+
cache = joboutdir
|
|
44
|
+
}
|
|
47
45
|
if (is.null(split_by)) {
|
|
48
46
|
options(future.globals.maxSize = 8 * 1024 ^ 4)
|
|
49
47
|
future::plan(strategy = "multicore", workers = ncores)
|
|
50
48
|
}
|
|
51
49
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
# Expand dims from 30 to 1:30
|
|
58
|
-
if (is.numeric(args[[name]]) && length(args[[name]] == 1)) {
|
|
59
|
-
args[[name]] = 1:args[[name]]
|
|
60
|
-
}
|
|
61
|
-
args
|
|
62
|
-
}
|
|
63
|
-
findtransferanchors_args = .expand_dims(findtransferanchors_args)
|
|
64
|
-
|
|
65
|
-
# Load reference
|
|
66
|
-
log_info("- Loading reference")
|
|
67
|
-
if (endsWith(ref, ".rds") || endsWith(ref, ".RDS")) {
|
|
68
|
-
reference = readRDS(ref)
|
|
69
|
-
} else if (endsWith(ref, ".h5ad") || endsWith(ref, ".H5AD")) {
|
|
70
|
-
reference = ReadH5AD(ref)
|
|
50
|
+
log$info("Loading reference ...")
|
|
51
|
+
if (endsWith(ref, ".rds") || endsWith(ref, ".RDS") || endsWith(ref, ".qs") || endsWith(ref, ".qs2")) {
|
|
52
|
+
reference <- read_obj(ref)
|
|
53
|
+
} else if (endsWith(ref, ".h5seurat") || endsWith(ref, ".H5Seurat")) {
|
|
54
|
+
reference <- SeuratDisk::LoadH5Seurat(ref)
|
|
71
55
|
} else {
|
|
72
|
-
|
|
73
|
-
}
|
|
74
|
-
reference =
|
|
75
|
-
reference
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
if (startsWith(use_name, "predicted.")) {
|
|
90
|
-
stop(paste0(
|
|
91
|
-
"Do you mean: ", substring(use_name, 11),
|
|
92
|
-
))
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
if (refnorm == "auto") {
|
|
98
|
-
refnorm = ifelse (.is_sct(reference), "SCTransform", "NormalizeData")
|
|
99
|
-
}
|
|
100
|
-
if (refnorm == "SCTransform") {
|
|
101
|
-
# Check if the reference is SCTransform'ed
|
|
102
|
-
if (!.is_sct(reference)) {
|
|
103
|
-
stop("Reference is not SCTransform'ed")
|
|
104
|
-
}
|
|
105
|
-
n_models = length(x = slot(object = reference[[DefaultAssay(reference)]], name = "SCTModel.list"))
|
|
106
|
-
if (n_models == 0) {
|
|
107
|
-
stop("Reference doesn't contain SCTModel.")
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
log_info(" Normalization method used: {refnorm}")
|
|
112
|
-
if (refnorm == "SCTransform") {
|
|
113
|
-
findtransferanchors_args$normalization.method = "SCT"
|
|
114
|
-
} else if (refnorm == "NormalizeData") {
|
|
115
|
-
findtransferanchors_args$normalization.method = "LogNormalize"
|
|
116
|
-
} else {
|
|
117
|
-
stop(paste0("Unknown normalization method: ", refnorm))
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
# Load Seurat object
|
|
121
|
-
log_info("- Loading Seurat object")
|
|
122
|
-
sobj = readRDS(sobjfile)
|
|
123
|
-
defassay <- DefaultAssay(sobj)
|
|
124
|
-
|
|
125
|
-
if (!is.null(mutaters) && length(mutaters) > 0) {
|
|
126
|
-
log_info("- Applying mutaters")
|
|
127
|
-
sobj@meta.data <- sobj@meta.data %>% mutate(!!!lapply(mutaters, parse_expr))
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
if (!is.null(split_by)) {
|
|
131
|
-
# check if each split has more than 100 cells
|
|
132
|
-
cellno = table(sobj@meta.data[[split_by]])
|
|
133
|
-
cellno = cellno[cellno < 100]
|
|
134
|
-
if (length(cellno) > 0) {
|
|
135
|
-
# stop and print the splits with # cells
|
|
136
|
-
stop(paste0(
|
|
137
|
-
"The following splits have less than 100 cells: \n",
|
|
138
|
-
paste0("- ", names(cellno), ": ", cellno, collapse = "\n"),
|
|
139
|
-
"\n\n",
|
|
140
|
-
"You can use `envs.mutaters` to merge these splits and use `newsplit` as `envs.split_by`: \n",
|
|
141
|
-
"> mutaters = {\n",
|
|
142
|
-
"> newsplit = \"if_else(oldsplit %in% c('split1', 'split2'), 'mergedsplit', oldsplit)\"\n",
|
|
143
|
-
"> }\n"
|
|
144
|
-
))
|
|
145
|
-
}
|
|
146
|
-
sobj = SplitObject(sobj, split.by = split_by)
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
# Normalize data
|
|
150
|
-
log_info("- Normalizing data")
|
|
151
|
-
if (refnorm == "SCTransform") {
|
|
152
|
-
if (defassay == "SCT" && skip_if_normalized) {
|
|
153
|
-
log_warn(" Skipping normalization as the object is already SCTransform'ed")
|
|
154
|
-
} else {
|
|
155
|
-
log_info(" Using SCTransform normalization")
|
|
156
|
-
sctransform_args$residual.features = rownames(x = reference)
|
|
157
|
-
if (is.null(split_by)) {
|
|
158
|
-
sctransform_args$object = sobj
|
|
159
|
-
sobj = do_call(SCTransform, sctransform_args)
|
|
160
|
-
sctransform_args$object <- NULL
|
|
161
|
-
rm(sctransform_args)
|
|
162
|
-
gc()
|
|
163
|
-
} else {
|
|
164
|
-
sobj = mclapply(
|
|
165
|
-
X = sobj,
|
|
166
|
-
FUN = function(x) {
|
|
167
|
-
sctransform_args$object = x
|
|
168
|
-
do_call(SCTransform, sctransform_args)
|
|
169
|
-
},
|
|
170
|
-
mc.cores = ncores
|
|
171
|
-
)
|
|
172
|
-
if (any(unlist(lapply(sobj, class)) == "try-error")) {
|
|
173
|
-
stop(paste0("\nmclapply (SCTransform) error:", sobj))
|
|
174
|
-
}
|
|
175
|
-
}
|
|
176
|
-
}
|
|
177
|
-
} else {
|
|
178
|
-
if (defassay == "RNA" && skip_if_normalized) {
|
|
179
|
-
log_warn(" Skipping normalization as the object is already LogNormalize'd")
|
|
180
|
-
} else {
|
|
181
|
-
log_info(" Using NormalizeData normalization")
|
|
182
|
-
if (is.null(split_by)) {
|
|
183
|
-
normalizedata_args$object = sobj
|
|
184
|
-
sobj = do_call(NormalizeData, normalizedata_args)
|
|
185
|
-
} else {
|
|
186
|
-
sobj = mclapply(
|
|
187
|
-
X = sobj,
|
|
188
|
-
FUN = function(x) {
|
|
189
|
-
normalizedata_args$object = x
|
|
190
|
-
do_call(NormalizeData, normalizedata_args)
|
|
191
|
-
},
|
|
192
|
-
mc.cores = ncores
|
|
193
|
-
)
|
|
194
|
-
if (any(unlist(lapply(sobj, class)) == "try-error")) {
|
|
195
|
-
stop(paste0("\nmclapply (NormalizeData) error:", sobj))
|
|
196
|
-
}
|
|
197
|
-
}
|
|
198
|
-
normalizedata_args$object <- NULL
|
|
199
|
-
rm(normalizedata_args)
|
|
200
|
-
gc()
|
|
201
|
-
}
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
# Find anchors between query and reference
|
|
205
|
-
log_info("- Finding anchors")
|
|
206
|
-
findtransferanchors_args$reference = reference
|
|
207
|
-
if (is.null(split_by)) {
|
|
208
|
-
findtransferanchors_args$query = sobj
|
|
209
|
-
anchors = do_call(FindTransferAnchors, findtransferanchors_args)
|
|
210
|
-
findtransferanchors_args$reference = NULL
|
|
211
|
-
findtransferanchors_args$query = NULL
|
|
212
|
-
rm(findtransferanchors_args)
|
|
213
|
-
gc()
|
|
214
|
-
} else {
|
|
215
|
-
anchors = mclapply(
|
|
216
|
-
X = sobj,
|
|
217
|
-
FUN = function(x) {
|
|
218
|
-
findtransferanchors_args$query = x
|
|
219
|
-
do_call(FindTransferAnchors, findtransferanchors_args)
|
|
220
|
-
},
|
|
221
|
-
mc.cores = ncores
|
|
222
|
-
)
|
|
223
|
-
if (any(unlist(lapply(anchors, class)) == "try-error")) {
|
|
224
|
-
stop(paste0("\nmclapply (FindTransferAnchors) error:", anchors))
|
|
225
|
-
}
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
# Map query to reference
|
|
229
|
-
log_info("- Mapping query to reference")
|
|
230
|
-
mapquery_args$reference = reference
|
|
231
|
-
if (is.null(split_by)) {
|
|
232
|
-
mapquery_args$query = sobj
|
|
233
|
-
mapquery_args$anchorset = anchors
|
|
234
|
-
sobj = do_call(MapQuery, mapquery_args)
|
|
235
|
-
mapquery_args$reference = NULL
|
|
236
|
-
mapquery_args$query = NULL
|
|
237
|
-
mapquery_args$anchorset = NULL
|
|
238
|
-
gc()
|
|
239
|
-
} else {
|
|
240
|
-
sobj = mclapply(
|
|
241
|
-
X = seq_along(sobj),
|
|
242
|
-
FUN = function(i) {
|
|
243
|
-
mapquery_args$query = sobj[[i]]
|
|
244
|
-
mapquery_args$anchorset = anchors[[i]]
|
|
245
|
-
do_call(MapQuery, mapquery_args)
|
|
246
|
-
},
|
|
247
|
-
mc.cores = ncores
|
|
248
|
-
)
|
|
249
|
-
if (any(unlist(lapply(sobj, class)) == "try-error")) {
|
|
250
|
-
stop(paste0("\nmclapply (MapQuery) error:", sobj))
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
|
|
254
|
-
# Calculating mapping score
|
|
255
|
-
log_info("- Calculating mapping score")
|
|
256
|
-
mappingscore_sob_msg = paste0(
|
|
257
|
-
"While calculating mapping score, the following error was encountered: \n",
|
|
258
|
-
"subscript out of bounds. \n\n",
|
|
259
|
-
"You may want to try a smaller `ndim` (default: 50) in `envs.MappingScore`."
|
|
260
|
-
)
|
|
261
|
-
if (is.null(split_by)) {
|
|
262
|
-
mappingscore_args$anchors = anchors
|
|
263
|
-
mappingscore = tryCatch({
|
|
264
|
-
do_call(MappingScore, mappingscore_args)
|
|
265
|
-
}, error = function(e) {
|
|
266
|
-
if (e$message == "subscript out of bounds") stop(mappingscore_sob_msg)
|
|
267
|
-
stop(e)
|
|
268
|
-
})
|
|
269
|
-
mappingscore_args$anchors = NULL
|
|
270
|
-
rm(mappingscore_args)
|
|
271
|
-
gc()
|
|
272
|
-
} else {
|
|
273
|
-
mappingscore = mclapply(
|
|
274
|
-
X = seq_along(sobj),
|
|
275
|
-
FUN = function(i) {
|
|
276
|
-
mappingscore_args$anchors = anchors[[i]]
|
|
277
|
-
tryCatch({
|
|
278
|
-
do_call(MappingScore, mappingscore_args)
|
|
279
|
-
}, error = function(e) {
|
|
280
|
-
if (e$message == "subscript out of bounds") stop(mappingscore_sob_msg)
|
|
281
|
-
stop(e)
|
|
282
|
-
})
|
|
283
|
-
},
|
|
284
|
-
mc.cores = ncores
|
|
285
|
-
)
|
|
286
|
-
if (any(unlist(lapply(mappingscore, class)) == "try-error")) {
|
|
287
|
-
stop(paste0("\nmclapply (MappingScore) error:", mappingscore))
|
|
288
|
-
}
|
|
289
|
-
}
|
|
290
|
-
|
|
291
|
-
# Calculate mapping score and add to metadata
|
|
292
|
-
log_info("- Adding mapping score to metadata")
|
|
293
|
-
if (is.null(split_by)) {
|
|
294
|
-
sobj = AddMetaData(
|
|
295
|
-
object = sobj,
|
|
296
|
-
metadata = mappingscore,
|
|
297
|
-
col.name = "mapping.score"
|
|
298
|
-
)
|
|
299
|
-
} else {
|
|
300
|
-
sobj = mclapply(
|
|
301
|
-
X = seq_along(sobj),
|
|
302
|
-
FUN = function(i) {
|
|
303
|
-
AddMetaData(
|
|
304
|
-
object = sobj[[i]],
|
|
305
|
-
metadata = mappingscore[[i]],
|
|
306
|
-
col.name = "mapping.score"
|
|
307
|
-
)
|
|
308
|
-
},
|
|
309
|
-
mc.cores = ncores
|
|
310
|
-
)
|
|
311
|
-
if (any(unlist(lapply(sobj, class)) == "try-error")) {
|
|
312
|
-
stop(paste0("\nmclapply (AddMetaData) error:", sobj))
|
|
313
|
-
}
|
|
314
|
-
|
|
315
|
-
# Combine the results
|
|
316
|
-
log_info("- Merging the results")
|
|
317
|
-
gc()
|
|
318
|
-
# Memory efficient way to merge the results
|
|
319
|
-
# query = Reduce(function(x, y) merge(x, y, merge.dr = "ref.umap"), query)
|
|
320
|
-
sobj = merge(sobj[[1]], sobj[2:length(sobj)], merge.dr = "ref.umap")
|
|
321
|
-
}
|
|
322
|
-
|
|
323
|
-
# Add the alias to the metadata for the clusters
|
|
324
|
-
log_info("- Adding ident to metadata and set as ident")
|
|
325
|
-
sobj@meta.data = sobj@meta.data %>% mutate(
|
|
326
|
-
!!sym(ident) := as.factor(!!parse_expr(paste0("predicted.", use)))
|
|
56
|
+
stop("Reference file must be .qs, .qs2, .rds, .RDS, .h5seurat or .H5Seurat")
|
|
57
|
+
}
|
|
58
|
+
reference <- tryCatch(JoinLayers(reference), error = function(e) {reference})
|
|
59
|
+
Idents(reference) <- reference@meta.data[[use]]
|
|
60
|
+
|
|
61
|
+
log$info("Loading query data ...")
|
|
62
|
+
sobj <- read_obj(sobjfile)
|
|
63
|
+
|
|
64
|
+
sobj <- RunSeuratMap2Ref(
|
|
65
|
+
object = sobj, ref = reference, use = use,
|
|
66
|
+
ident = ident, refnorm = refnorm, skip_if_normalized = skip_if_normalized,
|
|
67
|
+
split_by = split_by, ncores = ncores,
|
|
68
|
+
SCTransformArgs = sctransform_args,
|
|
69
|
+
NormalizeDataArgs = normalizedata_args,
|
|
70
|
+
FindTransferAnchorsArgs = findtransferanchors_args,
|
|
71
|
+
MapQueryArgs = mapquery_args,
|
|
72
|
+
log = log, cache = cache
|
|
327
73
|
)
|
|
328
|
-
Idents(sobj) = ident
|
|
329
|
-
|
|
330
|
-
# Check if PrepSCTFindMarkers is done
|
|
331
|
-
if (.is_sct(sobj) && is.null(sobj@commands$PrepSCTFindMarkers)) {
|
|
332
|
-
log_info("- Running PrepSCTFindMarkers ...")
|
|
333
|
-
sobj <- PrepSCTFindMarkers(sobj)
|
|
334
|
-
# compose a new SeuratCommand to record it to sobj@commands
|
|
335
|
-
commands <- names(pbmc_small@commands)
|
|
336
|
-
scommand <- pbmc_small@commands[[commands[length(commands)]]]
|
|
337
|
-
scommand@time.stamp <- Sys.time()
|
|
338
|
-
scommand@assay.used <- DefaultAssay(sobj)
|
|
339
|
-
scommand@call.string <- "PrepSCTFindMarkers(object = sobj)"
|
|
340
|
-
scommand@params <- list()
|
|
341
|
-
sobj@commands$PrepSCTFindMarkers <- scommand
|
|
342
|
-
}
|
|
343
74
|
|
|
344
75
|
# Save
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
# ############################
|
|
350
|
-
# Some plots
|
|
351
|
-
# ############################
|
|
352
|
-
log_info("- Plotting mapping score ...")
|
|
353
|
-
p <- FeaturePlot(
|
|
354
|
-
object = sobj,
|
|
355
|
-
reduction = "ref.umap",
|
|
356
|
-
features = "mapping.score",
|
|
357
|
-
cols = c("white", "blue"),
|
|
358
|
-
pt.size = 0.5
|
|
359
|
-
) + ggtitle("Mapping score for query cells")
|
|
360
|
-
save_plot(p, file.path(outdir, "mapping_score"), list(width = 800, height = 600, res = 100))
|
|
76
|
+
gc()
|
|
77
|
+
log$info("Saving result ...")
|
|
78
|
+
save_obj(sobj, file = outfile)
|
|
361
79
|
|
|
362
|
-
log_info("- Plotting for transferred data ...")
|
|
363
|
-
ref.reduction = mapquery_args$reduction.model %||% "wnn.umap"
|
|
364
|
-
for (qname in names(mapquery_args$refdata)) {
|
|
365
|
-
rname <- mapquery_args$refdata[[qname]]
|
|
366
80
|
|
|
367
|
-
|
|
368
|
-
|
|
81
|
+
### Plotting
|
|
82
|
+
log$info("Plotting features ...")
|
|
83
|
+
for (name in names(plots)) {
|
|
84
|
+
if (is.null(plots[[name]])) {
|
|
369
85
|
next
|
|
370
86
|
}
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
prefix <- file.path(outdir, paste0("UMAPs-", slugify(qname)))
|
|
393
|
-
save_plot(p, prefix, list(width = 1500, height = 700, res = 100))
|
|
394
|
-
|
|
395
|
-
# summarize the stats
|
|
396
|
-
log_info(" Summarizing stats: {qname} -> {rname}")
|
|
397
|
-
ref_stats <- as.data.frame(table(reference@meta.data[[rname]]))
|
|
398
|
-
colnames(ref_stats) <- c("CellType", "Count_Ref")
|
|
399
|
-
query_stats <- as.data.frame(table(sobj@meta.data[[paste0("predicted.", qname)]]))
|
|
400
|
-
colnames(query_stats) <- c("CellType", "Count_Query")
|
|
401
|
-
stats <- left_join(ref_stats, query_stats, by = "CellType") %>%
|
|
402
|
-
replace_na(list(Count_Query = 0)) %>%
|
|
403
|
-
arrange(desc(Count_Query), desc(Count_Ref))
|
|
404
|
-
|
|
405
|
-
write.table(
|
|
406
|
-
stats,
|
|
407
|
-
file = file.path(outdir, paste0("stats-", slugify(qname), ".txt")),
|
|
408
|
-
row.names = FALSE,
|
|
409
|
-
quote = FALSE,
|
|
410
|
-
sep = "\t"
|
|
87
|
+
log$info("- {name} ...")
|
|
88
|
+
plots[[name]]$features <- gsub("{use}", use, plots[[name]]$features, fixed = TRUE)
|
|
89
|
+
plots[[name]]$features <- gsub("{ident}", ident, plots[[name]]$features, fixed = TRUE)
|
|
90
|
+
|
|
91
|
+
plots[[name]]$devpars <- plots[[name]]$devpars %||% list()
|
|
92
|
+
plots[[name]]$devpars$res <- plots[[name]]$devpars$res %||% 100
|
|
93
|
+
plots[[name]]$devpars$width <- plots[[name]]$devpars$width %||% 1200
|
|
94
|
+
plots[[name]]$devpars$height <- plots[[name]]$devpars$height %||% 720
|
|
95
|
+
plots[[name]]$more_formats <- plots[[name]]$more_formats %||% character()
|
|
96
|
+
plots[[name]]$save_code <- FALSE
|
|
97
|
+
plots[[name]]$descr <- plots[[name]]$descr %||% name
|
|
98
|
+
extract_vars(plots[[name]], "devpars", "more_formats", "save_code", "descr")
|
|
99
|
+
|
|
100
|
+
plot_fn <- gglogger::register(VizSeuratMap2Ref)
|
|
101
|
+
p <- do_call(plot_fn, c(list(query = sobj, ref = reference), plots[[name]]))
|
|
102
|
+
prefix <- file.path(outdir, paste0(slugify(name), ".map2ref"))
|
|
103
|
+
save_plot(p, prefix, devpars, formats = c("png", more_formats))
|
|
104
|
+
|
|
105
|
+
reporter$add(
|
|
106
|
+
reporter$image(prefix, more_formats, save_code = FALSE, kind = "image"),
|
|
107
|
+
h1 = name
|
|
411
108
|
)
|
|
412
109
|
}
|
|
110
|
+
|
|
111
|
+
reporter$save(joboutdir)
|
|
@@ -1,17 +1,15 @@
|
|
|
1
|
-
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
-
{{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
|
|
3
|
-
|
|
4
1
|
library(rlang)
|
|
5
2
|
library(tibble)
|
|
6
3
|
library(dplyr)
|
|
7
4
|
library(Seurat)
|
|
5
|
+
library(biopipen.utils)
|
|
8
6
|
|
|
9
|
-
srtobj = {{in.srtobj |
|
|
7
|
+
srtobj = {{in.srtobj | r}}
|
|
10
8
|
metafile = {{in.metafile | r}}
|
|
11
9
|
mutaters = {{envs.mutaters | r}}
|
|
12
|
-
|
|
10
|
+
outfile = {{out.outfile | r}}
|
|
13
11
|
|
|
14
|
-
srt =
|
|
12
|
+
srt = read_obj(srtobj)
|
|
15
13
|
metadata = srt@meta.data
|
|
16
14
|
|
|
17
15
|
if (!is.null(metafile)) {
|
|
@@ -40,4 +38,4 @@ if (!is.null(expr) && length(expr) > 0) {
|
|
|
40
38
|
srt@meta.data = metadata
|
|
41
39
|
}
|
|
42
40
|
|
|
43
|
-
|
|
41
|
+
save_obj(srt, outfile)
|
|
@@ -5,9 +5,9 @@ library(dplyr)
|
|
|
5
5
|
library(glue)
|
|
6
6
|
library(biopipen.utils)
|
|
7
7
|
|
|
8
|
-
metafile <- {{in.metafile |
|
|
9
|
-
|
|
10
|
-
joboutdir <- {{job.outdir |
|
|
8
|
+
metafile <- {{in.metafile | r}}
|
|
9
|
+
outfile <- {{out.outfile | r}}
|
|
10
|
+
joboutdir <- {{job.outdir | r}}
|
|
11
11
|
envs <- {{envs | r: todot = "-", skip = 1}}
|
|
12
12
|
|
|
13
13
|
if (isTRUE(envs$cache)) { envs$cache <- joboutdir }
|
|
@@ -30,7 +30,9 @@ reporter$add(
|
|
|
30
30
|
"<p>Cell filters: ", html_escape(envs$cell_qc), "</p>",
|
|
31
31
|
"<p>Gene filters: </p>",
|
|
32
32
|
"<p>- Min Cells: ", envs$gene_qc$min_cells, "</p>",
|
|
33
|
-
"<p>- Excludes: ",
|
|
33
|
+
"<p>- Excludes: ",
|
|
34
|
+
ifelse(is.null(envs$gene_qc$excludes), "Not set", paste(envs$gene_qc$excludes, collapse = ", ")),
|
|
35
|
+
"</p>"
|
|
34
36
|
)
|
|
35
37
|
),
|
|
36
38
|
h1 = "Filters and QC"
|
|
@@ -57,43 +59,77 @@ dir.create(qcdir, showWarnings = FALSE, recursive = TRUE)
|
|
|
57
59
|
|
|
58
60
|
sobj <- LoadSeuratAndPerformQC(
|
|
59
61
|
metadata,
|
|
60
|
-
|
|
62
|
+
min_cells = envs$min_cells,
|
|
63
|
+
min_features = envs$min_features,
|
|
61
64
|
cell_qc = envs$cell_qc,
|
|
62
65
|
gene_qc = envs$gene_qc,
|
|
63
66
|
tmpdir = joboutdir,
|
|
64
67
|
log = log,
|
|
65
68
|
cache = envs$cache)
|
|
66
69
|
|
|
67
|
-
log$info("Saving
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
nCells = c(nrow(sobj@misc$cell_qc_df), sum(sobj@misc$cell_qc_df$.QC)),
|
|
71
|
-
nGenes = c(sobj@misc$gene_qc$before, sobj@misc$gene_qc$after)
|
|
72
|
-
)
|
|
73
|
-
write.table(dim_df, file = file.path(qcdir, "dim.txt"),
|
|
70
|
+
log$info("Saving and visualizing QC results ...")
|
|
71
|
+
cell_qc_df <- VizSeuratCellQC(sobj, plot_type = "table")
|
|
72
|
+
write.table(cell_qc_df, file = file.path(qcdir, "cell_qc.txt"),
|
|
74
73
|
row.names = FALSE, quote = FALSE, sep = "\t")
|
|
75
74
|
|
|
76
75
|
reporter$add(
|
|
77
76
|
list(
|
|
78
|
-
|
|
79
|
-
|
|
77
|
+
name = "Cell QC metrics",
|
|
78
|
+
contents = list(
|
|
79
|
+
list(
|
|
80
|
+
kind = "descr",
|
|
81
|
+
content = paste0(
|
|
82
|
+
"The table below show the number of cells in each sample that failed and passed the QC filters. ",
|
|
83
|
+
"The last row shows the total number of cells that failed and passed the QC filters across all samples. "
|
|
84
|
+
)
|
|
85
|
+
),
|
|
86
|
+
list(kind = "table", src = file.path(qcdir, "cell_qc.txt"))
|
|
87
|
+
)
|
|
80
88
|
),
|
|
89
|
+
h1 = "Filters and QC",
|
|
90
|
+
h2 = "Cell-level Quality Control",
|
|
91
|
+
ui = "tabs"
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
gene_qc_df <- VizSeuratGeneQC(sobj, plot_type = "table")
|
|
95
|
+
write.table(gene_qc_df, file = file.path(qcdir, "gene_qc.txt"),
|
|
96
|
+
row.names = FALSE, quote = FALSE, sep = "\t")
|
|
97
|
+
|
|
98
|
+
reporter$add(
|
|
81
99
|
list(
|
|
82
|
-
|
|
83
|
-
|
|
100
|
+
name = "Gene QC metrics",
|
|
101
|
+
contents = list(
|
|
102
|
+
list(
|
|
103
|
+
kind = "descr",
|
|
104
|
+
content = paste0(
|
|
105
|
+
"The table below show the number of genes in each sample that failed and passed the QC filters. ",
|
|
106
|
+
"The last row shows the final number of genes that failed and passed the QC filters across all samples. ",
|
|
107
|
+
"Any gene that failed the QC filters will be excluded in the merged Seurat object."
|
|
108
|
+
)
|
|
109
|
+
),
|
|
110
|
+
list(kind = "table", src = file.path(qcdir, "gene_qc.txt")),
|
|
111
|
+
list(kind = "list", items = list(paste0(
|
|
112
|
+
"We may still end up with features slightly less than the final passed ones. ",
|
|
113
|
+
"For example, when SCTransform is used, the number of features may be less than the number of genes that passed the QC filters. ",
|
|
114
|
+
"This is because SCTransform selects the top N features based on variance. "
|
|
115
|
+
)))
|
|
116
|
+
)
|
|
84
117
|
),
|
|
85
118
|
h1 = "Filters and QC",
|
|
86
|
-
h2 = "
|
|
119
|
+
h2 = "Gene-level Quality Control",
|
|
120
|
+
ui = "tabs"
|
|
87
121
|
)
|
|
88
122
|
|
|
89
|
-
log$info("Visualizing QC metrics ...")
|
|
90
123
|
for (pname in names(envs$qc_plots)) {
|
|
124
|
+
if (is.null(envs$qc_plots[[pname]])) next
|
|
125
|
+
log$info("- {pname} ...")
|
|
91
126
|
args <- envs$qc_plots[[pname]]
|
|
92
127
|
args$kind <- args$kind %||% "cell"
|
|
93
128
|
args$devpars <- args$devpars %||% list()
|
|
94
129
|
args$more_formats <- args$more_formats %||% character()
|
|
95
130
|
args$save_code <- args$save_code %||% FALSE
|
|
96
|
-
|
|
131
|
+
args$descr <- args$descr %||% pname
|
|
132
|
+
extract_vars(args, "kind", "devpars", "more_formats", "save_code", "descr")
|
|
97
133
|
if (kind == "gene") kind <- "gene_qc"
|
|
98
134
|
if (kind == "cell") kind <- "cell_qc"
|
|
99
135
|
args$object <- sobj
|
|
@@ -103,21 +139,31 @@ for (pname in names(envs$qc_plots)) {
|
|
|
103
139
|
gglogger::register(VizSeuratGeneQC)
|
|
104
140
|
}
|
|
105
141
|
p <- do_call(plot_fn, args)
|
|
106
|
-
prefix <- file.path(qcdir, paste0(slugify(pname), "
|
|
142
|
+
prefix <- file.path(qcdir, paste0(slugify(pname), ".", kind))
|
|
107
143
|
save_plot(p, prefix, devpars, formats = c("png", more_formats))
|
|
108
144
|
if (save_code) {
|
|
109
145
|
save_plotcode(p, prefix,
|
|
110
|
-
setup = c("library(biopipen.utils)", "load('data.RData')", "invisible(list2env(
|
|
146
|
+
setup = c("library(biopipen.utils)", "load('data.RData')", "invisible(list2env(args, envir = .GlobalEnv))"),
|
|
111
147
|
"args",
|
|
112
148
|
auto_data_setup = FALSE)
|
|
113
149
|
}
|
|
114
150
|
reporter$add(
|
|
115
|
-
|
|
151
|
+
list(
|
|
152
|
+
name = pname,
|
|
153
|
+
contents = list(
|
|
154
|
+
list(kind = "descr", content = descr),
|
|
155
|
+
reporter$image(prefix, more_formats, save_code, kind = "image")
|
|
156
|
+
)
|
|
157
|
+
),
|
|
116
158
|
h1 = "Filters and QC",
|
|
117
|
-
h2 =
|
|
159
|
+
h2 = ifelse(kind == "cell_qc", "Cell-level Quality Control", "Gene-level Quality Control"),
|
|
160
|
+
ui = "tabs"
|
|
118
161
|
)
|
|
119
162
|
}
|
|
120
163
|
|
|
164
|
+
log$info("Filtering with QC criteria ...")
|
|
165
|
+
sobj <- FinishSeuratQC(sobj)
|
|
166
|
+
|
|
121
167
|
sobj <- RunSeuratTransformation(
|
|
122
168
|
sobj,
|
|
123
169
|
use_sct = envs$use_sct,
|
|
@@ -194,6 +240,12 @@ if (!identical(envs$doublet_detector, "none")) {
|
|
|
194
240
|
sobj <- subset(sobj, subset = !!sym(paste0(sobj@misc$doublets$tool, "_DropletType")) != "doublet")
|
|
195
241
|
}
|
|
196
242
|
|
|
243
|
+
if (!is.null(envs$mutaters) && length(envs$mutaters) > 0) {
|
|
244
|
+
log$info("Mutating metadata ...")
|
|
245
|
+
sobj@meta.data <- sobj@meta.data %>%
|
|
246
|
+
mutate(!!!lapply(envs$mutaters, rlang::parse_expr))
|
|
247
|
+
}
|
|
248
|
+
|
|
197
249
|
log$info("Saving QC'ed seurat object ...")
|
|
198
250
|
reporter$save(joboutdir)
|
|
199
|
-
|
|
251
|
+
save_obj(sobj, outfile)
|