biopipen 0.29.2__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +2 -0
- biopipen/core/filters.py +21 -0
- biopipen/ns/plot.py +55 -0
- biopipen/ns/scrna.py +110 -21
- biopipen/ns/web.py +87 -5
- biopipen/scripts/bam/CNAClinic.R +2 -1
- biopipen/scripts/cellranger/CellRangerCount.py +3 -3
- biopipen/scripts/cellranger/CellRangerSummary.R +2 -1
- biopipen/scripts/cnv/AneuploidyScore.R +1 -1
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +2 -2
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +3 -2
- biopipen/scripts/gene/GeneNameConversion.R +2 -2
- biopipen/scripts/gsea/Enrichr.R +3 -3
- biopipen/scripts/gsea/FGSEA.R +2 -2
- biopipen/scripts/gsea/GSEA.R +2 -2
- biopipen/scripts/gsea/PreRank.R +2 -2
- biopipen/scripts/plot/Heatmap.R +3 -3
- biopipen/scripts/plot/Manhattan.R +2 -1
- biopipen/scripts/plot/QQPlot.R +1 -1
- biopipen/scripts/plot/ROC.R +1 -1
- biopipen/scripts/plot/Scatter.R +112 -0
- biopipen/scripts/plot/VennDiagram.R +3 -3
- biopipen/scripts/regulatory/MotifAffinityTest.R +3 -7
- biopipen/scripts/rnaseq/Simulation.R +1 -1
- biopipen/scripts/rnaseq/UnitConversion.R +2 -1
- biopipen/scripts/scrna/AnnData2Seurat.R +1 -1
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +24 -8
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +10 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +9 -1
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -8
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +15 -2
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +38 -15
- biopipen/scripts/scrna/CellTypeAnnotation.R +3 -0
- biopipen/scripts/scrna/CellsDistribution.R +4 -3
- biopipen/scripts/scrna/DimPlots.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +1 -1
- biopipen/scripts/scrna/MarkersFinder.R +5 -5
- biopipen/scripts/scrna/MetaMarkers.R +4 -4
- biopipen/scripts/scrna/ModuleScoreCalculator.R +2 -1
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +4 -3
- biopipen/scripts/scrna/ScSimulation.R +64 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +73 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +4 -3
- biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -5
- biopipen/scripts/scrna/SeuratClusterStats-hists.R +6 -5
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +4 -3
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +20 -25
- biopipen/scripts/scrna/SeuratClusterStats.R +24 -8
- biopipen/scripts/scrna/SeuratClustering-common.R +213 -0
- biopipen/scripts/scrna/SeuratClustering.R +10 -170
- biopipen/scripts/scrna/SeuratMap2Ref.R +98 -54
- biopipen/scripts/scrna/SeuratMetadataMutater.R +2 -2
- biopipen/scripts/scrna/SeuratPreparing-common.R +452 -0
- biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +201 -0
- biopipen/scripts/scrna/SeuratPreparing.R +22 -562
- biopipen/scripts/scrna/SeuratSubClustering.R +24 -39
- biopipen/scripts/scrna/TopExpressingGenes.R +1 -1
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +2 -2
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +2 -2
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +3 -3
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +3 -3
- biopipen/scripts/snp/MatrixEQTL.R +1 -1
- biopipen/scripts/snp/PlinkCallRate.R +2 -2
- biopipen/scripts/snp/PlinkFreq.R +2 -2
- biopipen/scripts/snp/PlinkHWE.R +2 -2
- biopipen/scripts/snp/PlinkHet.R +2 -2
- biopipen/scripts/snp/PlinkIBD.R +2 -2
- biopipen/scripts/stats/ChowTest.R +1 -1
- biopipen/scripts/stats/DiffCoexpr.R +1 -1
- biopipen/scripts/stats/LiquidAssoc.R +1 -1
- biopipen/scripts/stats/Mediation.R +11 -9
- biopipen/scripts/stats/MetaPvalue.R +4 -1
- biopipen/scripts/stats/MetaPvalue1.R +4 -1
- biopipen/scripts/tcr/Attach2Seurat.R +1 -1
- biopipen/scripts/tcr/CDR3AAPhyschem.R +1 -1
- biopipen/scripts/tcr/CloneResidency.R +2 -2
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch-basic.R +0 -4
- biopipen/scripts/tcr/Immunarch-clonality.R +0 -4
- biopipen/scripts/tcr/Immunarch-diversity.R +2 -24
- biopipen/scripts/tcr/Immunarch-geneusage.R +0 -2
- biopipen/scripts/tcr/Immunarch-kmer.R +0 -2
- biopipen/scripts/tcr/Immunarch-overlap.R +0 -2
- biopipen/scripts/tcr/Immunarch-spectratyping.R +0 -2
- biopipen/scripts/tcr/Immunarch-tracking.R +0 -2
- biopipen/scripts/tcr/Immunarch-vjjunc.R +0 -2
- biopipen/scripts/tcr/Immunarch.R +43 -11
- biopipen/scripts/tcr/ImmunarchFilter.R +1 -1
- biopipen/scripts/tcr/ImmunarchLoading.R +2 -2
- biopipen/scripts/tcr/SampleDiversity.R +1 -1
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +2 -2
- biopipen/scripts/tcr/TESSA.R +2 -2
- biopipen/scripts/vcf/TruvariBenchSummary.R +2 -2
- biopipen/scripts/vcf/TruvariConsistency.R +1 -1
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
- biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
- biopipen/scripts/web/gcloud_common.py +49 -0
- {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/METADATA +7 -7
- {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/RECORD +106 -96
- {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/WHEEL +0 -0
- {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
+
{{ biopipen_dir | joinpaths: "utils", "caching.R" | source_r }}
|
|
3
3
|
|
|
4
4
|
library(Seurat)
|
|
5
5
|
library(future)
|
|
@@ -7,7 +7,6 @@ library(rlang)
|
|
|
7
7
|
library(tidyr)
|
|
8
8
|
library(dplyr)
|
|
9
9
|
library(digest)
|
|
10
|
-
library(clustree)
|
|
11
10
|
|
|
12
11
|
set.seed(8525)
|
|
13
12
|
|
|
@@ -24,16 +23,10 @@ options(str = strOptions(vec.len = 5, digits.d = 5))
|
|
|
24
23
|
options(future.globals.maxSize = 80000 * 1024^2)
|
|
25
24
|
plan(strategy = "multicore", workers = envs$ncores)
|
|
26
25
|
|
|
27
|
-
|
|
28
|
-
# Expand dims from 30 to 1:30
|
|
29
|
-
if (is.numeric(args[[name]]) && length(args[[name]] == 1)) {
|
|
30
|
-
args[[name]] <- 1:args[[name]]
|
|
31
|
-
}
|
|
32
|
-
args
|
|
33
|
-
}
|
|
26
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClustering-common.R" | source_r }}
|
|
34
27
|
|
|
35
|
-
envs$RunUMAP <-
|
|
36
|
-
envs$FindNeighbors <-
|
|
28
|
+
envs$RunUMAP <- expand_dims(envs$RunUMAP)
|
|
29
|
+
envs$FindNeighbors <- expand_dims(envs$FindNeighbors)
|
|
37
30
|
|
|
38
31
|
log_info("Reading Seurat object ...")
|
|
39
32
|
sobj <- readRDS(srtfile)
|
|
@@ -53,164 +46,11 @@ if (is.character(envs$cache)) {
|
|
|
53
46
|
writeLines(sobj_sig, file.path(cache_dir, "signature.txt"))
|
|
54
47
|
}
|
|
55
48
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
if (is.null(cached$data)) {
|
|
62
|
-
log_info("Running ScaleData ...")
|
|
63
|
-
envs$ScaleData$object <- sobj
|
|
64
|
-
sobj <- do_call(ScaleData, envs$ScaleData)
|
|
65
|
-
cached$data <- list(assay = sobj@assays$RNA, commands = sobj@commands)
|
|
66
|
-
save_to_cache(cached, "ScaleData", cache_dir)
|
|
67
|
-
} else {
|
|
68
|
-
log_info("Loading cached ScaleData ...")
|
|
69
|
-
sobj@assays$RNA <- cached$data$assay
|
|
70
|
-
sobj@commands <- cached$data$commands
|
|
71
|
-
DefaultAssay(sobj) <- "RNA"
|
|
72
|
-
}
|
|
73
|
-
} else if (length(envs$SCTransform) > 0) {
|
|
74
|
-
if (DefaultAssay(sobj) != "SCT") {
|
|
75
|
-
stop("SCT assay not detected, but SCTransform is specified. Use ScaleData instead.")
|
|
76
|
-
}
|
|
77
|
-
cached <- get_cached(envs$SCTransform, "SCTransform", cache_dir)
|
|
78
|
-
asssay <- envs$SCTransform$new.assay.name %||% "SCT"
|
|
79
|
-
if (is.null(cached$data)) {
|
|
80
|
-
log_info("Running SCTransform ...")
|
|
81
|
-
envs$SCTransform$object <- sobj
|
|
82
|
-
sobj <- do_call(SCTransform, envs$SCTransform)
|
|
83
|
-
cached$data <- list(assay = sobj@assays$SCT, commands = sobj@commands)
|
|
84
|
-
save_to_cache(cached, "SCTransform", cache_dir)
|
|
85
|
-
} else {
|
|
86
|
-
log_info("Loading cached SCTransform ...")
|
|
87
|
-
sobj@assays[[assay]] <- cached$data$assay
|
|
88
|
-
sobj@commands <- cached$data$commands
|
|
89
|
-
DefaultAssay(sobj) <- assay
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
cached <- get_cached(envs$RunUMAP, "RunUMAP", cache_dir)
|
|
94
|
-
reduc_name <- envs$RunUMAP$reduction.name %||% "umap"
|
|
95
|
-
if (is.null(cached$data)) {
|
|
96
|
-
log_info("Running RunUMAP ...")
|
|
97
|
-
umap_args <- list_setdefault(
|
|
98
|
-
envs$RunUMAP,
|
|
99
|
-
object = sobj,
|
|
100
|
-
dims = 1:30,
|
|
101
|
-
reduction = sobj@misc$integrated_new_reduction %||% "pca"
|
|
102
|
-
)
|
|
103
|
-
ncells <- ncol(sobj)
|
|
104
|
-
umap_args$dims <- 1:min(max(umap_args$dims), ncells - 1)
|
|
105
|
-
umap_method <- envs$RunUMAP$umap.method %||% "uwot"
|
|
106
|
-
if (umap_method == "uwot" && is.null(envs$RunUMAP$n.neighbors)) {
|
|
107
|
-
# https://github.com/satijalab/seurat/issues/4312
|
|
108
|
-
umap_args$n.neighbors <- min(ncells - 1, 30)
|
|
109
|
-
}
|
|
110
|
-
sobj <- do_call(RunUMAP, umap_args)
|
|
111
|
-
cached$data <- list(reduc = sobj@reductions[[reduc_name]], commands = sobj@commands)
|
|
112
|
-
save_to_cache(cached, "RunUMAP", cache_dir)
|
|
113
|
-
} else {
|
|
114
|
-
log_info("Loading cached RunUMAP ...")
|
|
115
|
-
sobj@reductions[[reduc_name]] <- cached$data$reduc
|
|
116
|
-
sobj@commands <- cached$data$commands
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
cached <- get_cached(envs$FindNeighbors, "FindNeighbors", cache_dir)
|
|
120
|
-
if (is.null(cached$data)) {
|
|
121
|
-
log_info("Running FindNeighbors ...")
|
|
122
|
-
envs$FindNeighbors$object <- sobj
|
|
123
|
-
envs$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
|
|
124
|
-
sobj <- do_call(FindNeighbors, envs$FindNeighbors)
|
|
125
|
-
cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
|
|
126
|
-
save_to_cache(cached, "FindNeighbors", cache_dir)
|
|
127
|
-
} else {
|
|
128
|
-
log_info("Loading cached FindNeighbors ...")
|
|
129
|
-
sobj@graphs <- cached$data$graphs
|
|
130
|
-
sobj@commands <- cached$data$commands
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
envs$FindClusters$random.seed <- envs$FindClusters$random.seed %||% 8525
|
|
134
|
-
expand_resolution <- function(resolution) {
|
|
135
|
-
expanded_res <- c()
|
|
136
|
-
for (res in resolution) {
|
|
137
|
-
if (is.numeric(res)) {
|
|
138
|
-
expanded_res <- c(expanded_res, res)
|
|
139
|
-
} else {
|
|
140
|
-
# is.character
|
|
141
|
-
parts <- trimws(unlist(strsplit(res, ",")))
|
|
142
|
-
for (part in parts) {
|
|
143
|
-
if (grepl(":", part)) {
|
|
144
|
-
parts <- trimws(unlist(strsplit(part, ":")))
|
|
145
|
-
if (length(parts) == 2) { parts <- c(parts, 0.1) }
|
|
146
|
-
if (length(parts) != 3) {
|
|
147
|
-
stop("Invalid resolution format: {part}. Expected 2 or 3 parts separated by ':' for a range.")
|
|
148
|
-
}
|
|
149
|
-
parts <- as.numeric(parts)
|
|
150
|
-
expanded_res <- c(expanded_res, seq(parts[1], parts[2], by = parts[3]))
|
|
151
|
-
} else {
|
|
152
|
-
expanded_res <- c(expanded_res, as.numeric(part))
|
|
153
|
-
}
|
|
154
|
-
}
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
# keep the last resolution at last
|
|
158
|
-
rev(unique(rev(expanded_res)))
|
|
159
|
-
}
|
|
160
|
-
resolution <- envs$FindClusters$resolution <- expand_resolution(envs$FindClusters$resolution %||% 0.8)
|
|
161
|
-
log_info("Running FindClusters at resolution: {paste(resolution, collapse=',')} ...")
|
|
162
|
-
|
|
163
|
-
envs$FindClusters$object <- sobj
|
|
164
|
-
sobj <- do_call(FindClusters, envs$FindClusters)
|
|
165
|
-
|
|
166
|
-
# recode clusters from 0, 1, 2, ... to c1, c2, c3, ...
|
|
167
|
-
recode_clusters <- function(clusters) {
|
|
168
|
-
recode <- function(x) paste0("c", as.integer(as.character(x)) + 1)
|
|
169
|
-
clusters <- factor(recode(clusters), levels = recode(levels(clusters)))
|
|
170
|
-
clusters
|
|
171
|
-
}
|
|
172
|
-
|
|
173
|
-
graph_name <- envs$FindClusters$graph.name %||% paste0(DefaultAssay(sobj), "_snn_res.")
|
|
174
|
-
for (res in resolution) {
|
|
175
|
-
cluster_name <- paste0(graph_name, res)
|
|
176
|
-
new_cluster_name <- paste0("seurat_clusters.", res)
|
|
177
|
-
sobj@meta.data[[new_cluster_name]] <- recode_clusters(sobj@meta.data[[cluster_name]])
|
|
178
|
-
}
|
|
179
|
-
sobj@meta.data$seurat_clusters <- recode_clusters(sobj@meta.data$seurat_clusters)
|
|
180
|
-
Idents(sobj) <- "seurat_clusters"
|
|
181
|
-
|
|
182
|
-
ident_table <- table(Idents(sobj))
|
|
183
|
-
log_info("- Found {length(ident_table)} clusters at resolution {resolution[length(resolution)]}")
|
|
184
|
-
print(ident_table)
|
|
185
|
-
cat("\n")
|
|
186
|
-
|
|
187
|
-
# plot the tree
|
|
188
|
-
if (length(resolution) > 1) {
|
|
189
|
-
log_info("Plotting clustree ...")
|
|
190
|
-
png(
|
|
191
|
-
file.path(joboutdir, "clustree.png"),
|
|
192
|
-
res = envs$clustree_devpars$res,
|
|
193
|
-
width = envs$clustree_devpars$width,
|
|
194
|
-
height = envs$clustree_devpars$height
|
|
195
|
-
)
|
|
196
|
-
p <- clustree(sobj, prefix = "seurat_clusters.")
|
|
197
|
-
print(p)
|
|
198
|
-
dev.off()
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
if (DefaultAssay(sobj) == "SCT") {
|
|
202
|
-
# https://github.com/satijalab/seurat/issues/6968
|
|
203
|
-
log_info("Running PrepSCTFindMarkers ...")
|
|
204
|
-
sobj <- PrepSCTFindMarkers(sobj)
|
|
205
|
-
# compose a new SeuratCommand to record it to sobj@commands
|
|
206
|
-
scommand <- sobj@commands$FindClusters
|
|
207
|
-
scommand@name <- "PrepSCTFindMarkers"
|
|
208
|
-
scommand@time.stamp <- Sys.time()
|
|
209
|
-
scommand@assay.used <- "SCT"
|
|
210
|
-
scommand@call.string <- "PrepSCTFindMarkers(object = sobj)"
|
|
211
|
-
scommand@params <- list()
|
|
212
|
-
sobj@commands$PrepSCTFindMarkers <- scommand
|
|
213
|
-
}
|
|
49
|
+
sobj <- run_transformation(sobj)
|
|
50
|
+
sobj <- run_umap(sobj)
|
|
51
|
+
sobj <- run_findneighbors(sobj)
|
|
52
|
+
sobj <- run_findclusters(sobj)
|
|
53
|
+
sobj <- run_prepsctfindmarkers(sobj)
|
|
214
54
|
|
|
215
55
|
log_info("Saving results ...")
|
|
216
56
|
saveRDS(sobj, file = rdsfile)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
2
|
|
|
3
3
|
library(parallel)
|
|
4
4
|
library(Seurat)
|
|
@@ -17,6 +17,7 @@ refnorm = {{envs.refnorm | r}}
|
|
|
17
17
|
ncores = {{envs.ncores | r}}
|
|
18
18
|
split_by = {{envs.split_by | r}}
|
|
19
19
|
mutaters = {{envs.mutaters | r}}
|
|
20
|
+
skip_if_normalized = {{envs.skip_if_normalized | r}}
|
|
20
21
|
sctransform_args = {{envs.SCTransform | r: todot="-"}}
|
|
21
22
|
normalizedata_args = {{envs.NormalizeData | r: todot="-"}}
|
|
22
23
|
findtransferanchors_args = {{envs.FindTransferAnchors | r: todot="-"}}
|
|
@@ -40,7 +41,7 @@ mapquery_args$refdata[[use]] = use
|
|
|
40
41
|
|
|
41
42
|
outdir = dirname(outfile)
|
|
42
43
|
if (is.null(split_by)) {
|
|
43
|
-
options(future.globals.maxSize =
|
|
44
|
+
options(future.globals.maxSize = 8 * 1024 ^ 4)
|
|
44
45
|
future::plan(strategy = "multicore", workers = ncores)
|
|
45
46
|
}
|
|
46
47
|
|
|
@@ -98,6 +99,7 @@ if (refnorm == "SCTransform") {
|
|
|
98
99
|
# Load Seurat object
|
|
99
100
|
log_info("- Loading Seurat object")
|
|
100
101
|
sobj = readRDS(sobjfile)
|
|
102
|
+
defassay <- DefaultAssay(sobj)
|
|
101
103
|
|
|
102
104
|
if (!is.null(mutaters) && length(mutaters) > 0) {
|
|
103
105
|
log_info("- Applying mutaters")
|
|
@@ -126,41 +128,55 @@ if (!is.null(split_by)) {
|
|
|
126
128
|
# Normalize data
|
|
127
129
|
log_info("- Normalizing data")
|
|
128
130
|
if (refnorm == "SCTransform") {
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
if (is.null(split_by)) {
|
|
132
|
-
sctransform_args$object = sobj
|
|
133
|
-
query = do_call(SCTransform, sctransform_args)
|
|
131
|
+
if (defassay == "SCT" && skip_if_normalized) {
|
|
132
|
+
log_warn(" Skipping normalization as the object is already SCTransform'ed")
|
|
134
133
|
} else {
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
134
|
+
log_info(" Using SCTransform normalization")
|
|
135
|
+
sctransform_args$residual.features = rownames(x = reference)
|
|
136
|
+
if (is.null(split_by)) {
|
|
137
|
+
sctransform_args$object = sobj
|
|
138
|
+
sobj = do_call(SCTransform, sctransform_args)
|
|
139
|
+
sctransform_args$object <- NULL
|
|
140
|
+
rm(sctransform_args)
|
|
141
|
+
gc()
|
|
142
|
+
} else {
|
|
143
|
+
sobj = mclapply(
|
|
144
|
+
X = sobj,
|
|
145
|
+
FUN = function(x) {
|
|
146
|
+
sctransform_args$object = x
|
|
147
|
+
do_call(SCTransform, sctransform_args)
|
|
148
|
+
},
|
|
149
|
+
mc.cores = ncores
|
|
150
|
+
)
|
|
151
|
+
if (any(unlist(lapply(sobj, class)) == "try-error")) {
|
|
152
|
+
stop(paste0("\nmclapply (SCTransform) error:", sobj))
|
|
153
|
+
}
|
|
145
154
|
}
|
|
146
155
|
}
|
|
147
156
|
} else {
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
normalizedata_args$object = sobj
|
|
151
|
-
query = do_call(NormalizeData, normalizedata_args)
|
|
157
|
+
if (defassay == "RNA" && skip_if_normalized) {
|
|
158
|
+
log_warn(" Skipping normalization as the object is already LogNormalize'd")
|
|
152
159
|
} else {
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
160
|
+
log_info(" Using NormalizeData normalization")
|
|
161
|
+
if (is.null(split_by)) {
|
|
162
|
+
normalizedata_args$object = sobj
|
|
163
|
+
sobj = do_call(NormalizeData, normalizedata_args)
|
|
164
|
+
} else {
|
|
165
|
+
sobj = mclapply(
|
|
166
|
+
X = sobj,
|
|
167
|
+
FUN = function(x) {
|
|
168
|
+
normalizedata_args$object = x
|
|
169
|
+
do_call(NormalizeData, normalizedata_args)
|
|
170
|
+
},
|
|
171
|
+
mc.cores = ncores
|
|
172
|
+
)
|
|
173
|
+
if (any(unlist(lapply(sobj, class)) == "try-error")) {
|
|
174
|
+
stop(paste0("\nmclapply (NormalizeData) error:", sobj))
|
|
175
|
+
}
|
|
163
176
|
}
|
|
177
|
+
normalizedata_args$object <- NULL
|
|
178
|
+
rm(normalizedata_args)
|
|
179
|
+
gc()
|
|
164
180
|
}
|
|
165
181
|
}
|
|
166
182
|
|
|
@@ -168,11 +184,15 @@ if (refnorm == "SCTransform") {
|
|
|
168
184
|
log_info("- Finding anchors")
|
|
169
185
|
findtransferanchors_args$reference = reference
|
|
170
186
|
if (is.null(split_by)) {
|
|
171
|
-
findtransferanchors_args$query =
|
|
187
|
+
findtransferanchors_args$query = sobj
|
|
172
188
|
anchors = do_call(FindTransferAnchors, findtransferanchors_args)
|
|
189
|
+
findtransferanchors_args$reference = NULL
|
|
190
|
+
findtransferanchors_args$query = NULL
|
|
191
|
+
rm(findtransferanchors_args)
|
|
192
|
+
gc()
|
|
173
193
|
} else {
|
|
174
194
|
anchors = mclapply(
|
|
175
|
-
X =
|
|
195
|
+
X = sobj,
|
|
176
196
|
FUN = function(x) {
|
|
177
197
|
findtransferanchors_args$query = x
|
|
178
198
|
do_call(FindTransferAnchors, findtransferanchors_args)
|
|
@@ -188,21 +208,25 @@ if (is.null(split_by)) {
|
|
|
188
208
|
log_info("- Mapping query to reference")
|
|
189
209
|
mapquery_args$reference = reference
|
|
190
210
|
if (is.null(split_by)) {
|
|
191
|
-
mapquery_args$query =
|
|
211
|
+
mapquery_args$query = sobj
|
|
192
212
|
mapquery_args$anchorset = anchors
|
|
193
|
-
|
|
213
|
+
sobj = do_call(MapQuery, mapquery_args)
|
|
214
|
+
mapquery_args$reference = NULL
|
|
215
|
+
mapquery_args$query = NULL
|
|
216
|
+
mapquery_args$anchorset = NULL
|
|
217
|
+
gc()
|
|
194
218
|
} else {
|
|
195
|
-
|
|
196
|
-
X = seq_along(
|
|
219
|
+
sobj = mclapply(
|
|
220
|
+
X = seq_along(sobj),
|
|
197
221
|
FUN = function(i) {
|
|
198
|
-
mapquery_args$query =
|
|
222
|
+
mapquery_args$query = sobj[[i]]
|
|
199
223
|
mapquery_args$anchorset = anchors[[i]]
|
|
200
224
|
do_call(MapQuery, mapquery_args)
|
|
201
225
|
},
|
|
202
226
|
mc.cores = ncores
|
|
203
227
|
)
|
|
204
|
-
if (any(unlist(lapply(
|
|
205
|
-
stop(paste0("\nmclapply (MapQuery) error:",
|
|
228
|
+
if (any(unlist(lapply(sobj, class)) == "try-error")) {
|
|
229
|
+
stop(paste0("\nmclapply (MapQuery) error:", sobj))
|
|
206
230
|
}
|
|
207
231
|
}
|
|
208
232
|
|
|
@@ -221,9 +245,12 @@ if (is.null(split_by)) {
|
|
|
221
245
|
if (e$message == "subscript out of bounds") stop(mappingscore_sob_msg)
|
|
222
246
|
stop(e)
|
|
223
247
|
})
|
|
248
|
+
mappingscore_args$anchors = NULL
|
|
249
|
+
rm(mappingscore_args)
|
|
250
|
+
gc()
|
|
224
251
|
} else {
|
|
225
252
|
mappingscore = mclapply(
|
|
226
|
-
X = seq_along(
|
|
253
|
+
X = seq_along(sobj),
|
|
227
254
|
FUN = function(i) {
|
|
228
255
|
mappingscore_args$anchors = anchors[[i]]
|
|
229
256
|
tryCatch({
|
|
@@ -243,42 +270,59 @@ if (is.null(split_by)) {
|
|
|
243
270
|
# Calculate mapping score and add to metadata
|
|
244
271
|
log_info("- Adding mapping score to metadata")
|
|
245
272
|
if (is.null(split_by)) {
|
|
246
|
-
|
|
247
|
-
object =
|
|
273
|
+
sobj = AddMetaData(
|
|
274
|
+
object = sobj,
|
|
248
275
|
metadata = mappingscore,
|
|
249
276
|
col.name = "mapping.score"
|
|
250
277
|
)
|
|
251
278
|
} else {
|
|
252
|
-
|
|
253
|
-
X = seq_along(
|
|
279
|
+
sobj = mclapply(
|
|
280
|
+
X = seq_along(sobj),
|
|
254
281
|
FUN = function(i) {
|
|
255
282
|
AddMetaData(
|
|
256
|
-
object =
|
|
283
|
+
object = sobj[[i]],
|
|
257
284
|
metadata = mappingscore[[i]],
|
|
258
285
|
col.name = "mapping.score"
|
|
259
286
|
)
|
|
260
287
|
},
|
|
261
288
|
mc.cores = ncores
|
|
262
289
|
)
|
|
263
|
-
if (any(unlist(lapply(
|
|
264
|
-
stop(paste0("\nmclapply (AddMetaData) error:",
|
|
290
|
+
if (any(unlist(lapply(sobj, class)) == "try-error")) {
|
|
291
|
+
stop(paste0("\nmclapply (AddMetaData) error:", sobj))
|
|
265
292
|
}
|
|
266
293
|
|
|
267
294
|
# Combine the results
|
|
268
295
|
log_info("- Merging the results")
|
|
269
|
-
|
|
296
|
+
gc()
|
|
297
|
+
# Memory efficient way to merge the results
|
|
298
|
+
# query = Reduce(function(x, y) merge(x, y, merge.dr = "ref.umap"), query)
|
|
299
|
+
sobj = merge(sobj[[1]], sobj[2:length(sobj)], merge.dr = "ref.umap")
|
|
270
300
|
}
|
|
271
301
|
|
|
272
302
|
# Add the alias to the metadata for the clusters
|
|
273
303
|
log_info("- Adding ident to metadata and set as ident")
|
|
274
|
-
|
|
304
|
+
sobj@meta.data = sobj@meta.data %>% mutate(
|
|
275
305
|
!!sym(ident) := as.factor(!!parse_expr(paste0("predicted.", use)))
|
|
276
306
|
)
|
|
277
|
-
Idents(
|
|
307
|
+
Idents(sobj) = ident
|
|
308
|
+
|
|
309
|
+
# Check if PrepSCTFindMarkers is done
|
|
310
|
+
if (DefaultAssay(sobj) == "SCT") {
|
|
311
|
+
log_info("- Running PrepSCTFindMarkers ...")
|
|
312
|
+
sobj <- PrepSCTFindMarkers(sobj)
|
|
313
|
+
# compose a new SeuratCommand to record it to sobj@commands
|
|
314
|
+
commands <- names(pbmc_small@commands)
|
|
315
|
+
scommand <- pbmc_small@commands[[commands[length(commands)]]]
|
|
316
|
+
scommand@time.stamp <- Sys.time()
|
|
317
|
+
scommand@assay.used <- "SCT"
|
|
318
|
+
scommand@call.string <- "PrepSCTFindMarkers(object = sobj)"
|
|
319
|
+
scommand@params <- list()
|
|
320
|
+
sobj@commands$PrepSCTFindMarkers <- scommand
|
|
321
|
+
}
|
|
278
322
|
|
|
279
323
|
# Save
|
|
280
324
|
log_info("- Saving result ...")
|
|
281
|
-
saveRDS(
|
|
325
|
+
saveRDS(sobj, file = outfile)
|
|
282
326
|
|
|
283
327
|
|
|
284
328
|
# ############################
|
|
@@ -291,7 +335,7 @@ ref.reduction = mapquery_args$reduction.model %||% "wnn.umap"
|
|
|
291
335
|
for (qname in names(mapquery_args$refdata)) {
|
|
292
336
|
rname <- mapquery_args$refdata[[qname]]
|
|
293
337
|
|
|
294
|
-
if (grepl("Array", class(reference[[rname]])) && grepl("Array", class(
|
|
338
|
+
if (grepl("Array", class(reference[[rname]])) && grepl("Array", class(sobj[[qname]]))) {
|
|
295
339
|
log_warn(" Skipping transferred array: {qname} -> {rname}")
|
|
296
340
|
next
|
|
297
341
|
}
|
|
@@ -308,7 +352,7 @@ for (qname in names(mapquery_args$refdata)) {
|
|
|
308
352
|
) + NoLegend()
|
|
309
353
|
|
|
310
354
|
query_p <- DimPlot(
|
|
311
|
-
object =
|
|
355
|
+
object = sobj,
|
|
312
356
|
reduction = "ref.umap",
|
|
313
357
|
group.by = paste0("predicted.", qname),
|
|
314
358
|
label = TRUE,
|