biopipen-0.29.2-py3-none-any.whl → biopipen-0.31.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (106)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +2 -0
  3. biopipen/core/filters.py +21 -0
  4. biopipen/ns/plot.py +55 -0
  5. biopipen/ns/scrna.py +110 -21
  6. biopipen/ns/web.py +87 -5
  7. biopipen/scripts/bam/CNAClinic.R +2 -1
  8. biopipen/scripts/cellranger/CellRangerCount.py +3 -3
  9. biopipen/scripts/cellranger/CellRangerSummary.R +2 -1
  10. biopipen/scripts/cnv/AneuploidyScore.R +1 -1
  11. biopipen/scripts/cnv/AneuploidyScoreSummary.R +2 -2
  12. biopipen/scripts/delim/RowsBinder.R +1 -1
  13. biopipen/scripts/delim/SampleInfo.R +3 -2
  14. biopipen/scripts/gene/GeneNameConversion.R +2 -2
  15. biopipen/scripts/gsea/Enrichr.R +3 -3
  16. biopipen/scripts/gsea/FGSEA.R +2 -2
  17. biopipen/scripts/gsea/GSEA.R +2 -2
  18. biopipen/scripts/gsea/PreRank.R +2 -2
  19. biopipen/scripts/plot/Heatmap.R +3 -3
  20. biopipen/scripts/plot/Manhattan.R +2 -1
  21. biopipen/scripts/plot/QQPlot.R +1 -1
  22. biopipen/scripts/plot/ROC.R +1 -1
  23. biopipen/scripts/plot/Scatter.R +112 -0
  24. biopipen/scripts/plot/VennDiagram.R +3 -3
  25. biopipen/scripts/regulatory/MotifAffinityTest.R +3 -7
  26. biopipen/scripts/rnaseq/Simulation.R +1 -1
  27. biopipen/scripts/rnaseq/UnitConversion.R +2 -1
  28. biopipen/scripts/scrna/AnnData2Seurat.R +1 -1
  29. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +24 -8
  30. biopipen/scripts/scrna/CellTypeAnnotation-common.R +10 -0
  31. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +9 -1
  32. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -8
  33. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +15 -2
  34. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +38 -15
  35. biopipen/scripts/scrna/CellTypeAnnotation.R +3 -0
  36. biopipen/scripts/scrna/CellsDistribution.R +4 -3
  37. biopipen/scripts/scrna/DimPlots.R +1 -1
  38. biopipen/scripts/scrna/ExprImputation-alra.R +1 -1
  39. biopipen/scripts/scrna/MarkersFinder.R +5 -5
  40. biopipen/scripts/scrna/MetaMarkers.R +4 -4
  41. biopipen/scripts/scrna/ModuleScoreCalculator.R +2 -1
  42. biopipen/scripts/scrna/RadarPlots.R +1 -1
  43. biopipen/scripts/scrna/ScFGSEA.R +4 -3
  44. biopipen/scripts/scrna/ScSimulation.R +64 -0
  45. biopipen/scripts/scrna/Seurat2AnnData.R +1 -1
  46. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +73 -0
  47. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +4 -3
  48. biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -5
  49. biopipen/scripts/scrna/SeuratClusterStats-hists.R +6 -5
  50. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +4 -3
  51. biopipen/scripts/scrna/SeuratClusterStats-stats.R +20 -25
  52. biopipen/scripts/scrna/SeuratClusterStats.R +24 -8
  53. biopipen/scripts/scrna/SeuratClustering-common.R +213 -0
  54. biopipen/scripts/scrna/SeuratClustering.R +10 -170
  55. biopipen/scripts/scrna/SeuratMap2Ref.R +98 -54
  56. biopipen/scripts/scrna/SeuratMetadataMutater.R +2 -2
  57. biopipen/scripts/scrna/SeuratPreparing-common.R +452 -0
  58. biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +201 -0
  59. biopipen/scripts/scrna/SeuratPreparing.R +22 -562
  60. biopipen/scripts/scrna/SeuratSubClustering.R +24 -39
  61. biopipen/scripts/scrna/TopExpressingGenes.R +1 -1
  62. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +2 -2
  63. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +2 -2
  64. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +3 -3
  65. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +3 -3
  66. biopipen/scripts/snp/MatrixEQTL.R +1 -1
  67. biopipen/scripts/snp/PlinkCallRate.R +2 -2
  68. biopipen/scripts/snp/PlinkFreq.R +2 -2
  69. biopipen/scripts/snp/PlinkHWE.R +2 -2
  70. biopipen/scripts/snp/PlinkHet.R +2 -2
  71. biopipen/scripts/snp/PlinkIBD.R +2 -2
  72. biopipen/scripts/stats/ChowTest.R +1 -1
  73. biopipen/scripts/stats/DiffCoexpr.R +1 -1
  74. biopipen/scripts/stats/LiquidAssoc.R +1 -1
  75. biopipen/scripts/stats/Mediation.R +11 -9
  76. biopipen/scripts/stats/MetaPvalue.R +4 -1
  77. biopipen/scripts/stats/MetaPvalue1.R +4 -1
  78. biopipen/scripts/tcr/Attach2Seurat.R +1 -1
  79. biopipen/scripts/tcr/CDR3AAPhyschem.R +1 -1
  80. biopipen/scripts/tcr/CloneResidency.R +2 -2
  81. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  82. biopipen/scripts/tcr/Immunarch-basic.R +0 -4
  83. biopipen/scripts/tcr/Immunarch-clonality.R +0 -4
  84. biopipen/scripts/tcr/Immunarch-diversity.R +2 -24
  85. biopipen/scripts/tcr/Immunarch-geneusage.R +0 -2
  86. biopipen/scripts/tcr/Immunarch-kmer.R +0 -2
  87. biopipen/scripts/tcr/Immunarch-overlap.R +0 -2
  88. biopipen/scripts/tcr/Immunarch-spectratyping.R +0 -2
  89. biopipen/scripts/tcr/Immunarch-tracking.R +0 -2
  90. biopipen/scripts/tcr/Immunarch-vjjunc.R +0 -2
  91. biopipen/scripts/tcr/Immunarch.R +43 -11
  92. biopipen/scripts/tcr/ImmunarchFilter.R +1 -1
  93. biopipen/scripts/tcr/ImmunarchLoading.R +2 -2
  94. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  95. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  96. biopipen/scripts/tcr/TCRClustering.R +2 -2
  97. biopipen/scripts/tcr/TESSA.R +2 -2
  98. biopipen/scripts/vcf/TruvariBenchSummary.R +2 -2
  99. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  100. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  101. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  102. biopipen/scripts/web/gcloud_common.py +49 -0
  103. {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/METADATA +7 -7
  104. {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/RECORD +106 -96
  105. {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/WHEEL +0 -0
  106. {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,5 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
- source("{{biopipen_dir}}/utils/caching.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+ {{ biopipen_dir | joinpaths: "utils", "caching.R" | source_r }}
3
3
 
4
4
  library(Seurat)
5
5
  library(future)
@@ -7,7 +7,6 @@ library(rlang)
7
7
  library(tidyr)
8
8
  library(dplyr)
9
9
  library(digest)
10
- library(clustree)
11
10
 
12
11
  set.seed(8525)
13
12
 
@@ -24,16 +23,10 @@ options(str = strOptions(vec.len = 5, digits.d = 5))
24
23
  options(future.globals.maxSize = 80000 * 1024^2)
25
24
  plan(strategy = "multicore", workers = envs$ncores)
26
25
 
27
- .expand_dims <- function(args, name = "dims") {
28
- # Expand dims from 30 to 1:30
29
- if (is.numeric(args[[name]]) && length(args[[name]] == 1)) {
30
- args[[name]] <- 1:args[[name]]
31
- }
32
- args
33
- }
26
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClustering-common.R" | source_r }}
34
27
 
35
- envs$RunUMAP <- .expand_dims(envs$RunUMAP)
36
- envs$FindNeighbors <- .expand_dims(envs$FindNeighbors)
28
+ envs$RunUMAP <- expand_dims(envs$RunUMAP)
29
+ envs$FindNeighbors <- expand_dims(envs$FindNeighbors)
37
30
 
38
31
  log_info("Reading Seurat object ...")
39
32
  sobj <- readRDS(srtfile)
@@ -53,164 +46,11 @@ if (is.character(envs$cache)) {
53
46
  writeLines(sobj_sig, file.path(cache_dir, "signature.txt"))
54
47
  }
55
48
 
56
- if (length(envs$ScaleData) > 0) {
57
- if (DefaultAssay(sobj) == "SCT") {
58
- stop("SCT assay detected, but ScaleData is specified. Use SCTransform instead.")
59
- }
60
- cached <- get_cached(envs$ScaleData, "ScaleData", cache_dir)
61
- if (is.null(cached$data)) {
62
- log_info("Running ScaleData ...")
63
- envs$ScaleData$object <- sobj
64
- sobj <- do_call(ScaleData, envs$ScaleData)
65
- cached$data <- list(assay = sobj@assays$RNA, commands = sobj@commands)
66
- save_to_cache(cached, "ScaleData", cache_dir)
67
- } else {
68
- log_info("Loading cached ScaleData ...")
69
- sobj@assays$RNA <- cached$data$assay
70
- sobj@commands <- cached$data$commands
71
- DefaultAssay(sobj) <- "RNA"
72
- }
73
- } else if (length(envs$SCTransform) > 0) {
74
- if (DefaultAssay(sobj) != "SCT") {
75
- stop("SCT assay not detected, but SCTransform is specified. Use ScaleData instead.")
76
- }
77
- cached <- get_cached(envs$SCTransform, "SCTransform", cache_dir)
78
- asssay <- envs$SCTransform$new.assay.name %||% "SCT"
79
- if (is.null(cached$data)) {
80
- log_info("Running SCTransform ...")
81
- envs$SCTransform$object <- sobj
82
- sobj <- do_call(SCTransform, envs$SCTransform)
83
- cached$data <- list(assay = sobj@assays$SCT, commands = sobj@commands)
84
- save_to_cache(cached, "SCTransform", cache_dir)
85
- } else {
86
- log_info("Loading cached SCTransform ...")
87
- sobj@assays[[assay]] <- cached$data$assay
88
- sobj@commands <- cached$data$commands
89
- DefaultAssay(sobj) <- assay
90
- }
91
- }
92
-
93
- cached <- get_cached(envs$RunUMAP, "RunUMAP", cache_dir)
94
- reduc_name <- envs$RunUMAP$reduction.name %||% "umap"
95
- if (is.null(cached$data)) {
96
- log_info("Running RunUMAP ...")
97
- umap_args <- list_setdefault(
98
- envs$RunUMAP,
99
- object = sobj,
100
- dims = 1:30,
101
- reduction = sobj@misc$integrated_new_reduction %||% "pca"
102
- )
103
- ncells <- ncol(sobj)
104
- umap_args$dims <- 1:min(max(umap_args$dims), ncells - 1)
105
- umap_method <- envs$RunUMAP$umap.method %||% "uwot"
106
- if (umap_method == "uwot" && is.null(envs$RunUMAP$n.neighbors)) {
107
- # https://github.com/satijalab/seurat/issues/4312
108
- umap_args$n.neighbors <- min(ncells - 1, 30)
109
- }
110
- sobj <- do_call(RunUMAP, umap_args)
111
- cached$data <- list(reduc = sobj@reductions[[reduc_name]], commands = sobj@commands)
112
- save_to_cache(cached, "RunUMAP", cache_dir)
113
- } else {
114
- log_info("Loading cached RunUMAP ...")
115
- sobj@reductions[[reduc_name]] <- cached$data$reduc
116
- sobj@commands <- cached$data$commands
117
- }
118
-
119
- cached <- get_cached(envs$FindNeighbors, "FindNeighbors", cache_dir)
120
- if (is.null(cached$data)) {
121
- log_info("Running FindNeighbors ...")
122
- envs$FindNeighbors$object <- sobj
123
- envs$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
124
- sobj <- do_call(FindNeighbors, envs$FindNeighbors)
125
- cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
126
- save_to_cache(cached, "FindNeighbors", cache_dir)
127
- } else {
128
- log_info("Loading cached FindNeighbors ...")
129
- sobj@graphs <- cached$data$graphs
130
- sobj@commands <- cached$data$commands
131
- }
132
-
133
- envs$FindClusters$random.seed <- envs$FindClusters$random.seed %||% 8525
134
- expand_resolution <- function(resolution) {
135
- expanded_res <- c()
136
- for (res in resolution) {
137
- if (is.numeric(res)) {
138
- expanded_res <- c(expanded_res, res)
139
- } else {
140
- # is.character
141
- parts <- trimws(unlist(strsplit(res, ",")))
142
- for (part in parts) {
143
- if (grepl(":", part)) {
144
- parts <- trimws(unlist(strsplit(part, ":")))
145
- if (length(parts) == 2) { parts <- c(parts, 0.1) }
146
- if (length(parts) != 3) {
147
- stop("Invalid resolution format: {part}. Expected 2 or 3 parts separated by ':' for a range.")
148
- }
149
- parts <- as.numeric(parts)
150
- expanded_res <- c(expanded_res, seq(parts[1], parts[2], by = parts[3]))
151
- } else {
152
- expanded_res <- c(expanded_res, as.numeric(part))
153
- }
154
- }
155
- }
156
- }
157
- # keep the last resolution at last
158
- rev(unique(rev(expanded_res)))
159
- }
160
- resolution <- envs$FindClusters$resolution <- expand_resolution(envs$FindClusters$resolution %||% 0.8)
161
- log_info("Running FindClusters at resolution: {paste(resolution, collapse=',')} ...")
162
-
163
- envs$FindClusters$object <- sobj
164
- sobj <- do_call(FindClusters, envs$FindClusters)
165
-
166
- # recode clusters from 0, 1, 2, ... to c1, c2, c3, ...
167
- recode_clusters <- function(clusters) {
168
- recode <- function(x) paste0("c", as.integer(as.character(x)) + 1)
169
- clusters <- factor(recode(clusters), levels = recode(levels(clusters)))
170
- clusters
171
- }
172
-
173
- graph_name <- envs$FindClusters$graph.name %||% paste0(DefaultAssay(sobj), "_snn_res.")
174
- for (res in resolution) {
175
- cluster_name <- paste0(graph_name, res)
176
- new_cluster_name <- paste0("seurat_clusters.", res)
177
- sobj@meta.data[[new_cluster_name]] <- recode_clusters(sobj@meta.data[[cluster_name]])
178
- }
179
- sobj@meta.data$seurat_clusters <- recode_clusters(sobj@meta.data$seurat_clusters)
180
- Idents(sobj) <- "seurat_clusters"
181
-
182
- ident_table <- table(Idents(sobj))
183
- log_info("- Found {length(ident_table)} clusters at resolution {resolution[length(resolution)]}")
184
- print(ident_table)
185
- cat("\n")
186
-
187
- # plot the tree
188
- if (length(resolution) > 1) {
189
- log_info("Plotting clustree ...")
190
- png(
191
- file.path(joboutdir, "clustree.png"),
192
- res = envs$clustree_devpars$res,
193
- width = envs$clustree_devpars$width,
194
- height = envs$clustree_devpars$height
195
- )
196
- p <- clustree(sobj, prefix = "seurat_clusters.")
197
- print(p)
198
- dev.off()
199
- }
200
-
201
- if (DefaultAssay(sobj) == "SCT") {
202
- # https://github.com/satijalab/seurat/issues/6968
203
- log_info("Running PrepSCTFindMarkers ...")
204
- sobj <- PrepSCTFindMarkers(sobj)
205
- # compose a new SeuratCommand to record it to sobj@commands
206
- scommand <- sobj@commands$FindClusters
207
- scommand@name <- "PrepSCTFindMarkers"
208
- scommand@time.stamp <- Sys.time()
209
- scommand@assay.used <- "SCT"
210
- scommand@call.string <- "PrepSCTFindMarkers(object = sobj)"
211
- scommand@params <- list()
212
- sobj@commands$PrepSCTFindMarkers <- scommand
213
- }
49
+ sobj <- run_transformation(sobj)
50
+ sobj <- run_umap(sobj)
51
+ sobj <- run_findneighbors(sobj)
52
+ sobj <- run_findclusters(sobj)
53
+ sobj <- run_prepsctfindmarkers(sobj)
214
54
 
215
55
  log_info("Saving results ...")
216
56
  saveRDS(sobj, file = rdsfile)
@@ -1,4 +1,4 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
2
 
3
3
  library(parallel)
4
4
  library(Seurat)
@@ -17,6 +17,7 @@ refnorm = {{envs.refnorm | r}}
17
17
  ncores = {{envs.ncores | r}}
18
18
  split_by = {{envs.split_by | r}}
19
19
  mutaters = {{envs.mutaters | r}}
20
+ skip_if_normalized = {{envs.skip_if_normalized | r}}
20
21
  sctransform_args = {{envs.SCTransform | r: todot="-"}}
21
22
  normalizedata_args = {{envs.NormalizeData | r: todot="-"}}
22
23
  findtransferanchors_args = {{envs.FindTransferAnchors | r: todot="-"}}
@@ -40,7 +41,7 @@ mapquery_args$refdata[[use]] = use
40
41
 
41
42
  outdir = dirname(outfile)
42
43
  if (is.null(split_by)) {
43
- options(future.globals.maxSize = 80000 * 1024^2)
44
+ options(future.globals.maxSize = 8 * 1024 ^ 4)
44
45
  future::plan(strategy = "multicore", workers = ncores)
45
46
  }
46
47
 
@@ -98,6 +99,7 @@ if (refnorm == "SCTransform") {
98
99
  # Load Seurat object
99
100
  log_info("- Loading Seurat object")
100
101
  sobj = readRDS(sobjfile)
102
+ defassay <- DefaultAssay(sobj)
101
103
 
102
104
  if (!is.null(mutaters) && length(mutaters) > 0) {
103
105
  log_info("- Applying mutaters")
@@ -126,41 +128,55 @@ if (!is.null(split_by)) {
126
128
  # Normalize data
127
129
  log_info("- Normalizing data")
128
130
  if (refnorm == "SCTransform") {
129
- log_info(" Using SCTransform normalization")
130
- sctransform_args$residual.features = rownames(x = reference)
131
- if (is.null(split_by)) {
132
- sctransform_args$object = sobj
133
- query = do_call(SCTransform, sctransform_args)
131
+ if (defassay == "SCT" && skip_if_normalized) {
132
+ log_warn(" Skipping normalization as the object is already SCTransform'ed")
134
133
  } else {
135
- query = mclapply(
136
- X = sobj,
137
- FUN = function(x) {
138
- sctransform_args$object = x
139
- do_call(SCTransform, sctransform_args)
140
- },
141
- mc.cores = ncores
142
- )
143
- if (any(unlist(lapply(query, class)) == "try-error")) {
144
- stop(paste0("\nmclapply (SCTransform) error:", query))
134
+ log_info(" Using SCTransform normalization")
135
+ sctransform_args$residual.features = rownames(x = reference)
136
+ if (is.null(split_by)) {
137
+ sctransform_args$object = sobj
138
+ sobj = do_call(SCTransform, sctransform_args)
139
+ sctransform_args$object <- NULL
140
+ rm(sctransform_args)
141
+ gc()
142
+ } else {
143
+ sobj = mclapply(
144
+ X = sobj,
145
+ FUN = function(x) {
146
+ sctransform_args$object = x
147
+ do_call(SCTransform, sctransform_args)
148
+ },
149
+ mc.cores = ncores
150
+ )
151
+ if (any(unlist(lapply(sobj, class)) == "try-error")) {
152
+ stop(paste0("\nmclapply (SCTransform) error:", sobj))
153
+ }
145
154
  }
146
155
  }
147
156
  } else {
148
- log_info(" Using NormalizeData normalization")
149
- if (is.null(split_by)) {
150
- normalizedata_args$object = sobj
151
- query = do_call(NormalizeData, normalizedata_args)
157
+ if (defassay == "RNA" && skip_if_normalized) {
158
+ log_warn(" Skipping normalization as the object is already LogNormalize'd")
152
159
  } else {
153
- query = mclapply(
154
- X = sobj,
155
- FUN = function(x) {
156
- normalizedata_args$object = x
157
- do_call(NormalizeData, normalizedata_args)
158
- },
159
- mc.cores = ncores
160
- )
161
- if (any(unlist(lapply(query, class)) == "try-error")) {
162
- stop(paste0("\nmclapply (NormalizeData) error:", query))
160
+ log_info(" Using NormalizeData normalization")
161
+ if (is.null(split_by)) {
162
+ normalizedata_args$object = sobj
163
+ sobj = do_call(NormalizeData, normalizedata_args)
164
+ } else {
165
+ sobj = mclapply(
166
+ X = sobj,
167
+ FUN = function(x) {
168
+ normalizedata_args$object = x
169
+ do_call(NormalizeData, normalizedata_args)
170
+ },
171
+ mc.cores = ncores
172
+ )
173
+ if (any(unlist(lapply(sobj, class)) == "try-error")) {
174
+ stop(paste0("\nmclapply (NormalizeData) error:", sobj))
175
+ }
163
176
  }
177
+ normalizedata_args$object <- NULL
178
+ rm(normalizedata_args)
179
+ gc()
164
180
  }
165
181
  }
166
182
 
@@ -168,11 +184,15 @@ if (refnorm == "SCTransform") {
168
184
  log_info("- Finding anchors")
169
185
  findtransferanchors_args$reference = reference
170
186
  if (is.null(split_by)) {
171
- findtransferanchors_args$query = query
187
+ findtransferanchors_args$query = sobj
172
188
  anchors = do_call(FindTransferAnchors, findtransferanchors_args)
189
+ findtransferanchors_args$reference = NULL
190
+ findtransferanchors_args$query = NULL
191
+ rm(findtransferanchors_args)
192
+ gc()
173
193
  } else {
174
194
  anchors = mclapply(
175
- X = query,
195
+ X = sobj,
176
196
  FUN = function(x) {
177
197
  findtransferanchors_args$query = x
178
198
  do_call(FindTransferAnchors, findtransferanchors_args)
@@ -188,21 +208,25 @@ if (is.null(split_by)) {
188
208
  log_info("- Mapping query to reference")
189
209
  mapquery_args$reference = reference
190
210
  if (is.null(split_by)) {
191
- mapquery_args$query = query
211
+ mapquery_args$query = sobj
192
212
  mapquery_args$anchorset = anchors
193
- query = do_call(MapQuery, mapquery_args)
213
+ sobj = do_call(MapQuery, mapquery_args)
214
+ mapquery_args$reference = NULL
215
+ mapquery_args$query = NULL
216
+ mapquery_args$anchorset = NULL
217
+ gc()
194
218
  } else {
195
- query = mclapply(
196
- X = seq_along(query),
219
+ sobj = mclapply(
220
+ X = seq_along(sobj),
197
221
  FUN = function(i) {
198
- mapquery_args$query = query[[i]]
222
+ mapquery_args$query = sobj[[i]]
199
223
  mapquery_args$anchorset = anchors[[i]]
200
224
  do_call(MapQuery, mapquery_args)
201
225
  },
202
226
  mc.cores = ncores
203
227
  )
204
- if (any(unlist(lapply(query, class)) == "try-error")) {
205
- stop(paste0("\nmclapply (MapQuery) error:", query))
228
+ if (any(unlist(lapply(sobj, class)) == "try-error")) {
229
+ stop(paste0("\nmclapply (MapQuery) error:", sobj))
206
230
  }
207
231
  }
208
232
 
@@ -221,9 +245,12 @@ if (is.null(split_by)) {
221
245
  if (e$message == "subscript out of bounds") stop(mappingscore_sob_msg)
222
246
  stop(e)
223
247
  })
248
+ mappingscore_args$anchors = NULL
249
+ rm(mappingscore_args)
250
+ gc()
224
251
  } else {
225
252
  mappingscore = mclapply(
226
- X = seq_along(query),
253
+ X = seq_along(sobj),
227
254
  FUN = function(i) {
228
255
  mappingscore_args$anchors = anchors[[i]]
229
256
  tryCatch({
@@ -243,42 +270,59 @@ if (is.null(split_by)) {
243
270
  # Calculate mapping score and add to metadata
244
271
  log_info("- Adding mapping score to metadata")
245
272
  if (is.null(split_by)) {
246
- query = AddMetaData(
247
- object = query,
273
+ sobj = AddMetaData(
274
+ object = sobj,
248
275
  metadata = mappingscore,
249
276
  col.name = "mapping.score"
250
277
  )
251
278
  } else {
252
- query = mclapply(
253
- X = seq_along(query),
279
+ sobj = mclapply(
280
+ X = seq_along(sobj),
254
281
  FUN = function(i) {
255
282
  AddMetaData(
256
- object = query[[i]],
283
+ object = sobj[[i]],
257
284
  metadata = mappingscore[[i]],
258
285
  col.name = "mapping.score"
259
286
  )
260
287
  },
261
288
  mc.cores = ncores
262
289
  )
263
- if (any(unlist(lapply(query, class)) == "try-error")) {
264
- stop(paste0("\nmclapply (AddMetaData) error:", query))
290
+ if (any(unlist(lapply(sobj, class)) == "try-error")) {
291
+ stop(paste0("\nmclapply (AddMetaData) error:", sobj))
265
292
  }
266
293
 
267
294
  # Combine the results
268
295
  log_info("- Merging the results")
269
- query = merge(query[[1]], query[2:length(query)], merge.dr = "ref.umap")
296
+ gc()
297
+ # Memory efficient way to merge the results
298
+ # query = Reduce(function(x, y) merge(x, y, merge.dr = "ref.umap"), query)
299
+ sobj = merge(sobj[[1]], sobj[2:length(sobj)], merge.dr = "ref.umap")
270
300
  }
271
301
 
272
302
  # Add the alias to the metadata for the clusters
273
303
  log_info("- Adding ident to metadata and set as ident")
274
- query@meta.data = query@meta.data %>% mutate(
304
+ sobj@meta.data = sobj@meta.data %>% mutate(
275
305
  !!sym(ident) := as.factor(!!parse_expr(paste0("predicted.", use)))
276
306
  )
277
- Idents(query) = ident
307
+ Idents(sobj) = ident
308
+
309
+ # Check if PrepSCTFindMarkers is done
310
+ if (DefaultAssay(sobj) == "SCT") {
311
+ log_info("- Running PrepSCTFindMarkers ...")
312
+ sobj <- PrepSCTFindMarkers(sobj)
313
+ # compose a new SeuratCommand to record it to sobj@commands
314
+ commands <- names(pbmc_small@commands)
315
+ scommand <- pbmc_small@commands[[commands[length(commands)]]]
316
+ scommand@time.stamp <- Sys.time()
317
+ scommand@assay.used <- "SCT"
318
+ scommand@call.string <- "PrepSCTFindMarkers(object = sobj)"
319
+ scommand@params <- list()
320
+ sobj@commands$PrepSCTFindMarkers <- scommand
321
+ }
278
322
 
279
323
  # Save
280
324
  log_info("- Saving result ...")
281
- saveRDS(query, file = outfile)
325
+ saveRDS(sobj, file = outfile)
282
326
 
283
327
 
284
328
  # ############################
@@ -291,7 +335,7 @@ ref.reduction = mapquery_args$reduction.model %||% "wnn.umap"
291
335
  for (qname in names(mapquery_args$refdata)) {
292
336
  rname <- mapquery_args$refdata[[qname]]
293
337
 
294
- if (grepl("Array", class(reference[[rname]])) && grepl("Array", class(query[[qname]]))) {
338
+ if (grepl("Array", class(reference[[rname]])) && grepl("Array", class(sobj[[qname]]))) {
295
339
  log_warn(" Skipping transferred array: {qname} -> {rname}")
296
340
  next
297
341
  }
@@ -308,7 +352,7 @@ for (qname in names(mapquery_args$refdata)) {
308
352
  ) + NoLegend()
309
353
 
310
354
  query_p <- DimPlot(
311
- object = query,
355
+ object = sobj,
312
356
  reduction = "ref.umap",
313
357
  group.by = paste0("predicted.", qname),
314
358
  label = TRUE,
@@ -1,5 +1,5 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
- source("{{biopipen_dir}}/utils/mutate_helpers.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+ {{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
3
3
 
4
4
  library(rlang)
5
5
  library(tibble)