biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (149)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +290 -288
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +4 -1
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/MarkersFinder.R +348 -217
  73. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  74. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  75. biopipen/scripts/scrna/RadarPlots.R +1 -1
  76. biopipen/scripts/scrna/ScFGSEA.R +157 -75
  77. biopipen/scripts/scrna/ScSimulation.R +11 -10
  78. biopipen/scripts/scrna/ScVelo.py +605 -0
  79. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  80. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  81. biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
  82. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  83. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  84. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  85. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  86. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  87. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  88. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  89. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  90. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  91. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  92. biopipen/scripts/scrna/Subset10X.R +2 -2
  93. biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
  94. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  95. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  96. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  99. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  100. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  101. biopipen/scripts/snp/PlinkFreq.R +34 -41
  102. biopipen/scripts/snp/PlinkHWE.R +23 -18
  103. biopipen/scripts/snp/PlinkHet.R +26 -22
  104. biopipen/scripts/snp/PlinkIBD.R +30 -34
  105. biopipen/scripts/stats/ChowTest.R +9 -8
  106. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  107. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  108. biopipen/scripts/stats/Mediation.R +8 -8
  109. biopipen/scripts/stats/MetaPvalue.R +11 -13
  110. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  111. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  112. biopipen/scripts/tcr/ClonalStats.R +5 -4
  113. biopipen/scripts/tcr/CloneResidency.R +3 -3
  114. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  115. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  116. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  117. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  118. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  119. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  120. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  121. biopipen/scripts/tcr/TCRClustering.R +86 -97
  122. biopipen/scripts/tcr/TESSA.R +65 -115
  123. biopipen/scripts/tcr/VJUsage.R +5 -5
  124. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  125. biopipen/utils/common_docstrs.py +66 -63
  126. biopipen/utils/reporter.py +177 -0
  127. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
  128. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
  129. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
  130. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  131. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  132. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  133. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  134. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  135. biopipen/reports/utils/gsea.liq +0 -110
  136. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  137. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  138. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  139. biopipen/utils/caching.R +0 -44
  140. biopipen/utils/gene.R +0 -95
  141. biopipen/utils/gsea.R +0 -329
  142. biopipen/utils/io.R +0 -20
  143. biopipen/utils/misc.R +0 -602
  144. biopipen/utils/mutate_helpers.R +0 -581
  145. biopipen/utils/plot.R +0 -209
  146. biopipen/utils/repr.R +0 -146
  147. biopipen/utils/rnaseq.R +0 -48
  148. biopipen/utils/single_cell.R +0 -207
  149. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
@@ -1,19 +1,12 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
- library(parallel)
4
1
  library(Seurat)
5
- library(SeuratDisk)
6
2
  library(rlang)
7
- library(dplyr)
8
- library(tidyr)
9
- library(ggplot2)
10
- library(ggprism)
3
+ library(biopipen.utils)
11
4
 
12
5
  set.seed(8525)
13
- theme_set(theme_prism())
14
6
 
15
7
  sobjfile = {{in.sobjfile | r}}
16
8
  outfile = {{out.outfile | r}}
9
+ joboutdir = {{job.outdir | r}}
17
10
  use = {{envs.use | r}}
18
11
  ident = {{envs.ident | r}}
19
12
  ref = {{envs.ref | r}}
@@ -25,8 +18,16 @@ skip_if_normalized = {{envs.skip_if_normalized | r}}
25
18
  sctransform_args = {{envs.SCTransform | r: todot="-"}}
26
19
  normalizedata_args = {{envs.NormalizeData | r: todot="-"}}
27
20
  findtransferanchors_args = {{envs.FindTransferAnchors | r: todot="-"}}
28
- mappingscore_args = {{envs.MappingScore | r: todot="-"}}
29
21
  mapquery_args = {{envs.MapQuery | r: todot="-"}}
22
+ cache = {{envs.cache | r}}
23
+ plots = {{envs.plots | r}}
24
+
25
+ log <- get_logger()
26
+ reporter <- get_reporter()
27
+
28
+ options(future.globals.maxSize = 8 * 1024 ^ 4)
29
+ options(future.rng.onMisuse="ignore")
30
+ options(Seurat.object.assay.version = "v5")
30
31
 
31
32
  # See if we have a reference
32
33
  if (is.null(ref)) {
@@ -37,376 +38,74 @@ if (is.null(use)) {
37
38
  stop("No use provided (envs.use), don't know which column to transfer as cluster")
38
39
  }
39
40
 
40
- if (is.null(mapquery_args$refdata) || length(mapquery_args$refdata) == 0) {
41
- mapquery_args$refdata = list()
42
- }
43
-
44
- mapquery_args$refdata[[use]] = use
45
-
46
41
  outdir = dirname(outfile)
42
+ if (isTRUE(cache)) {
43
+ cache = joboutdir
44
+ }
47
45
  if (is.null(split_by)) {
48
46
  options(future.globals.maxSize = 8 * 1024 ^ 4)
49
47
  future::plan(strategy = "multicore", workers = ncores)
50
48
  }
51
49
 
52
- .is_sct <- function(x) {
53
- return(Seurat:::IsSCT(assay = x@assays[[DefaultAssay(x)]]))
54
- }
55
-
56
- .expand_dims = function(args, name = "dims") {
57
- # Expand dims from 30 to 1:30
58
- if (is.numeric(args[[name]]) && length(args[[name]] == 1)) {
59
- args[[name]] = 1:args[[name]]
60
- }
61
- args
62
- }
63
- findtransferanchors_args = .expand_dims(findtransferanchors_args)
64
-
65
- # Load reference
66
- log_info("- Loading reference")
67
- if (endsWith(ref, ".rds") || endsWith(ref, ".RDS")) {
68
- reference = readRDS(ref)
69
- } else if (endsWith(ref, ".h5ad") || endsWith(ref, ".H5AD")) {
70
- reference = ReadH5AD(ref)
50
+ log$info("Loading reference ...")
51
+ if (endsWith(ref, ".rds") || endsWith(ref, ".RDS") || endsWith(ref, ".qs") || endsWith(ref, ".qs2")) {
52
+ reference <- read_obj(ref)
53
+ } else if (endsWith(ref, ".h5seurat") || endsWith(ref, ".H5Seurat")) {
54
+ reference <- SeuratDisk::LoadH5Seurat(ref)
71
55
  } else {
72
- reference = LoadH5Seurat(ref)
73
- }
74
- reference = UpdateSeuratObject(reference)
75
- reference = UpdateSCTAssays(reference)
76
-
77
- # check if refdata exists in the reference
78
- for (rname in names(mapquery_args$refdata)) {
79
- use_name <- mapquery_args$refdata[[rname]]
80
- # transferring an assay
81
- if (use_name %in% names(reference)) { next }
82
- # transferring a metadata column
83
- if (!use_name %in% colnames(reference@meta.data)) {
84
- stop(paste0(
85
- "The reference does not have the column '",
86
- use_name,
87
- "' in either assays or metadata. "
88
- ))
89
- if (startsWith(use_name, "predicted.")) {
90
- stop(paste0(
91
- "Do you mean: ", substring(use_name, 11),
92
- ))
93
- }
94
- }
95
- }
96
-
97
- if (refnorm == "auto") {
98
- refnorm = ifelse (.is_sct(reference), "SCTransform", "NormalizeData")
99
- }
100
- if (refnorm == "SCTransform") {
101
- # Check if the reference is SCTransform'ed
102
- if (!.is_sct(reference)) {
103
- stop("Reference is not SCTransform'ed")
104
- }
105
- n_models = length(x = slot(object = reference[[DefaultAssay(reference)]], name = "SCTModel.list"))
106
- if (n_models == 0) {
107
- stop("Reference doesn't contain SCTModel.")
108
- }
109
- }
110
-
111
- log_info(" Normalization method used: {refnorm}")
112
- if (refnorm == "SCTransform") {
113
- findtransferanchors_args$normalization.method = "SCT"
114
- } else if (refnorm == "NormalizeData") {
115
- findtransferanchors_args$normalization.method = "LogNormalize"
116
- } else {
117
- stop(paste0("Unknown normalization method: ", refnorm))
118
- }
119
-
120
- # Load Seurat object
121
- log_info("- Loading Seurat object")
122
- sobj = readRDS(sobjfile)
123
- defassay <- DefaultAssay(sobj)
124
-
125
- if (!is.null(mutaters) && length(mutaters) > 0) {
126
- log_info("- Applying mutaters")
127
- sobj@meta.data <- sobj@meta.data %>% mutate(!!!lapply(mutaters, parse_expr))
128
- }
129
-
130
- if (!is.null(split_by)) {
131
- # check if each split has more than 100 cells
132
- cellno = table(sobj@meta.data[[split_by]])
133
- cellno = cellno[cellno < 100]
134
- if (length(cellno) > 0) {
135
- # stop and print the splits with # cells
136
- stop(paste0(
137
- "The following splits have less than 100 cells: \n",
138
- paste0("- ", names(cellno), ": ", cellno, collapse = "\n"),
139
- "\n\n",
140
- "You can use `envs.mutaters` to merge these splits and use `newsplit` as `envs.split_by`: \n",
141
- "> mutaters = {\n",
142
- "> newsplit = \"if_else(oldsplit %in% c('split1', 'split2'), 'mergedsplit', oldsplit)\"\n",
143
- "> }\n"
144
- ))
145
- }
146
- sobj = SplitObject(sobj, split.by = split_by)
147
- }
148
-
149
- # Normalize data
150
- log_info("- Normalizing data")
151
- if (refnorm == "SCTransform") {
152
- if (defassay == "SCT" && skip_if_normalized) {
153
- log_warn(" Skipping normalization as the object is already SCTransform'ed")
154
- } else {
155
- log_info(" Using SCTransform normalization")
156
- sctransform_args$residual.features = rownames(x = reference)
157
- if (is.null(split_by)) {
158
- sctransform_args$object = sobj
159
- sobj = do_call(SCTransform, sctransform_args)
160
- sctransform_args$object <- NULL
161
- rm(sctransform_args)
162
- gc()
163
- } else {
164
- sobj = mclapply(
165
- X = sobj,
166
- FUN = function(x) {
167
- sctransform_args$object = x
168
- do_call(SCTransform, sctransform_args)
169
- },
170
- mc.cores = ncores
171
- )
172
- if (any(unlist(lapply(sobj, class)) == "try-error")) {
173
- stop(paste0("\nmclapply (SCTransform) error:", sobj))
174
- }
175
- }
176
- }
177
- } else {
178
- if (defassay == "RNA" && skip_if_normalized) {
179
- log_warn(" Skipping normalization as the object is already LogNormalize'd")
180
- } else {
181
- log_info(" Using NormalizeData normalization")
182
- if (is.null(split_by)) {
183
- normalizedata_args$object = sobj
184
- sobj = do_call(NormalizeData, normalizedata_args)
185
- } else {
186
- sobj = mclapply(
187
- X = sobj,
188
- FUN = function(x) {
189
- normalizedata_args$object = x
190
- do_call(NormalizeData, normalizedata_args)
191
- },
192
- mc.cores = ncores
193
- )
194
- if (any(unlist(lapply(sobj, class)) == "try-error")) {
195
- stop(paste0("\nmclapply (NormalizeData) error:", sobj))
196
- }
197
- }
198
- normalizedata_args$object <- NULL
199
- rm(normalizedata_args)
200
- gc()
201
- }
202
- }
203
-
204
- # Find anchors between query and reference
205
- log_info("- Finding anchors")
206
- findtransferanchors_args$reference = reference
207
- if (is.null(split_by)) {
208
- findtransferanchors_args$query = sobj
209
- anchors = do_call(FindTransferAnchors, findtransferanchors_args)
210
- findtransferanchors_args$reference = NULL
211
- findtransferanchors_args$query = NULL
212
- rm(findtransferanchors_args)
213
- gc()
214
- } else {
215
- anchors = mclapply(
216
- X = sobj,
217
- FUN = function(x) {
218
- findtransferanchors_args$query = x
219
- do_call(FindTransferAnchors, findtransferanchors_args)
220
- },
221
- mc.cores = ncores
222
- )
223
- if (any(unlist(lapply(anchors, class)) == "try-error")) {
224
- stop(paste0("\nmclapply (FindTransferAnchors) error:", anchors))
225
- }
226
- }
227
-
228
- # Map query to reference
229
- log_info("- Mapping query to reference")
230
- mapquery_args$reference = reference
231
- if (is.null(split_by)) {
232
- mapquery_args$query = sobj
233
- mapquery_args$anchorset = anchors
234
- sobj = do_call(MapQuery, mapquery_args)
235
- mapquery_args$reference = NULL
236
- mapquery_args$query = NULL
237
- mapquery_args$anchorset = NULL
238
- gc()
239
- } else {
240
- sobj = mclapply(
241
- X = seq_along(sobj),
242
- FUN = function(i) {
243
- mapquery_args$query = sobj[[i]]
244
- mapquery_args$anchorset = anchors[[i]]
245
- do_call(MapQuery, mapquery_args)
246
- },
247
- mc.cores = ncores
248
- )
249
- if (any(unlist(lapply(sobj, class)) == "try-error")) {
250
- stop(paste0("\nmclapply (MapQuery) error:", sobj))
251
- }
252
- }
253
-
254
- # Calculating mapping score
255
- log_info("- Calculating mapping score")
256
- mappingscore_sob_msg = paste0(
257
- "While calculating mapping score, the following error was encountered: \n",
258
- "subscript out of bounds. \n\n",
259
- "You may want to try a smaller `ndim` (default: 50) in `envs.MappingScore`."
260
- )
261
- if (is.null(split_by)) {
262
- mappingscore_args$anchors = anchors
263
- mappingscore = tryCatch({
264
- do_call(MappingScore, mappingscore_args)
265
- }, error = function(e) {
266
- if (e$message == "subscript out of bounds") stop(mappingscore_sob_msg)
267
- stop(e)
268
- })
269
- mappingscore_args$anchors = NULL
270
- rm(mappingscore_args)
271
- gc()
272
- } else {
273
- mappingscore = mclapply(
274
- X = seq_along(sobj),
275
- FUN = function(i) {
276
- mappingscore_args$anchors = anchors[[i]]
277
- tryCatch({
278
- do_call(MappingScore, mappingscore_args)
279
- }, error = function(e) {
280
- if (e$message == "subscript out of bounds") stop(mappingscore_sob_msg)
281
- stop(e)
282
- })
283
- },
284
- mc.cores = ncores
285
- )
286
- if (any(unlist(lapply(mappingscore, class)) == "try-error")) {
287
- stop(paste0("\nmclapply (MappingScore) error:", mappingscore))
288
- }
289
- }
290
-
291
- # Calculate mapping score and add to metadata
292
- log_info("- Adding mapping score to metadata")
293
- if (is.null(split_by)) {
294
- sobj = AddMetaData(
295
- object = sobj,
296
- metadata = mappingscore,
297
- col.name = "mapping.score"
298
- )
299
- } else {
300
- sobj = mclapply(
301
- X = seq_along(sobj),
302
- FUN = function(i) {
303
- AddMetaData(
304
- object = sobj[[i]],
305
- metadata = mappingscore[[i]],
306
- col.name = "mapping.score"
307
- )
308
- },
309
- mc.cores = ncores
310
- )
311
- if (any(unlist(lapply(sobj, class)) == "try-error")) {
312
- stop(paste0("\nmclapply (AddMetaData) error:", sobj))
313
- }
314
-
315
- # Combine the results
316
- log_info("- Merging the results")
317
- gc()
318
- # Memory efficient way to merge the results
319
- # query = Reduce(function(x, y) merge(x, y, merge.dr = "ref.umap"), query)
320
- sobj = merge(sobj[[1]], sobj[2:length(sobj)], merge.dr = "ref.umap")
321
- }
322
-
323
- # Add the alias to the metadata for the clusters
324
- log_info("- Adding ident to metadata and set as ident")
325
- sobj@meta.data = sobj@meta.data %>% mutate(
326
- !!sym(ident) := as.factor(!!parse_expr(paste0("predicted.", use)))
56
+ stop("Reference file must be .qs, .qs2, .rds, .RDS, .h5seurat or .H5Seurat")
57
+ }
58
+ reference <- tryCatch(JoinLayers(reference), error = function(e) {reference})
59
+ Idents(reference) <- reference@meta.data[[use]]
60
+
61
+ log$info("Loading query data ...")
62
+ sobj <- read_obj(sobjfile)
63
+
64
+ sobj <- RunSeuratMap2Ref(
65
+ object = sobj, ref = reference, use = use,
66
+ ident = ident, refnorm = refnorm, skip_if_normalized = skip_if_normalized,
67
+ split_by = split_by, ncores = ncores,
68
+ SCTransformArgs = sctransform_args,
69
+ NormalizeDataArgs = normalizedata_args,
70
+ FindTransferAnchorsArgs = findtransferanchors_args,
71
+ MapQueryArgs = mapquery_args,
72
+ log = log, cache = cache
327
73
  )
328
- Idents(sobj) = ident
329
-
330
- # Check if PrepSCTFindMarkers is done
331
- if (.is_sct(sobj) && is.null(sobj@commands$PrepSCTFindMarkers)) {
332
- log_info("- Running PrepSCTFindMarkers ...")
333
- sobj <- PrepSCTFindMarkers(sobj)
334
- # compose a new SeuratCommand to record it to sobj@commands
335
- commands <- names(pbmc_small@commands)
336
- scommand <- pbmc_small@commands[[commands[length(commands)]]]
337
- scommand@time.stamp <- Sys.time()
338
- scommand@assay.used <- DefaultAssay(sobj)
339
- scommand@call.string <- "PrepSCTFindMarkers(object = sobj)"
340
- scommand@params <- list()
341
- sobj@commands$PrepSCTFindMarkers <- scommand
342
- }
343
74
 
344
75
  # Save
345
- log_info("- Saving result ...")
346
- saveRDS(sobj, file = outfile)
347
-
348
-
349
- # ############################
350
- # Some plots
351
- # ############################
352
- log_info("- Plotting mapping score ...")
353
- p <- FeaturePlot(
354
- object = sobj,
355
- reduction = "ref.umap",
356
- features = "mapping.score",
357
- cols = c("white", "blue"),
358
- pt.size = 0.5
359
- ) + ggtitle("Mapping score for query cells")
360
- save_plot(p, file.path(outdir, "mapping_score"), list(width = 800, height = 600, res = 100))
76
+ gc()
77
+ log$info("Saving result ...")
78
+ save_obj(sobj, file = outfile)
361
79
 
362
- log_info("- Plotting for transferred data ...")
363
- ref.reduction = mapquery_args$reduction.model %||% "wnn.umap"
364
- for (qname in names(mapquery_args$refdata)) {
365
- rname <- mapquery_args$refdata[[qname]]
366
80
 
367
- if (grepl("Array", class(reference[[rname]])) && grepl("Array", class(sobj[[qname]]))) {
368
- log_warn(" Skipping transferred array: {qname} -> {rname}")
81
+ ### Plotting
82
+ log$info("Plotting features ...")
83
+ for (name in names(plots)) {
84
+ if (is.null(plots[[name]])) {
369
85
  next
370
86
  }
371
-
372
- log_info(" UMAP for transferred data: {qname} -> {rname}")
373
- ref_p <- DimPlot(
374
- object = reference,
375
- reduction = ref.reduction,
376
- group.by = rname,
377
- label = TRUE,
378
- label.size = 3,
379
- repel = TRUE,
380
- ) + NoLegend()
381
-
382
- query_p <- DimPlot(
383
- object = sobj,
384
- reduction = "ref.umap",
385
- group.by = paste0("predicted.", qname),
386
- label = TRUE,
387
- label.size = 3,
388
- repel = TRUE,
389
- ) + NoLegend()
390
-
391
- p <- ref_p | query_p
392
- prefix <- file.path(outdir, paste0("UMAPs-", slugify(qname)))
393
- save_plot(p, prefix, list(width = 1500, height = 700, res = 100))
394
-
395
- # summarize the stats
396
- log_info(" Summarizing stats: {qname} -> {rname}")
397
- ref_stats <- as.data.frame(table(reference@meta.data[[rname]]))
398
- colnames(ref_stats) <- c("CellType", "Count_Ref")
399
- query_stats <- as.data.frame(table(sobj@meta.data[[paste0("predicted.", qname)]]))
400
- colnames(query_stats) <- c("CellType", "Count_Query")
401
- stats <- left_join(ref_stats, query_stats, by = "CellType") %>%
402
- replace_na(list(Count_Query = 0)) %>%
403
- arrange(desc(Count_Query), desc(Count_Ref))
404
-
405
- write.table(
406
- stats,
407
- file = file.path(outdir, paste0("stats-", slugify(qname), ".txt")),
408
- row.names = FALSE,
409
- quote = FALSE,
410
- sep = "\t"
87
+ log$info("- {name} ...")
88
+ plots[[name]]$features <- gsub("{use}", use, plots[[name]]$features, fixed = TRUE)
89
+ plots[[name]]$features <- gsub("{ident}", ident, plots[[name]]$features, fixed = TRUE)
90
+
91
+ plots[[name]]$devpars <- plots[[name]]$devpars %||% list()
92
+ plots[[name]]$devpars$res <- plots[[name]]$devpars$res %||% 100
93
+ plots[[name]]$devpars$width <- plots[[name]]$devpars$width %||% 1200
94
+ plots[[name]]$devpars$height <- plots[[name]]$devpars$height %||% 720
95
+ plots[[name]]$more_formats <- plots[[name]]$more_formats %||% character()
96
+ plots[[name]]$save_code <- FALSE
97
+ plots[[name]]$descr <- plots[[name]]$descr %||% name
98
+ extract_vars(plots[[name]], "devpars", "more_formats", "save_code", "descr")
99
+
100
+ plot_fn <- gglogger::register(VizSeuratMap2Ref)
101
+ p <- do_call(plot_fn, c(list(query = sobj, ref = reference), plots[[name]]))
102
+ prefix <- file.path(outdir, paste0(slugify(name), ".map2ref"))
103
+ save_plot(p, prefix, devpars, formats = c("png", more_formats))
104
+
105
+ reporter$add(
106
+ reporter$image(prefix, more_formats, save_code = FALSE, kind = "image"),
107
+ h1 = name
411
108
  )
412
109
  }
110
+
111
+ reporter$save(joboutdir)
@@ -1,17 +1,15 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
- {{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
3
-
4
1
  library(rlang)
5
2
  library(tibble)
6
3
  library(dplyr)
7
4
  library(Seurat)
5
+ library(biopipen.utils)
8
6
 
9
- srtobj = {{in.srtobj | quote}}
7
+ srtobj = {{in.srtobj | r}}
10
8
  metafile = {{in.metafile | r}}
11
9
  mutaters = {{envs.mutaters | r}}
12
- rdsfile = {{out.rdsfile | quote}}
10
+ outfile = {{out.outfile | r}}
13
11
 
14
- srt = readRDS(srtobj)
12
+ srt = read_obj(srtobj)
15
13
  metadata = srt@meta.data
16
14
 
17
15
  if (!is.null(metafile)) {
@@ -40,4 +38,4 @@ if (!is.null(expr) && length(expr) > 0) {
40
38
  srt@meta.data = metadata
41
39
  }
42
40
 
43
- saveRDS(srt, rdsfile)
41
+ save_obj(srt, outfile)
@@ -5,9 +5,9 @@ library(dplyr)
5
5
  library(glue)
6
6
  library(biopipen.utils)
7
7
 
8
- metafile <- {{in.metafile | quote}}
9
- rdsfile <- {{out.rdsfile | quote}}
10
- joboutdir <- {{job.outdir | quote}}
8
+ metafile <- {{in.metafile | r}}
9
+ outfile <- {{out.outfile | r}}
10
+ joboutdir <- {{job.outdir | r}}
11
11
  envs <- {{envs | r: todot = "-", skip = 1}}
12
12
 
13
13
  if (isTRUE(envs$cache)) { envs$cache <- joboutdir }
@@ -30,7 +30,9 @@ reporter$add(
30
30
  "<p>Cell filters: ", html_escape(envs$cell_qc), "</p>",
31
31
  "<p>Gene filters: </p>",
32
32
  "<p>- Min Cells: ", envs$gene_qc$min_cells, "</p>",
33
- "<p>- Excludes: ", html_escape(envs$gene_qc$excludes %||% "Not set"), "</p>"
33
+ "<p>- Excludes: ",
34
+ ifelse(is.null(envs$gene_qc$excludes), "Not set", paste(envs$gene_qc$excludes, collapse = ", ")),
35
+ "</p>"
34
36
  )
35
37
  ),
36
38
  h1 = "Filters and QC"
@@ -57,43 +59,77 @@ dir.create(qcdir, showWarnings = FALSE, recursive = TRUE)
57
59
 
58
60
  sobj <- LoadSeuratAndPerformQC(
59
61
  metadata,
60
- per_sample_qc = envs$cell_qc_per_sample,
62
+ min_cells = envs$min_cells,
63
+ min_features = envs$min_features,
61
64
  cell_qc = envs$cell_qc,
62
65
  gene_qc = envs$gene_qc,
63
66
  tmpdir = joboutdir,
64
67
  log = log,
65
68
  cache = envs$cache)
66
69
 
67
- log$info("Saving dimension table ...")
68
- dim_df <- data.frame(
69
- when = c("Before QC", "After QC"),
70
- nCells = c(nrow(sobj@misc$cell_qc_df), sum(sobj@misc$cell_qc_df$.QC)),
71
- nGenes = c(sobj@misc$gene_qc$before, sobj@misc$gene_qc$after)
72
- )
73
- write.table(dim_df, file = file.path(qcdir, "dim.txt"),
70
+ log$info("Saving and visualizing QC results ...")
71
+ cell_qc_df <- VizSeuratCellQC(sobj, plot_type = "table")
72
+ write.table(cell_qc_df, file = file.path(qcdir, "cell_qc.txt"),
74
73
  row.names = FALSE, quote = FALSE, sep = "\t")
75
74
 
76
75
  reporter$add(
77
76
  list(
78
- kind = "descr",
79
- content = "The dimension table for the Seurat object. The table contains the number of cells and genes before and after QC. Note that the cell QC is performed before gene QC."
77
+ name = "Cell QC metrics",
78
+ contents = list(
79
+ list(
80
+ kind = "descr",
81
+ content = paste0(
82
+ "The table below show the number of cells in each sample that failed and passed the QC filters. ",
83
+ "The last row shows the total number of cells that failed and passed the QC filters across all samples. "
84
+ )
85
+ ),
86
+ list(kind = "table", src = file.path(qcdir, "cell_qc.txt"))
87
+ )
80
88
  ),
89
+ h1 = "Filters and QC",
90
+ h2 = "Cell-level Quality Control",
91
+ ui = "tabs"
92
+ )
93
+
94
+ gene_qc_df <- VizSeuratGeneQC(sobj, plot_type = "table")
95
+ write.table(gene_qc_df, file = file.path(qcdir, "gene_qc.txt"),
96
+ row.names = FALSE, quote = FALSE, sep = "\t")
97
+
98
+ reporter$add(
81
99
  list(
82
- kind = "table",
83
- data = list(path = file.path(qcdir, "dim.txt"))
100
+ name = "Gene QC metrics",
101
+ contents = list(
102
+ list(
103
+ kind = "descr",
104
+ content = paste0(
105
+ "The table below show the number of genes in each sample that failed and passed the QC filters. ",
106
+ "The last row shows the final number of genes that failed and passed the QC filters across all samples. ",
107
+ "Any gene that failed the QC filters will be excluded in the merged Seurat object."
108
+ )
109
+ ),
110
+ list(kind = "table", src = file.path(qcdir, "gene_qc.txt")),
111
+ list(kind = "list", items = list(paste0(
112
+ "We may still end up with features slightly less than the final passed ones. ",
113
+ "For example, when SCTransform is used, the number of features may be less than the number of genes that passed the QC filters. ",
114
+ "This is because SCTransform selects the top N features based on variance. "
115
+ )))
116
+ )
84
117
  ),
85
118
  h1 = "Filters and QC",
86
- h2 = "Dimension table"
119
+ h2 = "Gene-level Quality Control",
120
+ ui = "tabs"
87
121
  )
88
122
 
89
- log$info("Visualizing QC metrics ...")
90
123
  for (pname in names(envs$qc_plots)) {
124
+ if (is.null(envs$qc_plots[[pname]])) next
125
+ log$info("- {pname} ...")
91
126
  args <- envs$qc_plots[[pname]]
92
127
  args$kind <- args$kind %||% "cell"
93
128
  args$devpars <- args$devpars %||% list()
94
129
  args$more_formats <- args$more_formats %||% character()
95
130
  args$save_code <- args$save_code %||% FALSE
96
- extract_vars(args, "kind", "devpars", "more_formats", "save_code")
131
+ args$descr <- args$descr %||% pname
132
+ extract_vars(args, "kind", "devpars", "more_formats", "save_code", "descr")
97
133
  if (kind == "gene") kind <- "gene_qc"
98
134
  if (kind == "cell") kind <- "cell_qc"
99
135
  args$object <- sobj
@@ -103,21 +139,31 @@ for (pname in names(envs$qc_plots)) {
103
139
  gglogger::register(VizSeuratGeneQC)
104
140
  }
105
141
  p <- do_call(plot_fn, args)
106
- prefix <- file.path(qcdir, paste0(slugify(pname), "_", kind))
142
+ prefix <- file.path(qcdir, paste0(slugify(pname), ".", kind))
107
143
  save_plot(p, prefix, devpars, formats = c("png", more_formats))
108
144
  if (save_code) {
109
145
  save_plotcode(p, prefix,
110
- setup = c("library(biopipen.utils)", "load('data.RData')", "invisible(list2env('args'))"),
146
+ setup = c("library(biopipen.utils)", "load('data.RData')", "invisible(list2env(args, envir = .GlobalEnv))"),
111
147
  "args",
112
148
  auto_data_setup = FALSE)
113
149
  }
114
150
  reporter$add(
115
- reporter$image(prefix, more_formats, save_code, kind = "image"),
151
+ list(
152
+ name = pname,
153
+ contents = list(
154
+ list(kind = "descr", content = descr),
155
+ reporter$image(prefix, more_formats, save_code, kind = "image")
156
+ )
157
+ ),
116
158
  h1 = "Filters and QC",
117
- h2 = html_escape(pname)
159
+ h2 = ifelse(kind == "cell_qc", "Cell-level Quality Control", "Gene-level Quality Control"),
160
+ ui = "tabs"
118
161
  )
119
162
  }
120
163
 
164
+ log$info("Filtering with QC criteria ...")
165
+ sobj <- FinishSeuratQC(sobj)
166
+
121
167
  sobj <- RunSeuratTransformation(
122
168
  sobj,
123
169
  use_sct = envs$use_sct,
@@ -194,6 +240,12 @@ if (!identical(envs$doublet_detector, "none")) {
194
240
  sobj <- subset(sobj, subset = !!sym(paste0(sobj@misc$doublets$tool, "_DropletType")) != "doublet")
195
241
  }
196
242
 
243
+ if (!is.null(envs$mutaters) && length(envs$mutaters) > 0) {
244
+ log$info("Mutating metadata ...")
245
+ sobj@meta.data <- sobj@meta.data %>%
246
+ mutate(!!!lapply(envs$mutaters, rlang::parse_expr))
247
+ }
248
+
197
249
  log$info("Saving QC'ed seurat object ...")
198
250
  reporter$save(joboutdir)
199
- saveRDS(sobj, rdsfile)
251
+ save_obj(sobj, outfile)