biopipen 0.29.0__py3-none-any.whl → 0.29.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/ns/plot.py +66 -8
- biopipen/ns/{regulation.py → regulatory.py} +3 -3
- biopipen/ns/scrna.py +16 -2
- biopipen/ns/stats.py +93 -1
- biopipen/scripts/delim/SampleInfo.R +10 -5
- biopipen/scripts/plot/Manhattan.R +6 -0
- biopipen/scripts/plot/QQPlot.R +100 -16
- biopipen/scripts/{regulation → regulatory}/MotifAffinityTest.R +3 -3
- biopipen/scripts/{regulation → regulatory}/MotifScan.py +1 -1
- biopipen/scripts/scrna/MarkersFinder.R +29 -18
- biopipen/scripts/scrna/MetaMarkers.R +20 -2
- biopipen/scripts/scrna/SeuratClusterStats-features.R +3 -1
- biopipen/scripts/scrna/SeuratClustering.R +8 -0
- biopipen/scripts/scrna/SeuratPreparing.R +252 -122
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +5 -2
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +5 -1
- biopipen/scripts/snp/MatrixEQTL.R +2 -2
- biopipen/scripts/snp/PlinkIBD.R +3 -0
- biopipen/scripts/stats/Mediation.R +106 -0
- {biopipen-0.29.0.dist-info → biopipen-0.29.2.dist-info}/METADATA +1 -1
- {biopipen-0.29.0.dist-info → biopipen-0.29.2.dist-info}/RECORD +28 -27
- {biopipen-0.29.0.dist-info → biopipen-0.29.2.dist-info}/entry_points.txt +1 -1
- /biopipen/scripts/{regulation → regulatory}/MotifAffinityTest_AtSNP.R +0 -0
- /biopipen/scripts/{regulation → regulatory}/MotifAffinityTest_MotifBreakR.R +0 -0
- /biopipen/scripts/{regulation → regulatory}/atSNP.R +0 -0
- /biopipen/scripts/{regulation → regulatory}/motifBreakR.R +0 -0
- {biopipen-0.29.0.dist-info → biopipen-0.29.2.dist-info}/WHEEL +0 -0
|
@@ -75,6 +75,7 @@ do_one_features = function(name) {
|
|
|
75
75
|
Idents(case$object) = case$ident
|
|
76
76
|
}
|
|
77
77
|
n_uidents = length(unique(Idents(case$object)))
|
|
78
|
+
max_nchar_idents = max(nchar(unique(as.character(Idents(case$object)))))
|
|
78
79
|
|
|
79
80
|
fn = NULL
|
|
80
81
|
default_devpars = NULL
|
|
@@ -97,13 +98,14 @@ do_one_features = function(name) {
|
|
|
97
98
|
case$kind = "violin"
|
|
98
99
|
if (is.null(case$cols)) { case$cols = pal_biopipen()(n_uidents) }
|
|
99
100
|
if (is.null(case$pt.size)) { case$pt.size = 0 }
|
|
101
|
+
|
|
100
102
|
excluded_args = c(excluded_args, "reduction")
|
|
101
103
|
fn = VlnPlot
|
|
102
104
|
default_devpars = function(features, ncol) {
|
|
103
105
|
if (is.null(ncol)) { ncol = 1 }
|
|
104
106
|
list(
|
|
105
107
|
width = 400 * ncol,
|
|
106
|
-
height = ceiling(length(features) / ncol) *
|
|
108
|
+
height = ceiling(length(features) / ncol + max_nchar_idents * .05) * 150,
|
|
107
109
|
res = 100
|
|
108
110
|
)
|
|
109
111
|
}
|
|
@@ -202,6 +202,14 @@ if (DefaultAssay(sobj) == "SCT") {
|
|
|
202
202
|
# https://github.com/satijalab/seurat/issues/6968
|
|
203
203
|
log_info("Running PrepSCTFindMarkers ...")
|
|
204
204
|
sobj <- PrepSCTFindMarkers(sobj)
|
|
205
|
+
# compose a new SeuratCommand to record it to sobj@commands
|
|
206
|
+
scommand <- sobj@commands$FindClusters
|
|
207
|
+
scommand@name <- "PrepSCTFindMarkers"
|
|
208
|
+
scommand@time.stamp <- Sys.time()
|
|
209
|
+
scommand@assay.used <- "SCT"
|
|
210
|
+
scommand@call.string <- "PrepSCTFindMarkers(object = sobj)"
|
|
211
|
+
scommand@params <- list()
|
|
212
|
+
sobj@commands$PrepSCTFindMarkers <- scommand
|
|
205
213
|
}
|
|
206
214
|
|
|
207
215
|
log_info("Saving results ...")
|
|
@@ -1,19 +1,27 @@
|
|
|
1
1
|
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
source("{{biopipen_dir}}/utils/caching.R")
|
|
2
3
|
|
|
3
4
|
library(Seurat)
|
|
4
5
|
library(future)
|
|
5
6
|
library(bracer)
|
|
6
7
|
library(ggplot2)
|
|
7
8
|
library(dplyr)
|
|
8
|
-
library(tidyseurat)
|
|
9
|
+
# library(tidyseurat)
|
|
9
10
|
|
|
10
|
-
metafile
|
|
11
|
-
rdsfile
|
|
12
|
-
joboutdir
|
|
13
|
-
envs
|
|
11
|
+
metafile <- {{in.metafile | quote}}
|
|
12
|
+
rdsfile <- {{out.rdsfile | quote}}
|
|
13
|
+
joboutdir <- {{job.outdir | quote}}
|
|
14
|
+
envs <- {{envs | r: todot = "-", skip = 1}}
|
|
15
|
+
|
|
16
|
+
if (isTRUE(envs$cache)) { envs$cache <- joboutdir }
|
|
17
|
+
if (length(envs$cache) > 1) {
|
|
18
|
+
log_warn("Multiple cache directories (envs.cache) detected, using the first one.")
|
|
19
|
+
envs$cache <- envs$cache[1]
|
|
20
|
+
}
|
|
14
21
|
|
|
15
22
|
set.seed(8525)
|
|
16
|
-
|
|
23
|
+
# 8TB
|
|
24
|
+
options(future.globals.maxSize = 8 * 1024 ^ 4)
|
|
17
25
|
options(future.rng.onMisuse="ignore")
|
|
18
26
|
options(Seurat.object.assay.version = "v5")
|
|
19
27
|
plan(strategy = "multicore", workers = envs$ncores)
|
|
@@ -34,7 +42,7 @@ add_report(
|
|
|
34
42
|
h1 = "Filters and QC"
|
|
35
43
|
)
|
|
36
44
|
|
|
37
|
-
metadata
|
|
45
|
+
metadata <- read.table(
|
|
38
46
|
metafile,
|
|
39
47
|
header = TRUE,
|
|
40
48
|
row.names = NULL,
|
|
@@ -42,6 +50,16 @@ metadata = read.table(
|
|
|
42
50
|
check.names = FALSE
|
|
43
51
|
)
|
|
44
52
|
|
|
53
|
+
cache_sig <- capture.output(str(metadata))
|
|
54
|
+
dig_sig <- digest::digest(cache_sig, algo = "md5")
|
|
55
|
+
dig_sig <- substr(dig_sig, 1, 8)
|
|
56
|
+
cache_dir <- NULL
|
|
57
|
+
if (is.character(envs$cache)) {
|
|
58
|
+
cache_dir <- file.path(envs$cache, paste0(dig_sig, ".seuratpreparing_cache"))
|
|
59
|
+
dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)
|
|
60
|
+
writeLines(cache_sig, file.path(cache_dir, "signature.txt"))
|
|
61
|
+
}
|
|
62
|
+
|
|
45
63
|
meta_cols = colnames(metadata)
|
|
46
64
|
if (!"Sample" %in% meta_cols) {
|
|
47
65
|
stop("Error: Column `Sample` is not found in metafile.")
|
|
@@ -90,21 +108,21 @@ rename_files = function(e, sample, path) {
|
|
|
90
108
|
|
|
91
109
|
|
|
92
110
|
perform_cell_qc <- function(sobj, per_sample = FALSE) {
|
|
93
|
-
log_prefix
|
|
111
|
+
log_prefix <- ifelse(per_sample, " ", "- ")
|
|
94
112
|
log_info("{log_prefix}Adding metadata for QC ...")
|
|
95
|
-
sobj$percent.mt
|
|
96
|
-
sobj$percent.ribo
|
|
97
|
-
sobj$percent.hb
|
|
98
|
-
sobj$percent.plat
|
|
113
|
+
sobj$percent.mt <- PercentageFeatureSet(sobj, pattern = "^MT-")
|
|
114
|
+
sobj$percent.ribo <- PercentageFeatureSet(sobj, pattern = "^RP[SL]")
|
|
115
|
+
sobj$percent.hb <- PercentageFeatureSet(sobj, pattern = "^HB[^(P)]")
|
|
116
|
+
sobj$percent.plat <- PercentageFeatureSet(sobj, pattern = "PECAM1|PF4")
|
|
99
117
|
|
|
100
118
|
if (is.null(envs$cell_qc) || length(envs$cell_qc) == 0) {
|
|
101
119
|
log_warn("{log_prefix}No cell QC criteria is provided. All cells will be kept.")
|
|
102
|
-
cell_qc
|
|
120
|
+
cell_qc <- "TRUE"
|
|
103
121
|
} else {
|
|
104
|
-
cell_qc
|
|
122
|
+
cell_qc <- envs$cell_qc
|
|
105
123
|
}
|
|
106
124
|
|
|
107
|
-
sobj
|
|
125
|
+
sobj@meta.data <- sobj@meta.data %>% mutate(.QC = !!rlang::parse_expr(cell_qc))
|
|
108
126
|
|
|
109
127
|
if (is.null(cell_qc_df)) {
|
|
110
128
|
cell_qc_df <<- sobj@meta.data[, c("Sample", ".QC", feats), drop = FALSE]
|
|
@@ -114,8 +132,8 @@ perform_cell_qc <- function(sobj, per_sample = FALSE) {
|
|
|
114
132
|
|
|
115
133
|
# Do the filtering
|
|
116
134
|
log_info("{log_prefix}Filtering cells using QC criteria ...")
|
|
117
|
-
sobj
|
|
118
|
-
sobj$.QC
|
|
135
|
+
sobj <- subset(sobj, subset = .QC)
|
|
136
|
+
sobj$.QC <- NULL
|
|
119
137
|
|
|
120
138
|
return(sobj)
|
|
121
139
|
}
|
|
@@ -281,42 +299,83 @@ load_sample = function(sample) {
|
|
|
281
299
|
obj
|
|
282
300
|
}
|
|
283
301
|
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
302
|
+
cached <- get_cached(
|
|
303
|
+
list(cell_qc = envs$cell_qc, cell_qc_per_sample = envs$cell_qc_per_sample, use_sct = envs$use_sct),
|
|
304
|
+
"CellQC",
|
|
305
|
+
cache_dir
|
|
306
|
+
)
|
|
307
|
+
if (!is.null(cached$data)) {
|
|
308
|
+
log_info("Loading cell-QC'ed object from cache ...")
|
|
309
|
+
sobj <- cached$data$sobj
|
|
310
|
+
cell_qc_df <- cached$data$cell_qc_df
|
|
311
|
+
cached$data$sobj <- NULL
|
|
312
|
+
cached$data$cell_qc_df <- NULL
|
|
313
|
+
cached$data <- NULL
|
|
314
|
+
rm(cached)
|
|
315
|
+
gc()
|
|
316
|
+
} else {
|
|
317
|
+
# Load data
|
|
318
|
+
log_info("Reading samples individually ...")
|
|
319
|
+
obj_list = lapply(samples, load_sample)
|
|
320
|
+
|
|
321
|
+
log_info("Merging samples ...")
|
|
322
|
+
sobj = Reduce(merge, obj_list)
|
|
323
|
+
rm(obj_list)
|
|
324
|
+
gc()
|
|
325
|
+
|
|
326
|
+
if (!envs$cell_qc_per_sample) {
|
|
327
|
+
log_info("Performing cell QC ...")
|
|
328
|
+
sobj = perform_cell_qc(sobj)
|
|
329
|
+
}
|
|
290
330
|
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
sobj = perform_cell_qc(sobj)
|
|
331
|
+
cached$data = list(sobj = sobj, cell_qc_df = cell_qc_df)
|
|
332
|
+
save_to_cache(cached, "CellQC", cache_dir)
|
|
294
333
|
}
|
|
295
334
|
|
|
296
335
|
# plot and report the QC
|
|
297
336
|
log_info("Plotting and reporting QC ...")
|
|
298
337
|
dim_df = report_cell_qc(nrow(sobj))
|
|
299
338
|
|
|
300
|
-
log_info("Filtering genes ...")
|
|
301
339
|
if (is.list(envs$gene_qc)) {
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
340
|
+
cached <- get_cached(
|
|
341
|
+
list(
|
|
342
|
+
cell_qc = envs$cell_qc,
|
|
343
|
+
gene_qc = envs$gene_qc,
|
|
344
|
+
cell_qc_per_sample = envs$cell_qc_per_sample,
|
|
345
|
+
use_sct = envs$use_sct
|
|
346
|
+
),
|
|
347
|
+
"GeneQC",
|
|
348
|
+
cache_dir
|
|
349
|
+
)
|
|
350
|
+
if (!is.null(cached$data)) {
|
|
351
|
+
log_info("Loading gene-QC'ed object from cache ...")
|
|
352
|
+
sobj <- cached$data
|
|
353
|
+
cached$data <- NULL
|
|
354
|
+
rm(cached)
|
|
355
|
+
gc()
|
|
356
|
+
} else {
|
|
357
|
+
log_info("Filtering genes ...")
|
|
358
|
+
genes <- rownames(sobj)
|
|
359
|
+
filtered <- FALSE
|
|
360
|
+
if (!is.null(envs$gene_qc$min_cells) && envs$gene_qc$min_cells > 0) {
|
|
361
|
+
genes = genes[Matrix::rowSums(sobj) >= envs$gene_qc$min_cells]
|
|
362
|
+
filtered <- TRUE
|
|
312
363
|
}
|
|
313
|
-
|
|
314
|
-
|
|
364
|
+
excludes <- envs$gene_qc$excludes
|
|
365
|
+
if (!is.null(excludes)) {
|
|
366
|
+
if (length(excludes) == 1) {
|
|
367
|
+
excludes <- trimws(unlist(strsplit(excludes, ",")))
|
|
368
|
+
}
|
|
369
|
+
for (ex in excludes) {
|
|
370
|
+
genes <- genes[!grepl(ex, genes)]
|
|
371
|
+
}
|
|
372
|
+
filtered <- TRUE
|
|
315
373
|
}
|
|
316
|
-
filtered
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
374
|
+
if (filtered) {
|
|
375
|
+
sobj = subset(sobj, features = genes)
|
|
376
|
+
}
|
|
377
|
+
cached$data <- sobj
|
|
378
|
+
save_to_cache(cached, "GeneQC", cache_dir)
|
|
320
379
|
}
|
|
321
380
|
}
|
|
322
381
|
dim_df = rbind(
|
|
@@ -350,96 +409,167 @@ add_report(
|
|
|
350
409
|
paste(capture.output(str(args)), collapse = ", ")
|
|
351
410
|
}
|
|
352
411
|
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
# Default is to use the SCT assay
|
|
412
|
+
envs_cache <- envs
|
|
413
|
+
envs_cache$ncores <- NULL
|
|
414
|
+
envs_cache$DoubletFinder <- NULL
|
|
415
|
+
envs_cache$IntegrateLayers <- NULL
|
|
416
|
+
cached <- get_cached(envs_cache, "Transformed", cache_dir)
|
|
417
|
+
if (!is.null(cached$data)) {
|
|
418
|
+
log_info("Loading transformed object from cache ...")
|
|
419
|
+
sobj <- cached$data
|
|
420
|
+
cached$data <- NULL
|
|
421
|
+
rm(cached)
|
|
422
|
+
gc()
|
|
365
423
|
} else {
|
|
366
|
-
log_info("
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
424
|
+
log_info("Performing transformation/scaling ...")
|
|
425
|
+
# Not joined yet
|
|
426
|
+
# sobj[["RNA"]] <- split(sobj[["RNA"]], f = sobj$Sample)
|
|
427
|
+
if (envs$use_sct) {
|
|
428
|
+
log_info("- Running SCTransform ...")
|
|
429
|
+
SCTransformArgs <- envs$SCTransform
|
|
430
|
+
# log to stdout but don't populate it to running log
|
|
431
|
+
print(paste0(" SCTransform: ", .formatArgs(SCTransformArgs)))
|
|
432
|
+
log_debug(" SCTransform: {.formatArgs(SCTransformArgs)}")
|
|
433
|
+
SCTransformArgs$object <- sobj
|
|
434
|
+
sobj <- do_call(SCTransform, SCTransformArgs)
|
|
435
|
+
# Default is to use the SCT assay
|
|
436
|
+
|
|
437
|
+
# Cleanup memory
|
|
438
|
+
SCTransformArgs$object <- NULL
|
|
439
|
+
rm(SCTransformArgs)
|
|
440
|
+
gc()
|
|
441
|
+
} else {
|
|
442
|
+
log_info("- Running NormalizeData ...")
|
|
443
|
+
NormalizeDataArgs <- envs$NormalizeData
|
|
444
|
+
print(paste0(" NormalizeData: ", .formatArgs(NormalizeDataArgs)))
|
|
445
|
+
log_debug(" NormalizeData: {.formatArgs(NormalizeDataArgs)}")
|
|
446
|
+
NormalizeDataArgs$object <- sobj
|
|
447
|
+
sobj <- do_call(NormalizeData, NormalizeDataArgs)
|
|
448
|
+
|
|
449
|
+
# Cleanup memory
|
|
450
|
+
NormalizeDataArgs$object <- NULL
|
|
451
|
+
rm(NormalizeDataArgs)
|
|
452
|
+
gc()
|
|
453
|
+
|
|
454
|
+
log_info("- Running FindVariableFeatures ...")
|
|
455
|
+
FindVariableFeaturesArgs <- envs$FindVariableFeatures
|
|
456
|
+
print(paste0(" FindVariableFeatures: ", .formatArgs(FindVariableFeaturesArgs)))
|
|
457
|
+
log_debug(" FindVariableFeatures: {.formatArgs(FindVariableFeaturesArgs)}")
|
|
458
|
+
FindVariableFeaturesArgs$object <- sobj
|
|
459
|
+
sobj <- do_call(FindVariableFeatures, FindVariableFeaturesArgs)
|
|
460
|
+
|
|
461
|
+
# Cleanup memory
|
|
462
|
+
FindVariableFeaturesArgs$object <- NULL
|
|
463
|
+
rm(FindVariableFeaturesArgs)
|
|
464
|
+
gc()
|
|
465
|
+
|
|
466
|
+
log_info("- Running ScaleData ...")
|
|
467
|
+
ScaleDataArgs <- envs$ScaleData
|
|
468
|
+
print(paste0(" ScaleData: ", .formatArgs(ScaleDataArgs)))
|
|
469
|
+
log_debug(" ScaleData: {.formatArgs(ScaleDataArgs)}")
|
|
470
|
+
ScaleDataArgs$object <- sobj
|
|
471
|
+
sobj <- do_call(ScaleData, ScaleDataArgs)
|
|
472
|
+
|
|
473
|
+
# Cleanup memory
|
|
474
|
+
ScaleDataArgs$object <- NULL
|
|
475
|
+
rm(ScaleDataArgs)
|
|
476
|
+
gc()
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
log_info("- Running RunPCA ...")
|
|
480
|
+
RunPCAArgs <- envs$RunPCA
|
|
481
|
+
RunPCAArgs$npcs <- if (is.null(RunPCAArgs$npcs)) { 50 } else { min(RunPCAArgs$npcs, ncol(sobj) - 1) }
|
|
482
|
+
print(paste0(" RunPCA: ", .formatArgs(RunPCAArgs)))
|
|
483
|
+
log_debug(" RunPCA: {.formatArgs(RunPCAArgs)}")
|
|
484
|
+
RunPCAArgs$object <- sobj
|
|
485
|
+
sobj <- do_call(RunPCA, RunPCAArgs)
|
|
486
|
+
|
|
487
|
+
# Cleanup memory
|
|
488
|
+
RunPCAArgs$object <- NULL
|
|
489
|
+
rm(RunPCAArgs)
|
|
490
|
+
gc()
|
|
491
|
+
|
|
492
|
+
cached$data <- sobj
|
|
493
|
+
save_to_cache(cached, "Transformed", cache_dir)
|
|
386
494
|
}
|
|
387
495
|
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
log_info("
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
496
|
+
envs_cache <- envs
|
|
497
|
+
envs_cache$ncores <- NULL
|
|
498
|
+
envs_cache$DoubletFinder <- NULL
|
|
499
|
+
cached <- get_cached(envs_cache, "Integrated", cache_dir)
|
|
500
|
+
|
|
501
|
+
if (!is.null(cached$data)) {
|
|
502
|
+
log_info("Loading integrated/layer-joined object from cache ...")
|
|
503
|
+
sobj <- cached$data
|
|
504
|
+
cached$data <- NULL
|
|
505
|
+
rm(cached)
|
|
506
|
+
gc()
|
|
507
|
+
|
|
508
|
+
} else {
|
|
509
|
+
|
|
510
|
+
if (!envs$no_integration) {
|
|
511
|
+
log_info("- Running IntegrateLayers (method = {envs$IntegrateLayers$method}) ...")
|
|
512
|
+
IntegrateLayersArgs <- envs$IntegrateLayers
|
|
513
|
+
method <- IntegrateLayersArgs$method
|
|
514
|
+
if (!is.null(IntegrateLayersArgs$reference) && is.character(IntegrateLayersArgs$reference)) {
|
|
515
|
+
log_info(" Using reference samples: {paste(IntegrateLayersArgs$reference, collapse = ', ')}")
|
|
516
|
+
IntegrateLayersArgs$reference <- match(IntegrateLayersArgs$reference, samples)
|
|
517
|
+
log_info(" Transferred to indices: {paste(IntegrateLayersArgs$reference, collapse = ', ')}")
|
|
518
|
+
}
|
|
519
|
+
if (method %in% c("CCA", "cca")) { method <- "CCAIntegration" } else
|
|
520
|
+
if (method %in% c("RPCA", "rpca")) { method <- "RPCAIntegration" } else
|
|
521
|
+
if (method %in% c("Harmony", "harmony")) { method <- "HarmonyIntegration" } else
|
|
522
|
+
if (method %in% c("FastMNN", "fastmnn")) { method <- "FastMNNIntegration" } else
|
|
523
|
+
if (method %in% c("scVI", "scvi")) { method <- "scVIIntegration" } else
|
|
524
|
+
{ stop(paste0("Unknown integration method: ", method)) }
|
|
525
|
+
if (envs$use_sct && is.null(IntegrateLayersArgs$normalization.method)) {
|
|
526
|
+
IntegrateLayersArgs$normalization.method <- "SCT"
|
|
527
|
+
}
|
|
528
|
+
IntegrateLayersArgs$method <- eval(parse(text = method))
|
|
529
|
+
new_reductions <- list(
|
|
530
|
+
"CCAIntegration" = "integrated.cca",
|
|
531
|
+
"RPCAIntegration" = "integrated.rpca",
|
|
532
|
+
"HarmonyIntegration" = "harmony",
|
|
533
|
+
"FastMNNIntegration" = "integration.mnn",
|
|
534
|
+
"scVIIntegration" = "integrated.scvi"
|
|
535
|
+
)
|
|
536
|
+
if (is.null(IntegrateLayersArgs$new.reduction)) {
|
|
537
|
+
IntegrateLayersArgs$new.reduction <- new_reductions[[method]]
|
|
538
|
+
}
|
|
539
|
+
print(paste0(" IntegrateLayers: ", .formatArgs(IntegrateLayersArgs)))
|
|
540
|
+
log_debug(" IntegrateLayers: {.formatArgs(IntegrateLayersArgs)}")
|
|
541
|
+
IntegrateLayersArgs$object <- sobj
|
|
542
|
+
sobj <- do_call(IntegrateLayers, IntegrateLayersArgs)
|
|
543
|
+
# Save it for dimension reduction plots
|
|
544
|
+
sobj@misc$integrated_new_reduction <- IntegrateLayersArgs$new.reduction
|
|
545
|
+
|
|
546
|
+
# Cleanup memory
|
|
547
|
+
IntegrateLayersArgs$object <- NULL
|
|
548
|
+
rm(IntegrateLayersArgs)
|
|
549
|
+
gc()
|
|
413
550
|
}
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
"
|
|
417
|
-
|
|
418
|
-
"HarmonyIntegration" = "harmony",
|
|
419
|
-
"FastMNNIntegration" = "integration.mnn",
|
|
420
|
-
"scVIIntegration" = "integrated.scvi"
|
|
421
|
-
)
|
|
422
|
-
if (is.null(IntegrateLayersArgs$new.reduction)) {
|
|
423
|
-
IntegrateLayersArgs$new.reduction <- new_reductions[[method]]
|
|
551
|
+
|
|
552
|
+
if (!envs$use_sct) {
|
|
553
|
+
log_info("- Joining layers ...")
|
|
554
|
+
sobj <- JoinLayers(sobj)
|
|
424
555
|
}
|
|
425
|
-
print(paste0(" IntegrateLayers: ", .formatArgs(IntegrateLayersArgs)))
|
|
426
|
-
log_debug(" IntegrateLayers: {.formatArgs(IntegrateLayersArgs)}")
|
|
427
|
-
IntegrateLayersArgs$object <- sobj
|
|
428
|
-
sobj <- do_call(IntegrateLayers, IntegrateLayersArgs)
|
|
429
|
-
# Save it for dimension reduction plots
|
|
430
|
-
sobj@misc$integrated_new_reduction <- IntegrateLayersArgs$new.reduction
|
|
431
|
-
}
|
|
432
556
|
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
sobj <- JoinLayers(sobj)
|
|
557
|
+
cached$data <- sobj
|
|
558
|
+
save_to_cache(cached, "Integrated", cache_dir)
|
|
436
559
|
}
|
|
437
560
|
|
|
561
|
+
|
|
562
|
+
# This is the last step, doesn't need to be cached
|
|
438
563
|
if (!is.null(envs$DoubletFinder) && is.list(envs$DoubletFinder) && envs$DoubletFinder$PCs > 0) {
|
|
439
564
|
library(DoubletFinder)
|
|
440
565
|
|
|
441
566
|
log_info("Running DoubletFinder ...")
|
|
442
567
|
log_info("- Preparing Seurat object ...")
|
|
568
|
+
|
|
569
|
+
if (is.null(envs$DoubletFinder$ncores)) {
|
|
570
|
+
envs$DoubletFinder$ncores <- envs$ncores
|
|
571
|
+
}
|
|
572
|
+
|
|
443
573
|
# More controls from envs?
|
|
444
574
|
sobj <- FindNeighbors(sobj, dims = 1:envs$DoubletFinder$PCs)
|
|
445
575
|
sobj <- FindClusters(sobj)
|
|
@@ -449,7 +579,7 @@ if (!is.null(envs$DoubletFinder) && is.list(envs$DoubletFinder) && envs$DoubletF
|
|
|
449
579
|
sobj,
|
|
450
580
|
PCs = 1:envs$DoubletFinder$PCs,
|
|
451
581
|
sct = envs$use_sct,
|
|
452
|
-
num.cores = envs$ncores
|
|
582
|
+
num.cores = envs$DoubletFinder$ncores
|
|
453
583
|
)
|
|
454
584
|
sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
|
|
455
585
|
bcmvn <- find.pK(sweep.stats)
|
|
@@ -546,7 +676,7 @@ if (!is.null(envs$DoubletFinder) && is.list(envs$DoubletFinder) && envs$DoubletF
|
|
|
546
676
|
)
|
|
547
677
|
}
|
|
548
678
|
|
|
549
|
-
log_info("Saving
|
|
679
|
+
log_info("Saving QC'ed seurat object ...")
|
|
550
680
|
saveRDS(sobj, rdsfile)
|
|
551
681
|
|
|
552
682
|
save_report(joboutdir)
|
|
@@ -139,8 +139,11 @@ do_one_subset <- function(s, subset_col, subset_prefix) {
|
|
|
139
139
|
if (any(unlist(lapply(x, class)) == "try-error")) {
|
|
140
140
|
stop("mclapply error")
|
|
141
141
|
}
|
|
142
|
-
|
|
143
|
-
|
|
142
|
+
for (r in x) {
|
|
143
|
+
if (!is.null(r)) {
|
|
144
|
+
do.call(add_report, r)
|
|
145
|
+
}
|
|
146
|
+
}
|
|
144
147
|
}
|
|
145
148
|
|
|
146
149
|
do_one_subset_col <- function(subset_col, subset_prefix) {
|
|
@@ -107,7 +107,7 @@ engine_params$snps = snps
|
|
|
107
107
|
engine_params$gene = gene
|
|
108
108
|
engine_params$cvrt = cvrt
|
|
109
109
|
engine_params$output_file_name = if(trans_enabled) alleqtl else NULL
|
|
110
|
-
engine_params$pvOutputThreshold = if(trans_enabled) transp else 0
|
|
110
|
+
engine_params$pvOutputThreshold = if(trans_enabled) min(transp, 1) else 0
|
|
111
111
|
engine_params$useModel = model
|
|
112
112
|
engine_params$errorCovariance = numeric()
|
|
113
113
|
engine_params$verbose = TRUE
|
|
@@ -180,7 +180,7 @@ if (cis_enabled) {
|
|
|
180
180
|
|
|
181
181
|
log_info("Running MatrixEQTL with cis-eQTLs enabled ...")
|
|
182
182
|
engine_params$output_file_name.cis = outfile
|
|
183
|
-
engine_params$pvOutputThreshold.cis = pval
|
|
183
|
+
engine_params$pvOutputThreshold.cis = min(pval, 1)
|
|
184
184
|
engine_params$cisDist = dist
|
|
185
185
|
engine_params$snpspos = snppos_data
|
|
186
186
|
engine_params$genepos = genepos_data
|
biopipen/scripts/snp/PlinkIBD.R
CHANGED
|
@@ -34,6 +34,7 @@ cmd <- c(
|
|
|
34
34
|
"--threads", ncores,
|
|
35
35
|
"--bfile", input,
|
|
36
36
|
"--indep-pairwise", indep,
|
|
37
|
+
"--keep-allele-order",
|
|
37
38
|
# One should be mindful of running this with < 50 samples
|
|
38
39
|
# "--bad-ld",
|
|
39
40
|
"--out", output
|
|
@@ -49,6 +50,7 @@ cmd <- c(
|
|
|
49
50
|
"--threads", ncores,
|
|
50
51
|
"--bfile", input,
|
|
51
52
|
"--extract", prunein,
|
|
53
|
+
"--keep-allele-order",
|
|
52
54
|
"--genome",
|
|
53
55
|
"--out", output
|
|
54
56
|
)
|
|
@@ -122,6 +124,7 @@ cmd <- c(
|
|
|
122
124
|
"--threads", ncores,
|
|
123
125
|
"--bfile", input,
|
|
124
126
|
"--remove", ibd_fail_file,
|
|
127
|
+
"--keep-allele-order",
|
|
125
128
|
"--make-bed",
|
|
126
129
|
"--out", output
|
|
127
130
|
)
|