biopipen 0.29.0__py3-none-any.whl → 0.29.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/ns/plot.py +66 -8
- biopipen/ns/{regulation.py → regulatory.py} +3 -3
- biopipen/ns/scrna.py +16 -2
- biopipen/ns/stats.py +93 -1
- biopipen/scripts/delim/SampleInfo.R +10 -5
- biopipen/scripts/plot/Manhattan.R +6 -0
- biopipen/scripts/plot/QQPlot.R +100 -16
- biopipen/scripts/{regulation → regulatory}/MotifAffinityTest.R +3 -3
- biopipen/scripts/{regulation → regulatory}/MotifScan.py +1 -1
- biopipen/scripts/scrna/MarkersFinder.R +28 -18
- biopipen/scripts/scrna/SeuratClustering.R +8 -0
- biopipen/scripts/scrna/SeuratPreparing.R +252 -122
- biopipen/scripts/snp/MatrixEQTL.R +2 -2
- biopipen/scripts/snp/PlinkIBD.R +3 -0
- biopipen/scripts/stats/Mediation.R +94 -0
- {biopipen-0.29.0.dist-info → biopipen-0.29.1.dist-info}/METADATA +1 -1
- {biopipen-0.29.0.dist-info → biopipen-0.29.1.dist-info}/RECORD +24 -23
- {biopipen-0.29.0.dist-info → biopipen-0.29.1.dist-info}/entry_points.txt +1 -1
- /biopipen/scripts/{regulation → regulatory}/MotifAffinityTest_AtSNP.R +0 -0
- /biopipen/scripts/{regulation → regulatory}/MotifAffinityTest_MotifBreakR.R +0 -0
- /biopipen/scripts/{regulation → regulatory}/atSNP.R +0 -0
- /biopipen/scripts/{regulation → regulatory}/motifBreakR.R +0 -0
- {biopipen-0.29.0.dist-info → biopipen-0.29.1.dist-info}/WHEEL +0 -0
|
@@ -1,19 +1,27 @@
|
|
|
1
1
|
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
source("{{biopipen_dir}}/utils/caching.R")
|
|
2
3
|
|
|
3
4
|
library(Seurat)
|
|
4
5
|
library(future)
|
|
5
6
|
library(bracer)
|
|
6
7
|
library(ggplot2)
|
|
7
8
|
library(dplyr)
|
|
8
|
-
library(tidyseurat)
|
|
9
|
+
# library(tidyseurat)
|
|
9
10
|
|
|
10
|
-
metafile
|
|
11
|
-
rdsfile
|
|
12
|
-
joboutdir
|
|
13
|
-
envs
|
|
11
|
+
metafile <- {{in.metafile | quote}}
|
|
12
|
+
rdsfile <- {{out.rdsfile | quote}}
|
|
13
|
+
joboutdir <- {{job.outdir | quote}}
|
|
14
|
+
envs <- {{envs | r: todot = "-", skip = 1}}
|
|
15
|
+
|
|
16
|
+
if (isTRUE(envs$cache)) { envs$cache <- joboutdir }
|
|
17
|
+
if (length(envs$cache) > 1) {
|
|
18
|
+
log_warn("Multiple cache directories (envs.cache) detected, using the first one.")
|
|
19
|
+
envs$cache <- envs$cache[1]
|
|
20
|
+
}
|
|
14
21
|
|
|
15
22
|
set.seed(8525)
|
|
16
|
-
|
|
23
|
+
# 8TB
|
|
24
|
+
options(future.globals.maxSize = 8 * 1024 ^ 4)
|
|
17
25
|
options(future.rng.onMisuse="ignore")
|
|
18
26
|
options(Seurat.object.assay.version = "v5")
|
|
19
27
|
plan(strategy = "multicore", workers = envs$ncores)
|
|
@@ -34,7 +42,7 @@ add_report(
|
|
|
34
42
|
h1 = "Filters and QC"
|
|
35
43
|
)
|
|
36
44
|
|
|
37
|
-
metadata
|
|
45
|
+
metadata <- read.table(
|
|
38
46
|
metafile,
|
|
39
47
|
header = TRUE,
|
|
40
48
|
row.names = NULL,
|
|
@@ -42,6 +50,16 @@ metadata = read.table(
|
|
|
42
50
|
check.names = FALSE
|
|
43
51
|
)
|
|
44
52
|
|
|
53
|
+
cache_sig <- capture.output(str(metadata))
|
|
54
|
+
dig_sig <- digest::digest(cache_sig, algo = "md5")
|
|
55
|
+
dig_sig <- substr(dig_sig, 1, 8)
|
|
56
|
+
cache_dir <- NULL
|
|
57
|
+
if (is.character(envs$cache)) {
|
|
58
|
+
cache_dir <- file.path(envs$cache, paste0(dig_sig, ".seuratpreparing_cache"))
|
|
59
|
+
dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)
|
|
60
|
+
writeLines(cache_sig, file.path(cache_dir, "signature.txt"))
|
|
61
|
+
}
|
|
62
|
+
|
|
45
63
|
meta_cols = colnames(metadata)
|
|
46
64
|
if (!"Sample" %in% meta_cols) {
|
|
47
65
|
stop("Error: Column `Sample` is not found in metafile.")
|
|
@@ -90,21 +108,21 @@ rename_files = function(e, sample, path) {
|
|
|
90
108
|
|
|
91
109
|
|
|
92
110
|
perform_cell_qc <- function(sobj, per_sample = FALSE) {
|
|
93
|
-
log_prefix
|
|
111
|
+
log_prefix <- ifelse(per_sample, " ", "- ")
|
|
94
112
|
log_info("{log_prefix}Adding metadata for QC ...")
|
|
95
|
-
sobj$percent.mt
|
|
96
|
-
sobj$percent.ribo
|
|
97
|
-
sobj$percent.hb
|
|
98
|
-
sobj$percent.plat
|
|
113
|
+
sobj$percent.mt <- PercentageFeatureSet(sobj, pattern = "^MT-")
|
|
114
|
+
sobj$percent.ribo <- PercentageFeatureSet(sobj, pattern = "^RP[SL]")
|
|
115
|
+
sobj$percent.hb <- PercentageFeatureSet(sobj, pattern = "^HB[^(P)]")
|
|
116
|
+
sobj$percent.plat <- PercentageFeatureSet(sobj, pattern = "PECAM1|PF4")
|
|
99
117
|
|
|
100
118
|
if (is.null(envs$cell_qc) || length(envs$cell_qc) == 0) {
|
|
101
119
|
log_warn("{log_prefix}No cell QC criteria is provided. All cells will be kept.")
|
|
102
|
-
cell_qc
|
|
120
|
+
cell_qc <- "TRUE"
|
|
103
121
|
} else {
|
|
104
|
-
cell_qc
|
|
122
|
+
cell_qc <- envs$cell_qc
|
|
105
123
|
}
|
|
106
124
|
|
|
107
|
-
sobj
|
|
125
|
+
sobj@meta.data <- sobj@meta.data %>% mutate(.QC = !!rlang::parse_expr(cell_qc))
|
|
108
126
|
|
|
109
127
|
if (is.null(cell_qc_df)) {
|
|
110
128
|
cell_qc_df <<- sobj@meta.data[, c("Sample", ".QC", feats), drop = FALSE]
|
|
@@ -114,8 +132,8 @@ perform_cell_qc <- function(sobj, per_sample = FALSE) {
|
|
|
114
132
|
|
|
115
133
|
# Do the filtering
|
|
116
134
|
log_info("{log_prefix}Filtering cells using QC criteria ...")
|
|
117
|
-
sobj
|
|
118
|
-
sobj$.QC
|
|
135
|
+
sobj <- subset(sobj, subset = .QC)
|
|
136
|
+
sobj$.QC <- NULL
|
|
119
137
|
|
|
120
138
|
return(sobj)
|
|
121
139
|
}
|
|
@@ -281,42 +299,83 @@ load_sample = function(sample) {
|
|
|
281
299
|
obj
|
|
282
300
|
}
|
|
283
301
|
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
302
|
+
cached <- get_cached(
|
|
303
|
+
list(cell_qc = envs$cell_qc, cell_qc_per_sample = envs$cell_qc_per_sample, use_sct = envs$use_sct),
|
|
304
|
+
"CellQC",
|
|
305
|
+
cache_dir
|
|
306
|
+
)
|
|
307
|
+
if (!is.null(cached$data)) {
|
|
308
|
+
log_info("Loading cell-QC'ed object from cache ...")
|
|
309
|
+
sobj <- cached$data$sobj
|
|
310
|
+
cell_qc_df <- cached$data$cell_qc_df
|
|
311
|
+
cached$data$sobj <- NULL
|
|
312
|
+
cached$data$cell_qc_df <- NULL
|
|
313
|
+
cached$data <- NULL
|
|
314
|
+
rm(cached)
|
|
315
|
+
gc()
|
|
316
|
+
} else {
|
|
317
|
+
# Load data
|
|
318
|
+
log_info("Reading samples individually ...")
|
|
319
|
+
obj_list = lapply(samples, load_sample)
|
|
320
|
+
|
|
321
|
+
log_info("Merging samples ...")
|
|
322
|
+
sobj = Reduce(merge, obj_list)
|
|
323
|
+
rm(obj_list)
|
|
324
|
+
gc()
|
|
325
|
+
|
|
326
|
+
if (!envs$cell_qc_per_sample) {
|
|
327
|
+
log_info("Performing cell QC ...")
|
|
328
|
+
sobj = perform_cell_qc(sobj)
|
|
329
|
+
}
|
|
290
330
|
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
sobj = perform_cell_qc(sobj)
|
|
331
|
+
cached$data = list(sobj = sobj, cell_qc_df = cell_qc_df)
|
|
332
|
+
save_to_cache(cached, "CellQC", cache_dir)
|
|
294
333
|
}
|
|
295
334
|
|
|
296
335
|
# plot and report the QC
|
|
297
336
|
log_info("Plotting and reporting QC ...")
|
|
298
337
|
dim_df = report_cell_qc(nrow(sobj))
|
|
299
338
|
|
|
300
|
-
log_info("Filtering genes ...")
|
|
301
339
|
if (is.list(envs$gene_qc)) {
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
340
|
+
cached <- get_cached(
|
|
341
|
+
list(
|
|
342
|
+
cell_qc = envs$cell_qc,
|
|
343
|
+
gene_qc = envs$gene_qc,
|
|
344
|
+
cell_qc_per_sample = envs$cell_qc_per_sample,
|
|
345
|
+
use_sct = envs$use_sct
|
|
346
|
+
),
|
|
347
|
+
"GeneQC",
|
|
348
|
+
cache_dir
|
|
349
|
+
)
|
|
350
|
+
if (!is.null(cached$data)) {
|
|
351
|
+
log_info("Loading gene-QC'ed object from cache ...")
|
|
352
|
+
sobj <- cached$data
|
|
353
|
+
cached$data <- NULL
|
|
354
|
+
rm(cached)
|
|
355
|
+
gc()
|
|
356
|
+
} else {
|
|
357
|
+
log_info("Filtering genes ...")
|
|
358
|
+
genes <- rownames(sobj)
|
|
359
|
+
filtered <- FALSE
|
|
360
|
+
if (!is.null(envs$gene_qc$min_cells) && envs$gene_qc$min_cells > 0) {
|
|
361
|
+
genes = genes[Matrix::rowSums(sobj) >= envs$gene_qc$min_cells]
|
|
362
|
+
filtered <- TRUE
|
|
312
363
|
}
|
|
313
|
-
|
|
314
|
-
|
|
364
|
+
excludes <- envs$gene_qc$excludes
|
|
365
|
+
if (!is.null(excludes)) {
|
|
366
|
+
if (length(excludes) == 1) {
|
|
367
|
+
excludes <- trimws(unlist(strsplit(excludes, ",")))
|
|
368
|
+
}
|
|
369
|
+
for (ex in excludes) {
|
|
370
|
+
genes <- genes[!grepl(ex, genes)]
|
|
371
|
+
}
|
|
372
|
+
filtered <- TRUE
|
|
315
373
|
}
|
|
316
|
-
filtered
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
374
|
+
if (filtered) {
|
|
375
|
+
sobj = subset(sobj, features = genes)
|
|
376
|
+
}
|
|
377
|
+
cached$data <- sobj
|
|
378
|
+
save_to_cache(cached, "GeneQC", cache_dir)
|
|
320
379
|
}
|
|
321
380
|
}
|
|
322
381
|
dim_df = rbind(
|
|
@@ -350,96 +409,167 @@ add_report(
|
|
|
350
409
|
paste(capture.output(str(args)), collapse = ", ")
|
|
351
410
|
}
|
|
352
411
|
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
# Default is to use the SCT assay
|
|
412
|
+
envs_cache <- envs
|
|
413
|
+
envs_cache$ncores <- NULL
|
|
414
|
+
envs_cache$DoubletFinder <- NULL
|
|
415
|
+
envs_cache$IntegrateLayers <- NULL
|
|
416
|
+
cached <- get_cached(envs_cache, "Transformed", cache_dir)
|
|
417
|
+
if (!is.null(cached$data)) {
|
|
418
|
+
log_info("Loading transformed object from cache ...")
|
|
419
|
+
sobj <- cached$data
|
|
420
|
+
cached$data <- NULL
|
|
421
|
+
rm(cached)
|
|
422
|
+
gc()
|
|
365
423
|
} else {
|
|
366
|
-
log_info("
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
424
|
+
log_info("Performing transformation/scaling ...")
|
|
425
|
+
# Not joined yet
|
|
426
|
+
# sobj[["RNA"]] <- split(sobj[["RNA"]], f = sobj$Sample)
|
|
427
|
+
if (envs$use_sct) {
|
|
428
|
+
log_info("- Running SCTransform ...")
|
|
429
|
+
SCTransformArgs <- envs$SCTransform
|
|
430
|
+
# log to stdout but don't populate it to running log
|
|
431
|
+
print(paste0(" SCTransform: ", .formatArgs(SCTransformArgs)))
|
|
432
|
+
log_debug(" SCTransform: {.formatArgs(SCTransformArgs)}")
|
|
433
|
+
SCTransformArgs$object <- sobj
|
|
434
|
+
sobj <- do_call(SCTransform, SCTransformArgs)
|
|
435
|
+
# Default is to use the SCT assay
|
|
436
|
+
|
|
437
|
+
# Cleanup memory
|
|
438
|
+
SCTransformArgs$object <- NULL
|
|
439
|
+
rm(SCTransformArgs)
|
|
440
|
+
gc()
|
|
441
|
+
} else {
|
|
442
|
+
log_info("- Running NormalizeData ...")
|
|
443
|
+
NormalizeDataArgs <- envs$NormalizeData
|
|
444
|
+
print(paste0(" NormalizeData: ", .formatArgs(NormalizeDataArgs)))
|
|
445
|
+
log_debug(" NormalizeData: {.formatArgs(NormalizeDataArgs)}")
|
|
446
|
+
NormalizeDataArgs$object <- sobj
|
|
447
|
+
sobj <- do_call(NormalizeData, NormalizeDataArgs)
|
|
448
|
+
|
|
449
|
+
# Cleanup memory
|
|
450
|
+
NormalizeDataArgs$object <- NULL
|
|
451
|
+
rm(NormalizeDataArgs)
|
|
452
|
+
gc()
|
|
453
|
+
|
|
454
|
+
log_info("- Running FindVariableFeatures ...")
|
|
455
|
+
FindVariableFeaturesArgs <- envs$FindVariableFeatures
|
|
456
|
+
print(paste0(" FindVariableFeatures: ", .formatArgs(FindVariableFeaturesArgs)))
|
|
457
|
+
log_debug(" FindVariableFeatures: {.formatArgs(FindVariableFeaturesArgs)}")
|
|
458
|
+
FindVariableFeaturesArgs$object <- sobj
|
|
459
|
+
sobj <- do_call(FindVariableFeatures, FindVariableFeaturesArgs)
|
|
460
|
+
|
|
461
|
+
# Cleanup memory
|
|
462
|
+
FindVariableFeaturesArgs$object <- NULL
|
|
463
|
+
rm(FindVariableFeaturesArgs)
|
|
464
|
+
gc()
|
|
465
|
+
|
|
466
|
+
log_info("- Running ScaleData ...")
|
|
467
|
+
ScaleDataArgs <- envs$ScaleData
|
|
468
|
+
print(paste0(" ScaleData: ", .formatArgs(ScaleDataArgs)))
|
|
469
|
+
log_debug(" ScaleData: {.formatArgs(ScaleDataArgs)}")
|
|
470
|
+
ScaleDataArgs$object <- sobj
|
|
471
|
+
sobj <- do_call(ScaleData, ScaleDataArgs)
|
|
472
|
+
|
|
473
|
+
# Cleanup memory
|
|
474
|
+
ScaleDataArgs$object <- NULL
|
|
475
|
+
rm(ScaleDataArgs)
|
|
476
|
+
gc()
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
log_info("- Running RunPCA ...")
|
|
480
|
+
RunPCAArgs <- envs$RunPCA
|
|
481
|
+
RunPCAArgs$npcs <- if (is.null(RunPCAArgs$npcs)) { 50 } else { min(RunPCAArgs$npcs, ncol(sobj) - 1) }
|
|
482
|
+
print(paste0(" RunPCA: ", .formatArgs(RunPCAArgs)))
|
|
483
|
+
log_debug(" RunPCA: {.formatArgs(RunPCAArgs)}")
|
|
484
|
+
RunPCAArgs$object <- sobj
|
|
485
|
+
sobj <- do_call(RunPCA, RunPCAArgs)
|
|
486
|
+
|
|
487
|
+
# Cleanup memory
|
|
488
|
+
RunPCAArgs$object <- NULL
|
|
489
|
+
rm(RunPCAArgs)
|
|
490
|
+
gc()
|
|
491
|
+
|
|
492
|
+
cached$data <- sobj
|
|
493
|
+
save_to_cache(cached, "Transformed", cache_dir)
|
|
386
494
|
}
|
|
387
495
|
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
log_info("
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
496
|
+
envs_cache <- envs
|
|
497
|
+
envs_cache$ncores <- NULL
|
|
498
|
+
envs_cache$DoubletFinder <- NULL
|
|
499
|
+
cached <- get_cached(envs_cache, "Integrated", cache_dir)
|
|
500
|
+
|
|
501
|
+
if (!is.null(cached$data)) {
|
|
502
|
+
log_info("Loading integrated/layer-joined object from cache ...")
|
|
503
|
+
sobj <- cached$data
|
|
504
|
+
cached$data <- NULL
|
|
505
|
+
rm(cached)
|
|
506
|
+
gc()
|
|
507
|
+
|
|
508
|
+
} else {
|
|
509
|
+
|
|
510
|
+
if (!envs$no_integration) {
|
|
511
|
+
log_info("- Running IntegrateLayers (method = {envs$IntegrateLayers$method}) ...")
|
|
512
|
+
IntegrateLayersArgs <- envs$IntegrateLayers
|
|
513
|
+
method <- IntegrateLayersArgs$method
|
|
514
|
+
if (!is.null(IntegrateLayersArgs$reference) && is.character(IntegrateLayersArgs$reference)) {
|
|
515
|
+
log_info(" Using reference samples: {paste(IntegrateLayersArgs$reference, collapse = ', ')}")
|
|
516
|
+
IntegrateLayersArgs$reference <- match(IntegrateLayersArgs$reference, samples)
|
|
517
|
+
log_info(" Transferred to indices: {paste(IntegrateLayersArgs$reference, collapse = ', ')}")
|
|
518
|
+
}
|
|
519
|
+
if (method %in% c("CCA", "cca")) { method <- "CCAIntegration" } else
|
|
520
|
+
if (method %in% c("RPCA", "rpca")) { method <- "RPCAIntegration" } else
|
|
521
|
+
if (method %in% c("Harmony", "harmony")) { method <- "HarmonyIntegration" } else
|
|
522
|
+
if (method %in% c("FastMNN", "fastmnn")) { method <- "FastMNNIntegration" } else
|
|
523
|
+
if (method %in% c("scVI", "scvi")) { method <- "scVIIntegration" } else
|
|
524
|
+
{ stop(paste0("Unknown integration method: ", method)) }
|
|
525
|
+
if (envs$use_sct && is.null(IntegrateLayersArgs$normalization.method)) {
|
|
526
|
+
IntegrateLayersArgs$normalization.method <- "SCT"
|
|
527
|
+
}
|
|
528
|
+
IntegrateLayersArgs$method <- eval(parse(text = method))
|
|
529
|
+
new_reductions <- list(
|
|
530
|
+
"CCAIntegration" = "integrated.cca",
|
|
531
|
+
"RPCAIntegration" = "integrated.rpca",
|
|
532
|
+
"HarmonyIntegration" = "harmony",
|
|
533
|
+
"FastMNNIntegration" = "integration.mnn",
|
|
534
|
+
"scVIIntegration" = "integrated.scvi"
|
|
535
|
+
)
|
|
536
|
+
if (is.null(IntegrateLayersArgs$new.reduction)) {
|
|
537
|
+
IntegrateLayersArgs$new.reduction <- new_reductions[[method]]
|
|
538
|
+
}
|
|
539
|
+
print(paste0(" IntegrateLayers: ", .formatArgs(IntegrateLayersArgs)))
|
|
540
|
+
log_debug(" IntegrateLayers: {.formatArgs(IntegrateLayersArgs)}")
|
|
541
|
+
IntegrateLayersArgs$object <- sobj
|
|
542
|
+
sobj <- do_call(IntegrateLayers, IntegrateLayersArgs)
|
|
543
|
+
# Save it for dimension reduction plots
|
|
544
|
+
sobj@misc$integrated_new_reduction <- IntegrateLayersArgs$new.reduction
|
|
545
|
+
|
|
546
|
+
# Cleanup memory
|
|
547
|
+
IntegrateLayersArgs$object <- NULL
|
|
548
|
+
rm(IntegrateLayersArgs)
|
|
549
|
+
gc()
|
|
413
550
|
}
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
"
|
|
417
|
-
|
|
418
|
-
"HarmonyIntegration" = "harmony",
|
|
419
|
-
"FastMNNIntegration" = "integration.mnn",
|
|
420
|
-
"scVIIntegration" = "integrated.scvi"
|
|
421
|
-
)
|
|
422
|
-
if (is.null(IntegrateLayersArgs$new.reduction)) {
|
|
423
|
-
IntegrateLayersArgs$new.reduction <- new_reductions[[method]]
|
|
551
|
+
|
|
552
|
+
if (!envs$use_sct) {
|
|
553
|
+
log_info("- Joining layers ...")
|
|
554
|
+
sobj <- JoinLayers(sobj)
|
|
424
555
|
}
|
|
425
|
-
print(paste0(" IntegrateLayers: ", .formatArgs(IntegrateLayersArgs)))
|
|
426
|
-
log_debug(" IntegrateLayers: {.formatArgs(IntegrateLayersArgs)}")
|
|
427
|
-
IntegrateLayersArgs$object <- sobj
|
|
428
|
-
sobj <- do_call(IntegrateLayers, IntegrateLayersArgs)
|
|
429
|
-
# Save it for dimension reduction plots
|
|
430
|
-
sobj@misc$integrated_new_reduction <- IntegrateLayersArgs$new.reduction
|
|
431
|
-
}
|
|
432
556
|
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
sobj <- JoinLayers(sobj)
|
|
557
|
+
cached$data <- sobj
|
|
558
|
+
save_to_cache(cached, "Integrated", cache_dir)
|
|
436
559
|
}
|
|
437
560
|
|
|
561
|
+
|
|
562
|
+
# This is the last step, doesn't need to be cached
|
|
438
563
|
if (!is.null(envs$DoubletFinder) && is.list(envs$DoubletFinder) && envs$DoubletFinder$PCs > 0) {
|
|
439
564
|
library(DoubletFinder)
|
|
440
565
|
|
|
441
566
|
log_info("Running DoubletFinder ...")
|
|
442
567
|
log_info("- Preparing Seurat object ...")
|
|
568
|
+
|
|
569
|
+
if (is.null(envs$DoubletFinder$ncores)) {
|
|
570
|
+
envs$DoubletFinder$ncores <- envs$ncores
|
|
571
|
+
}
|
|
572
|
+
|
|
443
573
|
# More controls from envs?
|
|
444
574
|
sobj <- FindNeighbors(sobj, dims = 1:envs$DoubletFinder$PCs)
|
|
445
575
|
sobj <- FindClusters(sobj)
|
|
@@ -449,7 +579,7 @@ if (!is.null(envs$DoubletFinder) && is.list(envs$DoubletFinder) && envs$DoubletF
|
|
|
449
579
|
sobj,
|
|
450
580
|
PCs = 1:envs$DoubletFinder$PCs,
|
|
451
581
|
sct = envs$use_sct,
|
|
452
|
-
num.cores = envs$ncores
|
|
582
|
+
num.cores = envs$DoubletFinder$ncores
|
|
453
583
|
)
|
|
454
584
|
sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
|
|
455
585
|
bcmvn <- find.pK(sweep.stats)
|
|
@@ -546,7 +676,7 @@ if (!is.null(envs$DoubletFinder) && is.list(envs$DoubletFinder) && envs$DoubletF
|
|
|
546
676
|
)
|
|
547
677
|
}
|
|
548
678
|
|
|
549
|
-
log_info("Saving
|
|
679
|
+
log_info("Saving QC'ed seurat object ...")
|
|
550
680
|
saveRDS(sobj, rdsfile)
|
|
551
681
|
|
|
552
682
|
save_report(joboutdir)
|
|
@@ -107,7 +107,7 @@ engine_params$snps = snps
|
|
|
107
107
|
engine_params$gene = gene
|
|
108
108
|
engine_params$cvrt = cvrt
|
|
109
109
|
engine_params$output_file_name = if(trans_enabled) alleqtl else NULL
|
|
110
|
-
engine_params$pvOutputThreshold = if(trans_enabled) transp else 0
|
|
110
|
+
engine_params$pvOutputThreshold = if(trans_enabled) min(transp, 1) else 0
|
|
111
111
|
engine_params$useModel = model
|
|
112
112
|
engine_params$errorCovariance = numeric()
|
|
113
113
|
engine_params$verbose = TRUE
|
|
@@ -180,7 +180,7 @@ if (cis_enabled) {
|
|
|
180
180
|
|
|
181
181
|
log_info("Running MatrixEQTL with cis-eQTLs enabled ...")
|
|
182
182
|
engine_params$output_file_name.cis = outfile
|
|
183
|
-
engine_params$pvOutputThreshold.cis = pval
|
|
183
|
+
engine_params$pvOutputThreshold.cis = min(pval, 1)
|
|
184
184
|
engine_params$cisDist = dist
|
|
185
185
|
engine_params$snpspos = snppos_data
|
|
186
186
|
engine_params$genepos = genepos_data
|
biopipen/scripts/snp/PlinkIBD.R
CHANGED
|
@@ -34,6 +34,7 @@ cmd <- c(
|
|
|
34
34
|
"--threads", ncores,
|
|
35
35
|
"--bfile", input,
|
|
36
36
|
"--indep-pairwise", indep,
|
|
37
|
+
"--keep-allele-order",
|
|
37
38
|
# One should be mindful of running this with < 50 samples
|
|
38
39
|
# "--bad-ld",
|
|
39
40
|
"--out", output
|
|
@@ -49,6 +50,7 @@ cmd <- c(
|
|
|
49
50
|
"--threads", ncores,
|
|
50
51
|
"--bfile", input,
|
|
51
52
|
"--extract", prunein,
|
|
53
|
+
"--keep-allele-order",
|
|
52
54
|
"--genome",
|
|
53
55
|
"--out", output
|
|
54
56
|
)
|
|
@@ -122,6 +124,7 @@ cmd <- c(
|
|
|
122
124
|
"--threads", ncores,
|
|
123
125
|
"--bfile", input,
|
|
124
126
|
"--remove", ibd_fail_file,
|
|
127
|
+
"--keep-allele-order",
|
|
125
128
|
"--make-bed",
|
|
126
129
|
"--out", output
|
|
127
130
|
)
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
|
|
3
|
+
library(rlang)
|
|
4
|
+
library(parallel)
|
|
5
|
+
library(mediation)
|
|
6
|
+
|
|
7
|
+
infile <- {{in.infile | r}}
|
|
8
|
+
fmlfile <- {{in.fmlfile | r}}
|
|
9
|
+
outfile <- {{out.outfile | r}}
|
|
10
|
+
|
|
11
|
+
ncores <- {{envs.ncores | r}}
|
|
12
|
+
sims <- {{envs.sims | r}}
|
|
13
|
+
args <- {{envs.args | r}}
|
|
14
|
+
padj <- {{envs.padj | r}}
|
|
15
|
+
cases <- {{envs.cases | r}}
|
|
16
|
+
transpose_input <- {{envs.transpose_input | r}}
|
|
17
|
+
|
|
18
|
+
set.seed(123)
|
|
19
|
+
|
|
20
|
+
log_info("Reading input file ...")
|
|
21
|
+
indata <- read.table(infile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE)
|
|
22
|
+
if (transpose_input) { indata <- t(indata) }
|
|
23
|
+
|
|
24
|
+
log_info("Reading formula file/cases ...")
|
|
25
|
+
if (!is.null(fmlfile)) {
|
|
26
|
+
if (!is.null(cases) && length(cases) > 0) {
|
|
27
|
+
log_warn("envs.cases ignored as in.fmlfile is provided")
|
|
28
|
+
}
|
|
29
|
+
fmldata <- read.table(fmlfile, header = TRUE, sep = "\t", row.names = NULL)
|
|
30
|
+
# Case M Y X Cov Model_M Model_Y
|
|
31
|
+
cases <- split(fmldata, fmldata$Case)
|
|
32
|
+
} else if (is.null(cases) || length(cases) == 0) {
|
|
33
|
+
stop("Either envs.cases or in.fmlfile must be provided")
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
args <- args %||% list()
|
|
37
|
+
|
|
38
|
+
medanalysis = function(casename) {
|
|
39
|
+
case <- cases[[casename]]
|
|
40
|
+
log_info("- Case:", casename)
|
|
41
|
+
M <- case$M
|
|
42
|
+
Y <- case$Y
|
|
43
|
+
X <- case$X
|
|
44
|
+
covs <- case$Cov
|
|
45
|
+
modelm <- match.fun(case$Model_M)
|
|
46
|
+
modely <- match.fun(case$Model_Y)
|
|
47
|
+
fmlm <- as.formula(sprintf("%s ~ %s", bQuote(M), bQuote(X)))
|
|
48
|
+
fmly <- as.formula(sprintf("%s ~ %s + %s", bQuote(Y), bQuote(M), bQuote(X)))
|
|
49
|
+
if (!is.null(covs) && length(covs) == 1) {
|
|
50
|
+
covs <- trimws(strsplit(covs, ",")[[1]])
|
|
51
|
+
}
|
|
52
|
+
if (!is.null(covs)) {
|
|
53
|
+
cov_fml <- as.formula(sprintf("~ . + %s", paste(bQuote(covs), collapse = " + ")))
|
|
54
|
+
fmlm <- update.formula(fmlm, cov_fml)
|
|
55
|
+
fmly <- update.formula(fmly, cov_fml)
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
margs <- args
|
|
59
|
+
args$sims <- sims
|
|
60
|
+
args$model.m <- modelm(fmlm, data = indata)
|
|
61
|
+
args$model.y <- modely(fmly, data = indata)
|
|
62
|
+
args$treat <- X
|
|
63
|
+
args$mediator <- M
|
|
64
|
+
args$outcome <- Y
|
|
65
|
+
if (!is.null(covs)) {
|
|
66
|
+
args$covariates <- indata[, covs, drop = FALSE]
|
|
67
|
+
}
|
|
68
|
+
med <- do_call(mediate, args)
|
|
69
|
+
if (is.na(med$d1.p) || is.na(med$n1)) {
|
|
70
|
+
NULL
|
|
71
|
+
} else {
|
|
72
|
+
data.frame(
|
|
73
|
+
Case = casename,
|
|
74
|
+
M = M,
|
|
75
|
+
X = X,
|
|
76
|
+
Y = Y,
|
|
77
|
+
ACME = med$d1,
|
|
78
|
+
ACME95CI1 = med$d1.ci[1],
|
|
79
|
+
ACME95CI2 = med$d1.ci[2],
|
|
80
|
+
TotalEffect = med$tau.coef,
|
|
81
|
+
ADE = med$z1,
|
|
82
|
+
PropMediated = med$n1,
|
|
83
|
+
Pval = med$d1.p
|
|
84
|
+
)
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
out <- do_call(rbind, mclapply(names(cases), medanalysis, mc.cores = ncores))
|
|
89
|
+
|
|
90
|
+
if (padj != "none") {
|
|
91
|
+
out$Padj <- p.adjust(out$Pval, method = padj)
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
write.table(out, file = outfile, sep = "\t", quote = FALSE, row.names = FALSE)
|