biopipen 0.28.1__py3-none-any.whl → 0.29.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +8 -0
- biopipen/ns/bam.py +0 -2
- biopipen/ns/bed.py +35 -0
- biopipen/ns/cellranger_pipeline.py +5 -5
- biopipen/ns/cnv.py +18 -2
- biopipen/ns/cnvkit_pipeline.py +16 -11
- biopipen/ns/gene.py +68 -23
- biopipen/ns/misc.py +2 -15
- biopipen/ns/plot.py +204 -0
- biopipen/ns/regulatory.py +214 -0
- biopipen/ns/scrna.py +31 -5
- biopipen/ns/snp.py +516 -8
- biopipen/ns/stats.py +167 -3
- biopipen/ns/vcf.py +196 -0
- biopipen/reports/snp/PlinkCallRate.svelte +24 -0
- biopipen/reports/snp/PlinkFreq.svelte +18 -0
- biopipen/reports/snp/PlinkHWE.svelte +18 -0
- biopipen/reports/snp/PlinkHet.svelte +18 -0
- biopipen/reports/snp/PlinkIBD.svelte +18 -0
- biopipen/scripts/bam/CNVpytor.py +144 -46
- biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
- biopipen/scripts/bed/BedtoolsMerge.py +1 -1
- biopipen/scripts/cnv/AneuploidyScore.R +30 -7
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +5 -2
- biopipen/scripts/cnv/TMADScore.R +21 -5
- biopipen/scripts/cnv/TMADScoreSummary.R +6 -2
- biopipen/scripts/cnvkit/CNVkitAccess.py +2 -1
- biopipen/scripts/cnvkit/CNVkitAutobin.py +3 -2
- biopipen/scripts/cnvkit/CNVkitBatch.py +1 -1
- biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -1
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +1 -1
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +1 -1
- biopipen/scripts/cnvkit/CNVkitReference.py +2 -1
- biopipen/scripts/delim/SampleInfo.R +10 -5
- biopipen/scripts/gene/GeneNameConversion.R +65 -0
- biopipen/scripts/gene/GenePromoters.R +61 -0
- biopipen/scripts/misc/Shell.sh +15 -0
- biopipen/scripts/plot/Manhattan.R +146 -0
- biopipen/scripts/plot/QQPlot.R +146 -0
- biopipen/scripts/regulatory/MotifAffinityTest.R +226 -0
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +126 -0
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +96 -0
- biopipen/scripts/regulatory/MotifScan.py +159 -0
- biopipen/scripts/regulatory/atSNP.R +33 -0
- biopipen/scripts/regulatory/motifBreakR.R +1594 -0
- biopipen/scripts/scrna/MarkersFinder.R +69 -67
- biopipen/scripts/scrna/SeuratClustering.R +71 -29
- biopipen/scripts/scrna/SeuratMap2Ref.R +20 -0
- biopipen/scripts/scrna/SeuratPreparing.R +252 -122
- biopipen/scripts/scrna/SeuratSubClustering.R +76 -27
- biopipen/scripts/snp/MatrixEQTL.R +85 -44
- biopipen/scripts/snp/Plink2GTMat.py +133 -0
- biopipen/scripts/snp/PlinkCallRate.R +190 -0
- biopipen/scripts/snp/PlinkFilter.py +100 -0
- biopipen/scripts/snp/PlinkFreq.R +298 -0
- biopipen/scripts/snp/PlinkFromVcf.py +78 -0
- biopipen/scripts/snp/PlinkHWE.R +80 -0
- biopipen/scripts/snp/PlinkHet.R +92 -0
- biopipen/scripts/snp/PlinkIBD.R +200 -0
- biopipen/scripts/snp/PlinkUpdateName.py +124 -0
- biopipen/scripts/stats/Mediation.R +94 -0
- biopipen/scripts/stats/MetaPvalue.R +2 -1
- biopipen/scripts/stats/MetaPvalue1.R +70 -0
- biopipen/scripts/tcr/TCRClusterStats.R +12 -7
- biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
- biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
- biopipen/scripts/vcf/BcftoolsSort.py +113 -0
- biopipen/scripts/vcf/BcftoolsView.py +73 -0
- biopipen/scripts/vcf/VcfFix_utils.py +1 -1
- biopipen/scripts/vcf/bcftools_utils.py +52 -0
- biopipen/utils/gene.R +83 -37
- biopipen/utils/gene.py +108 -60
- biopipen/utils/misc.R +56 -0
- biopipen/utils/misc.py +5 -2
- biopipen/utils/reference.py +54 -10
- {biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/METADATA +2 -2
- {biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/RECORD +80 -51
- {biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/entry_points.txt +1 -1
- biopipen/ns/bcftools.py +0 -111
- biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
- biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
- biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
- biopipen/scripts/gene/GeneNameConversion.py +0 -66
- {biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/WHEEL +0 -0
|
@@ -65,6 +65,19 @@ if (ncores > 1) {
|
|
|
65
65
|
log_info("- Reading Seurat object ...")
|
|
66
66
|
srtobj <- readRDS(srtfile)
|
|
67
67
|
defassay <- DefaultAssay(srtobj)
|
|
68
|
+
# Marker finding on an SCT assay expects PrepSCTFindMarkers() to have been
# applied. Run it here when the object carries no record of it.
if (defassay == "SCT" && !"PrepSCTFindMarkers" %in% names(srtobj@commands)) {
    log_warn("  SCTransform used but PrepSCTFindMarkers not applied, running ...")

    srtobj <- PrepSCTFindMarkers(srtobj)

    # Record the call in srtobj@commands so the check above succeeds next time.
    # The SeuratCommand is built directly instead of cloning
    # srtobj@commands$FindClusters: that entry is NULL when the clusters did
    # not come from FindClusters() on this object, and assigning a slot on
    # NULL raises an error.
    srtobj@commands$PrepSCTFindMarkers <- methods::new(
        "SeuratCommand",
        name = "PrepSCTFindMarkers",
        time.stamp = Sys.time(),
        assay.used = "SCT",
        call.string = "PrepSCTFindMarkers(object = srtobj)",
        params = list()
    )
}
|
|
68
81
|
|
|
69
82
|
if (!is.null(mutaters) && length(mutaters) > 0) {
|
|
70
83
|
log_info("- Mutating meta data ...")
|
|
@@ -411,45 +424,11 @@ do_case_findall <- function(casename) {
|
|
|
411
424
|
log_info(" Using cached markers ...")
|
|
412
425
|
markers <- cached$data
|
|
413
426
|
} else {
|
|
414
|
-
markers <-
|
|
415
|
-
do_call(FindAllMarkers, args)
|
|
416
|
-
# gene, p_val, avg_log2FC, pct.1, pct.2, p_val_adj, cluster
|
|
417
|
-
}, error = function(e) {
|
|
418
|
-
log_warn(e$message)
|
|
419
|
-
|
|
420
|
-
data.frame(
|
|
421
|
-
gene = character(),
|
|
422
|
-
p_val = numeric(),
|
|
423
|
-
avg_log2FC = numeric(),
|
|
424
|
-
pct.1 = numeric(),
|
|
425
|
-
pct.2 = numeric(),
|
|
426
|
-
p_val_adj=numeric(),
|
|
427
|
-
cluster = character()
|
|
428
|
-
)
|
|
429
|
-
})
|
|
427
|
+
markers <- find_markers(args, find_all = TRUE)
|
|
430
428
|
cached$data <- markers
|
|
431
429
|
save_to_cache(cached, "FindAllMarkers", cache)
|
|
432
430
|
}
|
|
433
431
|
|
|
434
|
-
if (nrow(markers) == 0 && defassay == "SCT") {
|
|
435
|
-
log_warn(" No markers found from SCT assay, try recorrect_umi = FALSE")
|
|
436
|
-
args$recorrect_umi <- FALSE
|
|
437
|
-
markers <- tryCatch({
|
|
438
|
-
do_call(FindAllMarkers, args)
|
|
439
|
-
}, error = function(e) {
|
|
440
|
-
log_warn(e$message)
|
|
441
|
-
data.frame(
|
|
442
|
-
gene = character(),
|
|
443
|
-
p_val = numeric(),
|
|
444
|
-
avg_log2FC = numeric(),
|
|
445
|
-
pct.1 = numeric(),
|
|
446
|
-
pct.2 = numeric(),
|
|
447
|
-
p_val_adj=numeric(),
|
|
448
|
-
cluster = character()
|
|
449
|
-
)
|
|
450
|
-
})
|
|
451
|
-
}
|
|
452
|
-
|
|
453
432
|
if (is.null(case$dotplot$assay)) {
|
|
454
433
|
case$dotplot$assay <- case$assay
|
|
455
434
|
}
|
|
@@ -483,6 +462,60 @@ do_case_findall <- function(casename) {
|
|
|
483
462
|
}
|
|
484
463
|
}
|
|
485
464
|
|
|
465
|
+
#' Run FindMarkers()/FindAllMarkers() and always return a data frame.
#'
#' Errors raised by the Seurat call are caught and replaced by an empty data
#' frame with the expected columns, so callers can rely on `nrow()`.
#' When the default assay (global `defassay`) is "SCT" and the first attempt
#' yields no rows, the call is retried once with `recorrect_umi = FALSE`.
#'
#' @param findmarkers_args Named list of arguments handed to the Seurat
#'   function through `do_call()`.
#' @param find_all If `TRUE`, call `FindAllMarkers()`; otherwise call
#'   `FindMarkers()` and move the row names into a `gene` column.
#' @return A data frame with columns `gene`, `p_val`, `avg_log2FC`, `pct.1`,
#'   `pct.2`, `p_val_adj` (plus `cluster` when `find_all` is `TRUE`);
#'   zero rows when nothing was found or the call failed.
find_markers <- function(findmarkers_args, find_all = FALSE) {
    is_sct <- defassay == "SCT"
    fun <- if (find_all) FindAllMarkers else FindMarkers

    # Fallback result with the same columns a successful call provides
    empty <- data.frame(
        gene = character(),
        p_val = numeric(),
        avg_log2FC = numeric(),
        pct.1 = numeric(),
        pct.2 = numeric(),
        p_val_adj = numeric()
    )
    if (find_all) {
        empty$cluster <- character()
    }

    call_findmarkers <- function(fn, args) {
        res <- do_call(fn, args)
        if (find_all) {
            res
        } else {
            # FindMarkers() keeps the genes in the row names
            rownames_to_column(res, "gene")
        }
    }

    markers <- tryCatch({
        call_findmarkers(fun, findmarkers_args)
    }, error = function(e) {
        msg <- conditionMessage(e)
        # Only the PrepSCTFindMarkers complaint on an SCT assay is kept quiet,
        # because the retry below deals with it. Every other error, including
        # all errors on non-SCT assays, must be reported rather than swallowed.
        handled_by_retry <- is_sct && grepl("PrepSCTFindMarkers", msg, fixed = TRUE)
        if (!handled_by_retry) {
            log_warn(paste0("  ! ", msg))
        }
        empty
    })

    if (nrow(markers) == 0 && is_sct) {
        log_warn("  ! No markers found from SCT assay, trying recorrect_umi = FALSE")
        findmarkers_args$recorrect_umi <- FALSE
        markers <- tryCatch({
            call_findmarkers(fun, findmarkers_args)
        }, error = function(e) {
            log_warn(paste0("  ! ", conditionMessage(e)))
            empty
        })
    }

    markers
}
|
|
518
|
+
|
|
486
519
|
sections <- c()
|
|
487
520
|
do_case <- function(casename) {
|
|
488
521
|
if (isTRUE(cases[[casename]]$findall)) {
|
|
@@ -538,38 +571,7 @@ do_case <- function(casename) {
|
|
|
538
571
|
# args$min.cells.feature <- args$min.cells.feature %||% 1
|
|
539
572
|
# args$min.pct <- args$min.pct %||% 0
|
|
540
573
|
|
|
541
|
-
markers <-
|
|
542
|
-
do_call(FindMarkers, args) %>% rownames_to_column("gene")
|
|
543
|
-
}, error = function(e) {
|
|
544
|
-
log_warn(paste0(" ", e$message))
|
|
545
|
-
data.frame(
|
|
546
|
-
gene = character(),
|
|
547
|
-
p_val = numeric(),
|
|
548
|
-
avg_log2FC = numeric(),
|
|
549
|
-
pct.1 = numeric(),
|
|
550
|
-
pct.2 = numeric(),
|
|
551
|
-
p_val_adj = numeric()
|
|
552
|
-
)
|
|
553
|
-
})
|
|
554
|
-
|
|
555
|
-
if (nrow(markers) == 0 && defassay == "SCT") {
|
|
556
|
-
log_warn(" No markers found from SCT assay, trying recorrect_umi = FALSE")
|
|
557
|
-
args$recorrect_umi <- FALSE
|
|
558
|
-
markers <- tryCatch({
|
|
559
|
-
do_call(FindMarkers, args) %>% rownames_to_column("gene")
|
|
560
|
-
}, error = function(e) {
|
|
561
|
-
log_warn(paste0(" ", e$message))
|
|
562
|
-
data.frame(
|
|
563
|
-
gene = character(),
|
|
564
|
-
p_val = numeric(),
|
|
565
|
-
avg_log2FC = numeric(),
|
|
566
|
-
pct.1 = numeric(),
|
|
567
|
-
pct.2 = numeric(),
|
|
568
|
-
p_val_adj=numeric()
|
|
569
|
-
)
|
|
570
|
-
})
|
|
571
|
-
}
|
|
572
|
-
|
|
574
|
+
markers <- find_markers(args)
|
|
573
575
|
siggenes <- do_enrich(info, markers, case$sigmarkers, case$volcano_genes)
|
|
574
576
|
|
|
575
577
|
if (length(siggenes) > 0) {
|
|
@@ -3,9 +3,11 @@ source("{{biopipen_dir}}/utils/caching.R")
|
|
|
3
3
|
|
|
4
4
|
library(Seurat)
|
|
5
5
|
library(future)
|
|
6
|
+
library(rlang)
|
|
6
7
|
library(tidyr)
|
|
7
8
|
library(dplyr)
|
|
8
9
|
library(digest)
|
|
10
|
+
library(clustree)
|
|
9
11
|
|
|
10
12
|
set.seed(8525)
|
|
11
13
|
|
|
@@ -129,45 +131,85 @@ if (is.null(cached$data)) {
|
|
|
129
131
|
}
|
|
130
132
|
|
|
131
133
|
envs$FindClusters$random.seed <- envs$FindClusters$random.seed %||% 8525
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
134
|
+
#' Expand a resolution specification into a numeric vector.
#'
#' Each element of `resolution` is either a number, which is taken as is, or
#' a string of comma-separated items. An item is a single number ("0.8") or
#' a range "start:end[:step]" (step defaults to 0.1) expanded with `seq()`.
#' Duplicates are dropped keeping the last occurrence, so the resolution
#' given last stays last.
#'
#' @param resolution Numeric or character vector, or a list mixing both.
#' @return A numeric vector of resolutions (`NULL` when `resolution` is empty).
expand_resolution <- function(resolution) {
    # Convert to numeric, failing loudly instead of letting NA through
    to_number <- function(x, item) {
        num <- suppressWarnings(as.numeric(x))
        if (anyNA(num)) {
            stop(
                paste0("Invalid resolution: '", item, "'. Expected numeric values."),
                call. = FALSE
            )
        }
        num
    }

    # Expand one item: either a plain number or a start:end[:step] range
    expand_item <- function(item) {
        if (!grepl(":", item, fixed = TRUE)) {
            return(to_number(item, item))
        }
        range_parts <- trimws(unlist(strsplit(item, ":", fixed = TRUE)))
        if (length(range_parts) == 2) {
            range_parts <- c(range_parts, "0.1")
        }
        if (length(range_parts) != 3) {
            # stop() does not interpolate "{...}", so build the message here
            stop(
                paste0(
                    "Invalid resolution format: ", item,
                    ". Expected 2 or 3 parts separated by ':' for a range."
                ),
                call. = FALSE
            )
        }
        bounds <- to_number(range_parts, item)
        seq(bounds[1], bounds[2], by = bounds[3])
    }

    expanded_res <- c()
    for (res in resolution) {
        if (is.numeric(res)) {
            expanded_res <- c(expanded_res, res)
        } else {
            # is.character
            items <- trimws(unlist(strsplit(res, ",")))
            for (item in items) {
                expanded_res <- c(expanded_res, expand_item(item))
            }
        }
    }
    if (length(expanded_res) == 0) {
        return(expanded_res)
    }

    # keep the last resolution at last; compare on 12 significant digits so
    # that seq() artefacts such as 0.30000000000000004 count as 0.3
    expanded_res[!duplicated(signif(expanded_res, 12), fromLast = TRUE)]
}
|
|
160
|
+
# Expand the configured resolution(s), falling back to 0.8, and keep the
# expanded vector both locally and in the FindClusters argument list.
resolution <- expand_resolution(envs$FindClusters$resolution %||% 0.8)
envs$FindClusters$resolution <- resolution
log_info("Running FindClusters at resolution: {paste(resolution, collapse=',')} ...")

# Cluster the object using the assembled arguments
envs$FindClusters$object <- sobj
sobj <- do_call(FindClusters, envs$FindClusters)
|
|
140
165
|
|
|
166
|
+
#' Recode clusters from 0, 1, 2, ... to c1, c2, c3, ...
#'
#' @param clusters Zero-based cluster ids, as a factor (levels are kept in
#'   their order) or as a plain vector (levels are then sorted numerically).
#' @return A factor of the same length with labels "c<id + 1>".
recode_clusters <- function(clusters) {
    # NULL would otherwise turn into the single bogus label "c"
    if (is.null(clusters)) {
        stop("Cannot recode clusters: no cluster assignments given (NULL).")
    }
    recode <- function(x) paste0("c", as.integer(as.character(x)) + 1)

    if (is.factor(clusters)) {
        old_levels <- levels(clusters)
    } else {
        # a plain vector has no levels; derive them from the observed ids
        old_levels <- as.character(sort(unique(as.integer(as.character(clusters)))))
    }
    # paste0("c", numeric(0)) yields "c", so handle empty pieces explicitly
    new_levels <- if (length(old_levels) > 0) recode(old_levels) else character()
    new_values <- if (length(clusters) > 0) recode(clusters) else character()

    factor(new_values, levels = new_levels)
}
|
|
172
|
+
|
|
173
|
+
# Prefix of the per-resolution meta data columns written by FindClusters(),
# i.e. "<graph.name>_res." (see the Seurat FindClusters reference).
# The "_res." suffix has to be appended to a user-supplied graph.name as well,
# not only to the default "<assay>_snn".
graph_name <- paste0(
    envs$FindClusters$graph.name %||% paste0(DefaultAssay(sobj), "_snn"),
    "_res."
)
for (res in resolution) {
    cluster_name <- paste0(graph_name, res)
    if (!cluster_name %in% colnames(sobj@meta.data)) {
        # fail here rather than recoding a missing (NULL) column
        stop(paste0(
            "Cannot find the clustering results '", cluster_name,
            "' in the meta data."
        ))
    }
    new_cluster_name <- paste0("seurat_clusters.", res)
    sobj@meta.data[[new_cluster_name]] <- recode_clusters(sobj@meta.data[[cluster_name]])
}
# seurat_clusters is recoded the same way and becomes the active identity
sobj@meta.data$seurat_clusters <- recode_clusters(sobj@meta.data$seurat_clusters)
Idents(sobj) <- "seurat_clusters"
|
|
181
|
+
|
|
182
|
+
# Print the number of cells per cluster for the active identities
ident_table <- table(Idents(sobj))
log_info("- Found {length(ident_table)} clusters at resolution {resolution[length(resolution)]}")
print(ident_table)
cat("\n")

# plot the tree (only when more than one resolution was clustered)
if (length(resolution) > 1) {
    log_info("Plotting clustree ...")
    devpars <- envs$clustree_devpars
    png(
        filename = file.path(joboutdir, "clustree.png"),
        width = devpars$width,
        height = devpars$height,
        res = devpars$res
    )
    # the tree is built from the recoded "seurat_clusters.<res>" columns
    p <- clustree(sobj, prefix = "seurat_clusters.")
    print(p)
    dev.off()
}
|
|
166
200
|
|
|
167
201
|
if (DefaultAssay(sobj) == "SCT") {
    # https://github.com/satijalab/seurat/issues/6968
    log_info("Running PrepSCTFindMarkers ...")
    sobj <- PrepSCTFindMarkers(sobj)

    # Register the call under sobj@commands$PrepSCTFindMarkers.
    # The FindClusters record serves as a template SeuratCommand whose slots
    # are all overwritten below.
    prep_cmd <- sobj@commands$FindClusters
    prep_cmd@params <- list()
    prep_cmd@call.string <- "PrepSCTFindMarkers(object = sobj)"
    prep_cmd@assay.used <- "SCT"
    prep_cmd@time.stamp <- Sys.time()
    prep_cmd@name <- "PrepSCTFindMarkers"
    sobj@commands$PrepSCTFindMarkers <- prep_cmd
}
|
|
172
214
|
|
|
173
215
|
log_info("Saving results ...")
|
|
@@ -63,6 +63,26 @@ if (endsWith(ref, ".rds") || endsWith(ref, ".RDS")) {
|
|
|
63
63
|
reference = LoadH5Seurat(ref)
|
|
64
64
|
}
|
|
65
65
|
|
|
66
|
+
# check if refdata exists in the reference
# Each refdata entry must name either an assay of the reference or a column
# of its meta data; otherwise stop with a message naming the offender.
for (rname in names(mapquery_args$refdata)) {
    use_name <- mapquery_args$refdata[[rname]]
    # transferring an assay
    if (use_name %in% names(reference)) { next }
    # transferring a metadata column
    if (!use_name %in% colnames(reference@meta.data)) {
        msg <- paste0(
            "The reference does not have the column '",
            use_name,
            "' in either assays or metadata. "
        )
        # The hint has to be part of the single stop() call: anything placed
        # after stop() is never reached.
        if (startsWith(use_name, "predicted.")) {
            # drop the 10-character "predicted." prefix
            msg <- paste0(msg, "Do you mean: ", substring(use_name, 11))
        }
        stop(msg)
    }
}
|
|
85
|
+
|
|
66
86
|
# With refnorm = "auto", a reference whose default assay is SCT selects
# SCTransform.
if (refnorm == "auto" && DefaultAssay(reference) == "SCT") {
    refnorm <- "SCTransform"
}
|