biopipen 0.28.0__py3-none-any.whl → 0.29.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +8 -0
- biopipen/ns/bam.py +0 -2
- biopipen/ns/bed.py +35 -0
- biopipen/ns/cellranger_pipeline.py +5 -5
- biopipen/ns/cnv.py +18 -2
- biopipen/ns/cnvkit_pipeline.py +16 -11
- biopipen/ns/gene.py +68 -23
- biopipen/ns/misc.py +2 -15
- biopipen/ns/plot.py +146 -0
- biopipen/ns/regulation.py +214 -0
- biopipen/ns/scrna.py +15 -3
- biopipen/ns/snp.py +516 -8
- biopipen/ns/stats.py +74 -2
- biopipen/ns/vcf.py +196 -0
- biopipen/reports/snp/PlinkCallRate.svelte +24 -0
- biopipen/reports/snp/PlinkFreq.svelte +18 -0
- biopipen/reports/snp/PlinkHWE.svelte +18 -0
- biopipen/reports/snp/PlinkHet.svelte +18 -0
- biopipen/reports/snp/PlinkIBD.svelte +18 -0
- biopipen/scripts/bam/CNVpytor.py +144 -46
- biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
- biopipen/scripts/bed/BedtoolsMerge.py +1 -1
- biopipen/scripts/cnv/AneuploidyScore.R +30 -7
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +5 -2
- biopipen/scripts/cnv/TMADScore.R +21 -5
- biopipen/scripts/cnv/TMADScoreSummary.R +6 -2
- biopipen/scripts/cnvkit/CNVkitAccess.py +2 -1
- biopipen/scripts/cnvkit/CNVkitAutobin.py +3 -2
- biopipen/scripts/cnvkit/CNVkitBatch.py +1 -1
- biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -1
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +1 -1
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +1 -1
- biopipen/scripts/cnvkit/CNVkitReference.py +2 -1
- biopipen/scripts/gene/GeneNameConversion.R +65 -0
- biopipen/scripts/gene/GenePromoters.R +61 -0
- biopipen/scripts/misc/Shell.sh +15 -0
- biopipen/scripts/plot/Manhattan.R +140 -0
- biopipen/scripts/plot/QQPlot.R +62 -0
- biopipen/scripts/regulation/MotifAffinityTest.R +226 -0
- biopipen/scripts/regulation/MotifAffinityTest_AtSNP.R +126 -0
- biopipen/scripts/regulation/MotifAffinityTest_MotifBreakR.R +96 -0
- biopipen/scripts/regulation/MotifScan.py +159 -0
- biopipen/scripts/regulation/atSNP.R +33 -0
- biopipen/scripts/regulation/motifBreakR.R +1594 -0
- biopipen/scripts/scrna/CellsDistribution.R +2 -0
- biopipen/scripts/scrna/MarkersFinder.R +59 -67
- biopipen/scripts/scrna/SeuratClustering.R +63 -29
- biopipen/scripts/scrna/SeuratMap2Ref.R +20 -0
- biopipen/scripts/scrna/SeuratSubClustering.R +76 -27
- biopipen/scripts/snp/MatrixEQTL.R +84 -43
- biopipen/scripts/snp/Plink2GTMat.py +133 -0
- biopipen/scripts/snp/PlinkCallRate.R +190 -0
- biopipen/scripts/snp/PlinkFilter.py +100 -0
- biopipen/scripts/snp/PlinkFreq.R +298 -0
- biopipen/scripts/snp/PlinkFromVcf.py +78 -0
- biopipen/scripts/snp/PlinkHWE.R +80 -0
- biopipen/scripts/snp/PlinkHet.R +92 -0
- biopipen/scripts/snp/PlinkIBD.R +197 -0
- biopipen/scripts/snp/PlinkUpdateName.py +124 -0
- biopipen/scripts/stats/MetaPvalue.R +2 -1
- biopipen/scripts/stats/MetaPvalue1.R +70 -0
- biopipen/scripts/tcr/TCRClusterStats.R +12 -7
- biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
- biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
- biopipen/scripts/vcf/BcftoolsSort.py +113 -0
- biopipen/scripts/vcf/BcftoolsView.py +73 -0
- biopipen/scripts/vcf/VcfFix_utils.py +1 -1
- biopipen/scripts/vcf/bcftools_utils.py +52 -0
- biopipen/utils/gene.R +83 -37
- biopipen/utils/gene.py +108 -60
- biopipen/utils/misc.R +56 -0
- biopipen/utils/misc.py +5 -2
- biopipen/utils/reference.py +54 -10
- {biopipen-0.28.0.dist-info → biopipen-0.29.0.dist-info}/METADATA +2 -2
- {biopipen-0.28.0.dist-info → biopipen-0.29.0.dist-info}/RECORD +78 -50
- {biopipen-0.28.0.dist-info → biopipen-0.29.0.dist-info}/entry_points.txt +1 -1
- biopipen/ns/bcftools.py +0 -111
- biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
- biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
- biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
- biopipen/scripts/gene/GeneNameConversion.py +0 -66
- {biopipen-0.28.0.dist-info → biopipen-0.29.0.dist-info}/WHEEL +0 -0
|
@@ -127,13 +127,32 @@ getCAA <- function(segf, cytoarm, tcn_col,
|
|
|
127
127
|
return(as(seg_cyto_chr, "GRangesList"))
|
|
128
128
|
}
|
|
129
129
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
130
|
+
if (endsWith(segfile, ".vcf") || endsWith(segfile, ".vcf.gz")) {
|
|
131
|
+
library(VariantAnnotation)
|
|
132
|
+
vcf = readVcf(segfile)
|
|
133
|
+
seg = data.frame(
|
|
134
|
+
seqnames = as.character(seqnames(vcf)),
|
|
135
|
+
start = start(vcf),
|
|
136
|
+
end = vcf@info[[end_col]],
|
|
137
|
+
seg.mean = vcf@info[[seg_col]]
|
|
138
|
+
)
|
|
139
|
+
} else if (endsWith(segfile, ".bed")) {
|
|
140
|
+
segments = read.table(segfile, header=F, row.names=NULL, sep="\t", stringsAsFactors=F)
|
|
141
|
+
seg = data.frame(
|
|
142
|
+
seqnames = segments[, 1],
|
|
143
|
+
start = segments[, 2],
|
|
144
|
+
end = segments[, 3],
|
|
145
|
+
seg.mean = segments[, 5]
|
|
146
|
+
)
|
|
147
|
+
} else {
|
|
148
|
+
segments = read.table(segfile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
|
|
149
|
+
seg = data.frame(
|
|
150
|
+
seqnames = segments[, chrom_col],
|
|
151
|
+
start = segments[, start_col],
|
|
152
|
+
end = segments[, end_col],
|
|
153
|
+
seg.mean = segments[, seg_col]
|
|
154
|
+
)
|
|
155
|
+
}
|
|
137
156
|
|
|
138
157
|
{% if envs.segmean_transform %}
|
|
139
158
|
segmean_transform = {{envs.segmean_transform}}
|
|
@@ -168,6 +187,10 @@ if (is.character(cn_transform)) {
|
|
|
168
187
|
}
|
|
169
188
|
{% endif %}
|
|
170
189
|
|
|
190
|
+
seg <- seg[
|
|
191
|
+
!is.na(seg$seg.mean) & !is.na(seg$TCN) & !is.infinite(seg$seg.mean) & !is.infinite(seg$TCN),,
|
|
192
|
+
drop=FALSE]
|
|
193
|
+
|
|
171
194
|
write.table(seg, file.path(outdir, "seg.txt"), sep="\t", quote=F, row.names=F, col.names=T)
|
|
172
195
|
|
|
173
196
|
wgd_ploidy = checkIfWGD(
|
|
@@ -52,8 +52,11 @@ if (!is.null(group_cols)) {
|
|
|
52
52
|
|
|
53
53
|
if (!is.null(metafile)) {
|
|
54
54
|
metadf = read.table(metafile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
|
|
55
|
-
|
|
56
|
-
|
|
55
|
+
if (!is.null(metadf$Sample)) {
|
|
56
|
+
metadf$Sample = as.character(metadf$Sample)
|
|
57
|
+
} else {
|
|
58
|
+
colnames(metadf)[1] = "Sample"
|
|
59
|
+
}
|
|
57
60
|
metadf = metadf[metadf$Sample %in% sams, c("Sample", meta_cols), drop=FALSE]
|
|
58
61
|
if (nrow(metadf) != length(sams)) {
|
|
59
62
|
stop(paste("Not all samples in metafile:", paste(setdiff(sams, metadf$Sample), collapse=", ")))
|
biopipen/scripts/cnv/TMADScore.R
CHANGED
|
@@ -11,11 +11,27 @@ if (is.character(segmean_transform)) {
|
|
|
11
11
|
segmean_transform = eval(parse(text=segmean_transform))
|
|
12
12
|
} # otherwise NULL
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
14
|
+
|
|
15
|
+
if (endsWith(segfile, ".vcf") || endsWith(segfile, ".vcf.gz")) {
|
|
16
|
+
library(VariantAnnotation)
|
|
17
|
+
segments = readVcf(segfile)
|
|
18
|
+
seg = data.frame(
|
|
19
|
+
chrom = as.character(seqnames(segments)),
|
|
20
|
+
log2 = segments@info[[seg_col]]
|
|
21
|
+
)
|
|
22
|
+
} else if (endsWith(segfile, ".bed")) {
|
|
23
|
+
segments = read.table(segfile, header=F, row.names=NULL, sep="\t", stringsAsFactors=F)
|
|
24
|
+
seg = data.frame(
|
|
25
|
+
chrom = segments[, 1],
|
|
26
|
+
log2 = segments[, 5]
|
|
27
|
+
)
|
|
28
|
+
} else {
|
|
29
|
+
segments = read.table(segfile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
|
|
30
|
+
seg = data.frame(
|
|
31
|
+
chrom = segments[, chrom_col],
|
|
32
|
+
log2 = segments[, seg_col]
|
|
33
|
+
)
|
|
34
|
+
}
|
|
19
35
|
rm(segments)
|
|
20
36
|
|
|
21
37
|
if (!is.null(excl_chroms) && length(excl_chroms) > 0) {
|
|
@@ -49,8 +49,12 @@ if (!is.null(group_cols)) {
|
|
|
49
49
|
data = data.frame(Sample = sams, tMAD = tmads)
|
|
50
50
|
if (file.exists(metafile) && length(meta_cols) > 0) {
|
|
51
51
|
metadf = read.table(metafile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
|
|
52
|
-
|
|
53
|
-
|
|
52
|
+
if (!is.null(metadf$Sample)) {
|
|
53
|
+
metadf$Sample = as.character(metadf$Sample)
|
|
54
|
+
} else {
|
|
55
|
+
colnames(metadf)[1] = "Sample"
|
|
56
|
+
}
|
|
57
|
+
meta = metadf[, c("Sample", meta_cols), drop=FALSE]
|
|
54
58
|
colnames(meta) = c("Sample", meta_cols)
|
|
55
59
|
data = data %>% left_join(meta, by="Sample")
|
|
56
60
|
}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from pathlib import Path
|
|
1
2
|
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
2
3
|
|
|
3
4
|
excfiles = {{in.excfiles | repr}} # pyright: ignore
|
|
@@ -12,7 +13,7 @@ def main():
|
|
|
12
13
|
"": [cnvkit, "access"],
|
|
13
14
|
"s": min_gap_size,
|
|
14
15
|
"o": outfile,
|
|
15
|
-
"_": reffile,
|
|
16
|
+
"_": Path(reffile).expanduser(),
|
|
16
17
|
}
|
|
17
18
|
if excfiles:
|
|
18
19
|
other_args["exclude"] = excfiles
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from pathlib import Path
|
|
1
2
|
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
2
3
|
|
|
3
4
|
bamfiles = {{in.bamfiles | repr}} # pyright: ignore
|
|
@@ -20,7 +21,7 @@ short_names = {{envs.short_names | repr}} # pyright: ignore
|
|
|
20
21
|
def main():
|
|
21
22
|
|
|
22
23
|
args = dict(
|
|
23
|
-
f=reffile,
|
|
24
|
+
f=Path(reffile).expanduser(),
|
|
24
25
|
m=method,
|
|
25
26
|
g=accfile,
|
|
26
27
|
t=baitfile,
|
|
@@ -29,7 +30,7 @@ def main():
|
|
|
29
30
|
target_min_size=target_min_size,
|
|
30
31
|
antitarget_max_size=antitarget_max_size,
|
|
31
32
|
antitarget_min_size=antitarget_min_size,
|
|
32
|
-
annotate=annotate,
|
|
33
|
+
annotate=Path(annotate).expanduser(),
|
|
33
34
|
short_names=short_names,
|
|
34
35
|
target_output_bed=target_file,
|
|
35
36
|
antitarget_output_bed=antitarget_file,
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from pathlib import Path
|
|
1
2
|
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
2
3
|
|
|
3
4
|
bamfile = {{in.bamfile | quote}} # pyright: ignore
|
|
@@ -13,7 +14,7 @@ ncores = {{envs.ncores | repr}} # pyright: ignore
|
|
|
13
14
|
def main():
|
|
14
15
|
|
|
15
16
|
args = dict(
|
|
16
|
-
f=reffile,
|
|
17
|
+
f=Path(reffile).expanduser(),
|
|
17
18
|
c=count,
|
|
18
19
|
q=min_mapq,
|
|
19
20
|
p=ncores,
|
|
@@ -4,7 +4,7 @@ from diot import Diot
|
|
|
4
4
|
|
|
5
5
|
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
6
6
|
|
|
7
|
-
segfiles = {{in.segfiles | repr}} # pyright: ignore
|
|
7
|
+
segfiles = {{in.segfiles | repr}} # pyright: ignore # noqa
|
|
8
8
|
sample_sex = {{in.sample_sex | repr}} # pyright: ignore
|
|
9
9
|
outdir = {{out.outdir | repr}} # pyright: ignore
|
|
10
10
|
cnvkit = {{envs.cnvkit | quote}} # pyright: ignore
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from pathlib import Path
|
|
1
2
|
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
2
3
|
|
|
3
4
|
covfiles = {{in.covfiles | repr}} # pyright: ignore
|
|
@@ -18,7 +19,7 @@ no_rmask = {{envs.no_rmask | repr}} # pyright: ignore
|
|
|
18
19
|
def main():
|
|
19
20
|
|
|
20
21
|
args = dict(
|
|
21
|
-
f=reffile,
|
|
22
|
+
f=Path(reffile).expanduser(),
|
|
22
23
|
o=outfile,
|
|
23
24
|
c=cluster,
|
|
24
25
|
min_cluster_size=min_cluster_size,
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
source("{{biopipen_dir}}/utils/gene.R")
|
|
3
|
+
|
|
4
|
+
infile <- {{in.infile | quote}}
|
|
5
|
+
outfile <- {{out.outfile | quote}}
|
|
6
|
+
notfound <- {{envs.notfound | r}}
|
|
7
|
+
genecol <- {{envs.genecol | r}}
|
|
8
|
+
output <- {{envs.output | r}}
|
|
9
|
+
dup <- {{envs.dup | r}}
|
|
10
|
+
infmt <- {{envs.infmt | r}}
|
|
11
|
+
outfmt <- {{envs.outfmt | r}}
|
|
12
|
+
species <- {{envs.species | r}}
|
|
13
|
+
|
|
14
|
+
if (is.na(notfound)) {
|
|
15
|
+
notfound = "na"
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
df <- read.table(infile, header=TRUE, sep="\t", check.names=FALSE)
|
|
19
|
+
|
|
20
|
+
if (genecol == 0) {
|
|
21
|
+
log_warn("envs.genecol should be 1-based, but 0 was given. Using 1 instead.")
|
|
22
|
+
genecol <- 1
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
if (is.numeric(genecol)) { genecol <- colnames(df)[genecol] }
|
|
26
|
+
if (dup == "combine") { dup <- ";" }
|
|
27
|
+
|
|
28
|
+
genes <- df[[genecol]]
|
|
29
|
+
converted <- gene_name_conversion(
|
|
30
|
+
genes=genes,
|
|
31
|
+
species=species,
|
|
32
|
+
infmt=infmt,
|
|
33
|
+
outfmt=outfmt,
|
|
34
|
+
notfound=notfound,
|
|
35
|
+
dup=dup
|
|
36
|
+
)
|
|
37
|
+
# <genecol> <outfmt>
|
|
38
|
+
# 1 1255_g_at GUCA1A
|
|
39
|
+
# 2 1316_at THRA
|
|
40
|
+
# 3 1320_at PTPN21
|
|
41
|
+
# 4 1294_at MIR5193
|
|
42
|
+
|
|
43
|
+
# order the converted dataframe by the original gene column
|
|
44
|
+
converted <- converted[order(match(converted$query, genes)), , drop=FALSE]
|
|
45
|
+
outcol <- outfmt
|
|
46
|
+
|
|
47
|
+
if (notfound == "skip" || notfound == "ignore") {
|
|
48
|
+
df <- df[df[[genecol]] %in% converted$query, , drop=FALSE]
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
if (output == "append") {
|
|
52
|
+
if (outfmt %in% colnames(df)) {
|
|
53
|
+
log_warn("The output column name already exists in the input dataframe. Appending with a suffix `_1`.")
|
|
54
|
+
outcol <- paste(outfmt, "_1", sep="")
|
|
55
|
+
}
|
|
56
|
+
df[[outcol]] <- converted[[outfmt]]
|
|
57
|
+
} else if (output == "replace") {
|
|
58
|
+
df[[genecol]] <- converted[[outfmt]]
|
|
59
|
+
} else if (output == "with-query") {
|
|
60
|
+
df <- converted
|
|
61
|
+
} else {
|
|
62
|
+
df <- converted[, outfmt, drop=FALSE]
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
write.table(df, file=outfile, sep="\t", quote=FALSE, row.names=FALSE)
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
library(rlang)
|
|
2
|
+
library(rtracklayer)
|
|
3
|
+
|
|
4
|
+
infile <- {{in.infile | r}}
|
|
5
|
+
outfile <- {{out.outfile | r}}
|
|
6
|
+
up <- {{envs.up | r}}
|
|
7
|
+
down <- {{envs.down | r}}
|
|
8
|
+
notfound <- {{envs.notfound | r}}
|
|
9
|
+
refgene <- {{envs.refgene | r}}
|
|
10
|
+
header <- {{envs.header | r}}
|
|
11
|
+
genecol <- {{envs.genecol | r}}
|
|
12
|
+
match_id <- {{envs.match_id | r}}
|
|
13
|
+
sort_ <- {{envs.sort | r}}
|
|
14
|
+
chrsize <- {{envs.chrsize | r}}
|
|
15
|
+
|
|
16
|
+
down <- down %||% up
|
|
17
|
+
|
|
18
|
+
refgenes <- readGFF(refgene)
|
|
19
|
+
refcol <- ifelse(match_id, "gene_id", "gene_name")
|
|
20
|
+
|
|
21
|
+
if (infile == "/dev/null") {
|
|
22
|
+
genes <- unique(refgenes[[refcol]])
|
|
23
|
+
} else {
|
|
24
|
+
data <- read.table(infile, header=header, sep="\t", stringsAsFactors=FALSE, check.names=FALSE)
|
|
25
|
+
genes <- data[[genecol]]
|
|
26
|
+
rm(data)
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
notfound_genes <- setdiff(genes, refgenes[[refcol]])
|
|
30
|
+
if (notfound == "error" && length(notfound_genes) > 0) {
|
|
31
|
+
stop(paste(
|
|
32
|
+
"The following genes were not found in the reference annotation:",
|
|
33
|
+
paste(notfound_genes, collapse=", ")
|
|
34
|
+
))
|
|
35
|
+
} else if (notfound == 'skip') {
|
|
36
|
+
genes <- genes[!genes %in% notfound_genes]
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
# Select the genes that are in the reference annotation and keep the order
|
|
40
|
+
# of the records in genes
|
|
41
|
+
refgenes <- refgenes[match(genes, refgenes[[refcol]]), , drop = FALSE]
|
|
42
|
+
refgenes <- unique(makeGRangesFromDataFrame(refgenes, keep.extra.columns=TRUE))
|
|
43
|
+
|
|
44
|
+
proms <- promoters(refgenes, up=up, down=down)
|
|
45
|
+
# Scores must be non-NA numeric values
|
|
46
|
+
elementMetadata(proms)$name <- elementMetadata(proms)[[refcol]]
|
|
47
|
+
score(proms) <- 0
|
|
48
|
+
start(proms) <- pmax(1, start(proms))
|
|
49
|
+
|
|
50
|
+
if (sort_) {
|
|
51
|
+
chrom_sizes <- read.table(chrsize, header=FALSE, stringsAsFactors=FALSE, sep="\t")
|
|
52
|
+
common_chroms <- intersect(chrom_sizes$V1, seqlevels(proms))
|
|
53
|
+
if (length(common_chroms) == 0) {
|
|
54
|
+
stop("No common chromosomes found between the promoters and the chromosome sizes. Do you use the correct chromosome sizes file?")
|
|
55
|
+
}
|
|
56
|
+
proms <- keepSeqlevels(proms, common_chroms, pruning.mode="coarse")
|
|
57
|
+
seqlevels(proms) <- common_chroms
|
|
58
|
+
proms <- sort(proms, ignore.strand = TRUE)
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
export.bed(proms, outfile)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# shellcheck disable=all
|
|
2
|
+
export infile={{in.infile | quote}}
|
|
3
|
+
export outfile={{out.outfile | quote}}
|
|
4
|
+
is_outdir={{envs.outdir | int}}
|
|
5
|
+
cmd_given={{envs.cmd | bool | int}}
|
|
6
|
+
{% set _ = out.outfile | dirname | joinpath: "cmd.sh" | as_path | attr: 'write_text' | call: envs.cmd %}
|
|
7
|
+
cmd="{{proc.lang}} {{out.outfile | dirname | joinpath: 'cmd.sh'}}"
|
|
8
|
+
if [[ "$cmd_given" -eq 0 ]]; then
|
|
9
|
+
echo "No command given." 1>&2
|
|
10
|
+
exit 1
|
|
11
|
+
fi
|
|
12
|
+
if [[ $is_outdir -eq 1 ]]; then
|
|
13
|
+
mkdir -p "$outfile"
|
|
14
|
+
fi
|
|
15
|
+
eval "$cmd"
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
library(rlang)
|
|
3
|
+
library(ggmanh)
|
|
4
|
+
|
|
5
|
+
infile <- {{in.infile | r}}
|
|
6
|
+
outfile <- {{out.outfile | r}}
|
|
7
|
+
chrom_col <- {{envs.chrom_col | r}}
|
|
8
|
+
pos_col <- {{envs.pos_col | r}}
|
|
9
|
+
pval_col <- {{envs.pval_col | r}}
|
|
10
|
+
label_col <- {{envs.label_col | r}}
|
|
11
|
+
devpars <- {{envs.devpars | r}}
|
|
12
|
+
title <- {{envs.title | r}}
|
|
13
|
+
ylabel <- {{envs.ylabel | r}}
|
|
14
|
+
rescale <- {{envs.rescale | r}}
|
|
15
|
+
rescale_ratio_threshold <- {{envs.rescale_ratio_threshold | r}}
|
|
16
|
+
signif <- {{envs.signif | r}}
|
|
17
|
+
hicolors <- {{envs.hicolors | r}}
|
|
18
|
+
thin_n <- {{envs.thin_n | r}}
|
|
19
|
+
thin_bins <- {{envs.thin_bins | r}}
|
|
20
|
+
zoom <- {{envs.zoom | r}}
|
|
21
|
+
zoom_devpars <- {{envs.zoom_devpars | r}}
|
|
22
|
+
chroms <- {{envs.chroms | r}}
|
|
23
|
+
args <- {{envs.args | r: todot="-"}}
|
|
24
|
+
|
|
25
|
+
data <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
|
|
26
|
+
|
|
27
|
+
# normalize columns
|
|
28
|
+
cnames <- colnames(data)
|
|
29
|
+
if (is.numeric(chrom_col)) { chrom_col <- cnames[chrom_col] }
|
|
30
|
+
if (is.numeric(pos_col)) { pos_col <- cnames[pos_col] }
|
|
31
|
+
if (is.numeric(pval_col)) { pval_col <- cnames[pval_col] }
|
|
32
|
+
if (is.numeric(label_col)) { label_col <- cnames[label_col] }
|
|
33
|
+
|
|
34
|
+
# normalize chroms
|
|
35
|
+
norm_chroms <- function(chrs) {
|
|
36
|
+
chrs <- as.character(chrs)
|
|
37
|
+
if (length(chrs) == 1 && grepl(",", chrs)) {
|
|
38
|
+
chrs <- trimws(unlist(strsplit(chrs, ",")))
|
|
39
|
+
}
|
|
40
|
+
if (length(chrs) > 1) {
|
|
41
|
+
return(unique(unlist(sapply(chrs, function(chr) norm_chroms(chr)))))
|
|
42
|
+
}
|
|
43
|
+
if (!grepl("-", chrs)) { return(chrs) }
|
|
44
|
+
|
|
45
|
+
# expand chr1-22 -> chr1, chr2, ..., chr22
|
|
46
|
+
# chr1-22 -> 'chr1', '22'
|
|
47
|
+
chrs <- unlist(strsplit(chrs, "-"))
|
|
48
|
+
if (length(chrs) != 2) {
|
|
49
|
+
stop(paste0("Invalid chroms: ", chrs))
|
|
50
|
+
}
|
|
51
|
+
# detect prefix
|
|
52
|
+
prefix1 <- gsub("[0-9]", "", chrs[1])
|
|
53
|
+
prefix2 <- gsub("[0-9]", "", chrs[2])
|
|
54
|
+
if (nchar(prefix2) > 0 && prefix1 != prefix2) {
|
|
55
|
+
stop(paste0("Invalid chroms: ", chrs, " (prefix mismatch)"))
|
|
56
|
+
}
|
|
57
|
+
chr_a <- as.integer(substring(chrs[1], nchar(prefix1) + 1))
|
|
58
|
+
chr_b <- as.integer(substring(chrs[2], nchar(prefix2) + 1))
|
|
59
|
+
chr_min <- min(chr_a, chr_b)
|
|
60
|
+
chr_max <- max(chr_a, chr_b)
|
|
61
|
+
return(paste0(prefix1, chr_min:chr_max))
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
log_info("Preparing data for plotting ...")
|
|
65
|
+
if (length(chroms) == 1 && chroms == "auto") {
|
|
66
|
+
chroms <- unique(data[[chrom_col]])
|
|
67
|
+
} else {
|
|
68
|
+
chroms <- norm_chroms(chroms)
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
# prepare data
|
|
72
|
+
mp_prep_args = list()
|
|
73
|
+
if (length(signif) == 1 && is.character(signif)) {
|
|
74
|
+
signif <- as.numeric(trimws(unlist(strsplit(signif, ","))))
|
|
75
|
+
}
|
|
76
|
+
siglevel <- min(signif)
|
|
77
|
+
if (!is.null(label_col)) {
|
|
78
|
+
data$.label <- ifelse(data[[pval_col]] < siglevel, data[[label_col]], "")
|
|
79
|
+
}
|
|
80
|
+
if (!is.null(hicolors)) {
|
|
81
|
+
sig_str <- "Significant"
|
|
82
|
+
nsig_str <- "Not significant"
|
|
83
|
+
data$.highlight <- ifelse(data[[pval_col]] < siglevel, sig_str, nsig_str)
|
|
84
|
+
if (length(hicolors) == 1) { hicolors <- c(hicolors, "grey") }
|
|
85
|
+
names(hicolors) <- c(sig_str, nsig_str)
|
|
86
|
+
mp_prep_args$highlight.colname <- ".highlight"
|
|
87
|
+
mp_prep_args$highlight.col <- hicolors
|
|
88
|
+
}
|
|
89
|
+
mp_prep_args$x <- data
|
|
90
|
+
mp_prep_args$chr.colname <- chrom_col
|
|
91
|
+
mp_prep_args$pos.colname <- pos_col
|
|
92
|
+
mp_prep_args$pval.colname <- pval_col
|
|
93
|
+
mp_prep_args$chr.order <- chroms
|
|
94
|
+
if (!is.null(thin_n) && thin_n > 0) {
|
|
95
|
+
mp_prep_args$thin.n <- thin_n
|
|
96
|
+
mp_prep_args$thin.bins <- thin_bins
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
mpdata <- do_call(manhattan_data_preprocess, mp_prep_args)
|
|
100
|
+
|
|
101
|
+
# plot
|
|
102
|
+
log_info("Plotting Manhattan plot ...")
|
|
103
|
+
args$x <- mpdata
|
|
104
|
+
args$signif <- signif
|
|
105
|
+
args$plot.title <- title
|
|
106
|
+
args$rescale <- rescale
|
|
107
|
+
args$rescale.ratio.threshold <- rescale_ratio_threshold
|
|
108
|
+
if (!is.null(hicolors)) { args$color.by.highlight <- TRUE }
|
|
109
|
+
if (!is.null(label_col)) { args$label.colname <- ".label" }
|
|
110
|
+
g <- do_call(manhattan_plot, args)
|
|
111
|
+
|
|
112
|
+
png(outfile, width=devpars$width, height=devpars$height, res=devpars$res)
|
|
113
|
+
print(g)
|
|
114
|
+
dev.off()
|
|
115
|
+
|
|
116
|
+
# zoom into chromosomes
|
|
117
|
+
if (!is.null(zoom)) {
|
|
118
|
+
log_info("Zooming into chromosomes ...")
|
|
119
|
+
zoom <- norm_chroms(zoom)
|
|
120
|
+
for (z in zoom) {
|
|
121
|
+
log_info("- {z}")
|
|
122
|
+
args_z <- args
|
|
123
|
+
args_z$chromosome <- z
|
|
124
|
+
args_z$plot.title <- paste0(title, " (", z, ")")
|
|
125
|
+
args_z$x.label <- "Position"
|
|
126
|
+
g_z <- do_call(manhattan_plot, args_z)
|
|
127
|
+
outfile_z <- gsub("\\.png$", paste0("-", z, ".png"), outfile)
|
|
128
|
+
zm_devpars <- zoom_devpars
|
|
129
|
+
zm_devpars$res <- zm_devpars$res %||% devpars$res
|
|
130
|
+
zm_devpars$height <- zm_devpars$height %||% devpars$height
|
|
131
|
+
png(
|
|
132
|
+
outfile_z,
|
|
133
|
+
width=zm_devpars$width,
|
|
134
|
+
height=zm_devpars$height,
|
|
135
|
+
res=zm_devpars$res
|
|
136
|
+
)
|
|
137
|
+
print(g_z)
|
|
138
|
+
dev.off()
|
|
139
|
+
}
|
|
140
|
+
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
|
|
3
|
+
library(ggplot2)
|
|
4
|
+
library(ggprism)
|
|
5
|
+
library(qqplotr)
|
|
6
|
+
|
|
7
|
+
theme_set(theme_prism())
|
|
8
|
+
|
|
9
|
+
infile <- {{in.infile | r}}
|
|
10
|
+
outfile <- {{out.outfile | r}}
|
|
11
|
+
val_col <- {{envs.val_col | r}}
|
|
12
|
+
devpars <- {{envs.devpars | r}}
|
|
13
|
+
title <- {{envs.title | r}}
|
|
14
|
+
xlabel <- {{envs.xlabel | r}}
|
|
15
|
+
ylabel <- {{envs.ylabel | r}}
|
|
16
|
+
kind <- {{envs.kind | r}}
|
|
17
|
+
trans <- {{envs.trans | r}}
|
|
18
|
+
band_args <- {{envs.band | r}}
|
|
19
|
+
line_args <- {{envs.line | r}}
|
|
20
|
+
point_args <- {{envs.point | r}}
|
|
21
|
+
ggs <- {{envs.ggs | r}}
|
|
22
|
+
|
|
23
|
+
indata <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
|
|
24
|
+
if (is.numeric(val_col)) { val_col <- colnames(indata)[val_col] }
|
|
25
|
+
|
|
26
|
+
band_fun <- ifelse(kind == "pp", stat_pp_band, stat_qq_band)
|
|
27
|
+
line_fun <- ifelse(kind == "pp", stat_pp_line, stat_qq_line)
|
|
28
|
+
point_fun <- ifelse(kind == "pp", stat_pp_point, stat_qq_point)
|
|
29
|
+
|
|
30
|
+
title <- title %||% waiver()
|
|
31
|
+
xlabel <- xlabel %||% waiver()
|
|
32
|
+
ylabel <- ylabel %||% waiver()
|
|
33
|
+
|
|
34
|
+
if (!is.null(trans)) {
|
|
35
|
+
trans <- trimws(trans)
|
|
36
|
+
if (trans == "-log10") {
|
|
37
|
+
trans <- function(x) -log10(x)
|
|
38
|
+
} else {
|
|
39
|
+
trans <- eval(parse(text = trans))
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
indata$.trans_val <- trans(indata[[val_col]])
|
|
43
|
+
val_col <- ".trans_val"
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
indata <- indata[!is.na(indata[[val_col]]), , drop=FALSE]
|
|
47
|
+
|
|
48
|
+
p <- ggplot(data = indata, mapping = aes(sample = !!sym(val_col))) +
|
|
49
|
+
do_call(band_fun, band_args) +
|
|
50
|
+
do_call(line_fun, line_args) +
|
|
51
|
+
do_call(point_fun, point_args) +
|
|
52
|
+
labs(title = title, x = xlabel, y = ylabel)
|
|
53
|
+
|
|
54
|
+
if (!is.null(ggs)) {
|
|
55
|
+
for (gg in ggs) {
|
|
56
|
+
p <- p + eval(parse(text = gg))
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
png(outfile, width=devpars$width, height=devpars$height, res=devpars$res)
|
|
61
|
+
print(p)
|
|
62
|
+
dev.off()
|