biopipen 0.28.1__py3-none-any.whl → 0.29.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (82) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +8 -0
  3. biopipen/ns/bam.py +0 -2
  4. biopipen/ns/bed.py +35 -0
  5. biopipen/ns/cellranger_pipeline.py +5 -5
  6. biopipen/ns/cnv.py +18 -2
  7. biopipen/ns/cnvkit_pipeline.py +16 -11
  8. biopipen/ns/gene.py +68 -23
  9. biopipen/ns/misc.py +2 -15
  10. biopipen/ns/plot.py +146 -0
  11. biopipen/ns/regulation.py +214 -0
  12. biopipen/ns/scrna.py +15 -3
  13. biopipen/ns/snp.py +516 -8
  14. biopipen/ns/stats.py +74 -2
  15. biopipen/ns/vcf.py +196 -0
  16. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  17. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  18. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  19. biopipen/reports/snp/PlinkHet.svelte +18 -0
  20. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  21. biopipen/scripts/bam/CNVpytor.py +144 -46
  22. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  23. biopipen/scripts/bed/BedtoolsMerge.py +1 -1
  24. biopipen/scripts/cnv/AneuploidyScore.R +30 -7
  25. biopipen/scripts/cnv/AneuploidyScoreSummary.R +5 -2
  26. biopipen/scripts/cnv/TMADScore.R +21 -5
  27. biopipen/scripts/cnv/TMADScoreSummary.R +6 -2
  28. biopipen/scripts/cnvkit/CNVkitAccess.py +2 -1
  29. biopipen/scripts/cnvkit/CNVkitAutobin.py +3 -2
  30. biopipen/scripts/cnvkit/CNVkitBatch.py +1 -1
  31. biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -1
  32. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +1 -1
  33. biopipen/scripts/cnvkit/CNVkitHeatmap.py +1 -1
  34. biopipen/scripts/cnvkit/CNVkitReference.py +2 -1
  35. biopipen/scripts/gene/GeneNameConversion.R +65 -0
  36. biopipen/scripts/gene/GenePromoters.R +61 -0
  37. biopipen/scripts/misc/Shell.sh +15 -0
  38. biopipen/scripts/plot/Manhattan.R +140 -0
  39. biopipen/scripts/plot/QQPlot.R +62 -0
  40. biopipen/scripts/regulation/MotifAffinityTest.R +226 -0
  41. biopipen/scripts/regulation/MotifAffinityTest_AtSNP.R +126 -0
  42. biopipen/scripts/regulation/MotifAffinityTest_MotifBreakR.R +96 -0
  43. biopipen/scripts/regulation/MotifScan.py +159 -0
  44. biopipen/scripts/regulation/atSNP.R +33 -0
  45. biopipen/scripts/regulation/motifBreakR.R +1594 -0
  46. biopipen/scripts/scrna/MarkersFinder.R +59 -67
  47. biopipen/scripts/scrna/SeuratClustering.R +63 -29
  48. biopipen/scripts/scrna/SeuratMap2Ref.R +20 -0
  49. biopipen/scripts/scrna/SeuratSubClustering.R +76 -27
  50. biopipen/scripts/snp/MatrixEQTL.R +84 -43
  51. biopipen/scripts/snp/Plink2GTMat.py +133 -0
  52. biopipen/scripts/snp/PlinkCallRate.R +190 -0
  53. biopipen/scripts/snp/PlinkFilter.py +100 -0
  54. biopipen/scripts/snp/PlinkFreq.R +298 -0
  55. biopipen/scripts/snp/PlinkFromVcf.py +78 -0
  56. biopipen/scripts/snp/PlinkHWE.R +80 -0
  57. biopipen/scripts/snp/PlinkHet.R +92 -0
  58. biopipen/scripts/snp/PlinkIBD.R +197 -0
  59. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  60. biopipen/scripts/stats/MetaPvalue.R +2 -1
  61. biopipen/scripts/stats/MetaPvalue1.R +70 -0
  62. biopipen/scripts/tcr/TCRClusterStats.R +12 -7
  63. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  64. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  65. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  66. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  67. biopipen/scripts/vcf/VcfFix_utils.py +1 -1
  68. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  69. biopipen/utils/gene.R +83 -37
  70. biopipen/utils/gene.py +108 -60
  71. biopipen/utils/misc.R +56 -0
  72. biopipen/utils/misc.py +5 -2
  73. biopipen/utils/reference.py +54 -10
  74. {biopipen-0.28.1.dist-info → biopipen-0.29.0.dist-info}/METADATA +2 -2
  75. {biopipen-0.28.1.dist-info → biopipen-0.29.0.dist-info}/RECORD +77 -49
  76. {biopipen-0.28.1.dist-info → biopipen-0.29.0.dist-info}/entry_points.txt +1 -1
  77. biopipen/ns/bcftools.py +0 -111
  78. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  79. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  80. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  81. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  82. {biopipen-0.28.1.dist-info → biopipen-0.29.0.dist-info}/WHEEL +0 -0
@@ -127,13 +127,32 @@ getCAA <- function(segf, cytoarm, tcn_col,
127
127
  return(as(seg_cyto_chr, "GRangesList"))
128
128
  }
129
129
 
130
- segments = read.table(segfile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
131
- seg = data.frame(
132
- seqnames = segments[, chrom_col],
133
- start = segments[, start_col],
134
- end = segments[, end_col],
135
- seg.mean = segments[, seg_col]
136
- )
130
+ if (endsWith(segfile, ".vcf") || endsWith(segfile, ".vcf.gz")) {
131
+ library(VariantAnnotation)
132
+ vcf = readVcf(segfile)
133
+ seg = data.frame(
134
+ seqnames = as.character(seqnames(vcf)),
135
+ start = start(vcf),
136
+ end = vcf@info[[end_col]],
137
+ seg.mean = vcf@info[[seg_col]]
138
+ )
139
+ } else if (endsWith(segfile, ".bed")) {
140
+ segments = read.table(segfile, header=F, row.names=NULL, sep="\t", stringsAsFactors=F)
141
+ seg = data.frame(
142
+ seqnames = segments[, 1],
143
+ start = segments[, 2],
144
+ end = segments[, 3],
145
+ seg.mean = segments[, 5]
146
+ )
147
+ } else {
148
+ segments = read.table(segfile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
149
+ seg = data.frame(
150
+ seqnames = segments[, chrom_col],
151
+ start = segments[, start_col],
152
+ end = segments[, end_col],
153
+ seg.mean = segments[, seg_col]
154
+ )
155
+ }
137
156
 
138
157
  {% if envs.segmean_transform %}
139
158
  segmean_transform = {{envs.segmean_transform}}
@@ -168,6 +187,10 @@ if (is.character(cn_transform)) {
168
187
  }
169
188
  {% endif %}
170
189
 
190
+ seg <- seg[
191
+ !is.na(seg$seg.mean) & !is.na(seg$TCN) & !is.infinite(seg$seg.mean) & !is.infinite(seg$TCN),,
192
+ drop=FALSE]
193
+
171
194
  write.table(seg, file.path(outdir, "seg.txt"), sep="\t", quote=F, row.names=F, col.names=T)
172
195
 
173
196
  wgd_ploidy = checkIfWGD(
@@ -52,8 +52,11 @@ if (!is.null(group_cols)) {
52
52
 
53
53
  if (!is.null(metafile)) {
54
54
  metadf = read.table(metafile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
55
- sample_col = colnames(metadf)[1]
56
- colnames(metadf)[1] = "Sample"
55
+ if (!is.null(metadf$Sample)) {
56
+ metadf$Sample = as.character(metadf$Sample)
57
+ } else {
58
+ colnames(metadf)[1] = "Sample"
59
+ }
57
60
  metadf = metadf[metadf$Sample %in% sams, c("Sample", meta_cols), drop=FALSE]
58
61
  if (nrow(metadf) != length(sams)) {
59
62
  stop(paste("Not all samples in metafile:", paste(setdiff(sams, metadf$Sample), collapse=", ")))
@@ -11,11 +11,27 @@ if (is.character(segmean_transform)) {
11
11
  segmean_transform = eval(parse(text=segmean_transform))
12
12
  } # otherwise NULL
13
13
 
14
- segments = read.table(segfile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
15
- seg = data.frame(
16
- chrom = segments[, chrom_col],
17
- log2 = segments[, seg_col]
18
- )
14
+
15
+ if (endsWith(segfile, ".vcf") || endsWith(segfile, ".vcf.gz")) {
16
+ library(VariantAnnotation)
17
+ segments = readVcf(segfile)
18
+ seg = data.frame(
19
+ chrom = as.character(seqnames(segments)),
20
+ log2 = segments@info[[seg_col]]
21
+ )
22
+ } else if (endsWith(segfile, ".bed")) {
23
+ segments = read.table(segfile, header=F, row.names=NULL, sep="\t", stringsAsFactors=F)
24
+ seg = data.frame(
25
+ chrom = segments[, 1],
26
+ log2 = segments[, 5]
27
+ )
28
+ } else {
29
+ segments = read.table(segfile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
30
+ seg = data.frame(
31
+ chrom = segments[, chrom_col],
32
+ log2 = segments[, seg_col]
33
+ )
34
+ }
19
35
  rm(segments)
20
36
 
21
37
  if (!is.null(excl_chroms) && length(excl_chroms) > 0) {
@@ -49,8 +49,12 @@ if (!is.null(group_cols)) {
49
49
  data = data.frame(Sample = sams, tMAD = tmads)
50
50
  if (file.exists(metafile) && length(meta_cols) > 0) {
51
51
  metadf = read.table(metafile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
52
- sample_col = colnames(metadf)[1]
53
- meta = metadf[, c(sample_col, meta_cols), drop=FALSE]
52
+ if (!is.null(metadf$Sample)) {
53
+ metadf$Sample = as.character(metadf$Sample)
54
+ } else {
55
+ colnames(metadf)[1] = "Sample"
56
+ }
57
+ meta = metadf[, c("Sample", meta_cols), drop=FALSE]
54
58
  colnames(meta) = c("Sample", meta_cols)
55
59
  data = data %>% left_join(meta, by="Sample")
56
60
  }
@@ -1,3 +1,4 @@
1
+ from pathlib import Path
1
2
  from biopipen.utils.misc import run_command, dict_to_cli_args
2
3
 
3
4
  excfiles = {{in.excfiles | repr}} # pyright: ignore
@@ -12,7 +13,7 @@ def main():
12
13
  "": [cnvkit, "access"],
13
14
  "s": min_gap_size,
14
15
  "o": outfile,
15
- "_": reffile,
16
+ "_": Path(reffile).expanduser(),
16
17
  }
17
18
  if excfiles:
18
19
  other_args["exclude"] = excfiles
@@ -1,3 +1,4 @@
1
+ from pathlib import Path
1
2
  from biopipen.utils.misc import run_command, dict_to_cli_args
2
3
 
3
4
  bamfiles = {{in.bamfiles | repr}} # pyright: ignore
@@ -20,7 +21,7 @@ short_names = {{envs.short_names | repr}} # pyright: ignore
20
21
  def main():
21
22
 
22
23
  args = dict(
23
- f=reffile,
24
+ f=Path(reffile).expanduser(),
24
25
  m=method,
25
26
  g=accfile,
26
27
  t=baitfile,
@@ -29,7 +30,7 @@ def main():
29
30
  target_min_size=target_min_size,
30
31
  antitarget_max_size=antitarget_max_size,
31
32
  antitarget_min_size=antitarget_min_size,
32
- annotate=annotate,
33
+ annotate=Path(annotate).expanduser(),
33
34
  short_names=short_names,
34
35
  target_output_bed=target_file,
35
36
  antitarget_output_bed=antitarget_file,
@@ -42,7 +42,7 @@ def gen_access():
42
42
  exclude=access_excludes or False,
43
43
  s=access_min_gap_size or False,
44
44
  o=accessfile,
45
- _=ref,
45
+ _=Path(ref).expanduser(),
46
46
  )
47
47
  args[""] = [cnvkit, "access"]
48
48
  run_command(dict_to_cli_args(args, dashify=True), fg=True)
@@ -1,3 +1,4 @@
1
+ from pathlib import Path
1
2
  from biopipen.utils.misc import run_command, dict_to_cli_args
2
3
 
3
4
  bamfile = {{in.bamfile | quote}} # pyright: ignore
@@ -13,7 +14,7 @@ ncores = {{envs.ncores | repr}} # pyright: ignore
13
14
  def main():
14
15
 
15
16
  args = dict(
16
- f=reffile,
17
+ f=Path(reffile).expanduser(),
17
18
  c=count,
18
19
  q=min_mapq,
19
20
  p=ncores,
@@ -60,7 +60,7 @@ params.update({
60
60
  "o": targetfile,
61
61
  "c": covfile,
62
62
  "p": ncores,
63
- "f": ref,
63
+ "f": Path(ref).expanduser(),
64
64
  "s": samtools,
65
65
  "_": bamfiles,
66
66
  })
@@ -4,7 +4,7 @@ from diot import Diot
4
4
 
5
5
  from biopipen.utils.misc import run_command, dict_to_cli_args
6
6
 
7
- segfiles = {{in.segfiles | repr}} # pyright: ignore
7
+ segfiles = {{in.segfiles | repr}} # pyright: ignore # noqa
8
8
  sample_sex = {{in.sample_sex | repr}} # pyright: ignore
9
9
  outdir = {{out.outdir | repr}} # pyright: ignore
10
10
  cnvkit = {{envs.cnvkit | quote}} # pyright: ignore
@@ -1,3 +1,4 @@
1
+ from pathlib import Path
1
2
  from biopipen.utils.misc import run_command, dict_to_cli_args
2
3
 
3
4
  covfiles = {{in.covfiles | repr}} # pyright: ignore
@@ -18,7 +19,7 @@ no_rmask = {{envs.no_rmask | repr}} # pyright: ignore
18
19
  def main():
19
20
 
20
21
  args = dict(
21
- f=reffile,
22
+ f=Path(reffile).expanduser(),
22
23
  o=outfile,
23
24
  c=cluster,
24
25
  min_cluster_size=min_cluster_size,
@@ -0,0 +1,65 @@
1
+ source("{{biopipen_dir}}/utils/misc.R")
2
+ source("{{biopipen_dir}}/utils/gene.R")
3
+
4
+ infile <- {{in.infile | quote}}
5
+ outfile <- {{out.outfile | quote}}
6
+ notfound <- {{envs.notfound | r}}
7
+ genecol <- {{envs.genecol | r}}
8
+ output <- {{envs.output | r}}
9
+ dup <- {{envs.dup | r}}
10
+ infmt <- {{envs.infmt | r}}
11
+ outfmt <- {{envs.outfmt | r}}
12
+ species <- {{envs.species | r}}
13
+
14
+ if (is.na(notfound)) {
15
+ notfound = "na"
16
+ }
17
+
18
+ df <- read.table(infile, header=TRUE, sep="\t", check.names=FALSE)
19
+
20
+ if (genecol == 0) {
21
+ log_warn("envs.genecol should be 1-based, but 0 was given. Using 1 instead.")
22
+ genecol <- 1
23
+ }
24
+
25
+ if (is.numeric(genecol)) { genecol <- colnames(df)[genecol] }
26
+ if (dup == "combine") { dup <- ";" }
27
+
28
+ genes <- df[[genecol]]
29
+ converted <- gene_name_conversion(
30
+ genes=genes,
31
+ species=species,
32
+ infmt=infmt,
33
+ outfmt=outfmt,
34
+ notfound=notfound,
35
+ dup=dup
36
+ )
37
+ # <genecol> <outfmt>
38
+ # 1 1255_g_at GUCA1A
39
+ # 2 1316_at THRA
40
+ # 3 1320_at PTPN21
41
+ # 4 1294_at MIR5193
42
+
43
+ # order the converted dataframe by the original gene column
44
+ converted <- converted[order(match(converted$query, genes)), , drop=FALSE]
45
+ outcol <- outfmt
46
+
47
+ if (notfound == "skip" || notfound == "ignore") {
48
+ df <- df[df[[genecol]] %in% converted$query, , drop=FALSE]
49
+ }
50
+
51
+ if (output == "append") {
52
+ if (outfmt %in% colnames(df)) {
53
+ log_warn("The output column name already exists in the input dataframe. Appending with a suffix `_1`.")
54
+ outcol <- paste(outfmt, "_1", sep="")
55
+ }
56
+ df[[outcol]] <- converted[[outfmt]]
57
+ } else if (output == "replace") {
58
+ df[[genecol]] <- converted[[outfmt]]
59
+ } else if (output == "with-query") {
60
+ df <- converted
61
+ } else {
62
+ df <- converted[, outfmt, drop=FALSE]
63
+ }
64
+
65
+ write.table(df, file=outfile, sep="\t", quote=FALSE, row.names=FALSE)
@@ -0,0 +1,61 @@
1
+ library(rlang)
2
+ library(rtracklayer)
3
+
4
+ infile <- {{in.infile | r}}
5
+ outfile <- {{out.outfile | r}}
6
+ up <- {{envs.up | r}}
7
+ down <- {{envs.down | r}}
8
+ notfound <- {{envs.notfound | r}}
9
+ refgene <- {{envs.refgene | r}}
10
+ header <- {{envs.header | r}}
11
+ genecol <- {{envs.genecol | r}}
12
+ match_id <- {{envs.match_id | r}}
13
+ sort_ <- {{envs.sort | r}}
14
+ chrsize <- {{envs.chrsize | r}}
15
+
16
+ down <- down %||% up
17
+
18
+ refgenes <- readGFF(refgene)
19
+ refcol <- ifelse(match_id, "gene_id", "gene_name")
20
+
21
+ if (infile == "/dev/null") {
22
+ genes <- unique(refgenes[[refcol]])
23
+ } else {
24
+ data <- read.table(infile, header=header, sep="\t", stringsAsFactors=FALSE, check.names=FALSE)
25
+ genes <- data[[genecol]]
26
+ rm(data)
27
+ }
28
+
29
+ notfound_genes <- setdiff(genes, refgenes[[refcol]])
30
+ if (notfound == "error" && length(notfound_genes) > 0) {
31
+ stop(paste(
32
+ "The following genes were not found in the reference annotation:",
33
+ paste(notfound_genes, collapse=", ")
34
+ ))
35
+ } else if (notfound == 'skip') {
36
+ genes <- genes[!genes %in% notfound_genes]
37
+ }
38
+
39
+ # Select the genes that are in the reference annotation and keep the order
40
+ # of the records in genes
41
+ refgenes <- refgenes[match(genes, refgenes[[refcol]]), , drop = FALSE]
42
+ refgenes <- unique(makeGRangesFromDataFrame(refgenes, keep.extra.columns=TRUE))
43
+
44
+ proms <- promoters(refgenes, up=up, down=down)
45
+ # Scores must be non-NA numeric values
46
+ elementMetadata(proms)$name <- elementMetadata(proms)[[refcol]]
47
+ score(proms) <- 0
48
+ start(proms) <- pmax(1, start(proms))
49
+
50
+ if (sort_) {
51
+ chrom_sizes <- read.table(chrsize, header=FALSE, stringsAsFactors=FALSE, sep="\t")
52
+ common_chroms <- intersect(chrom_sizes$V1, seqlevels(proms))
53
+ if (length(common_chroms) == 0) {
54
+ stop("No common chromosomes found between the promoters and the chromosome sizes. Do you use the correct chromosome sizes file?")
55
+ }
56
+ proms <- keepSeqlevels(proms, common_chroms, pruning.mode="coarse")
57
+ seqlevels(proms) <- common_chroms
58
+ proms <- sort(proms, ignore.strand = TRUE)
59
+ }
60
+
61
+ export.bed(proms, outfile)
@@ -0,0 +1,15 @@
1
+ # shellcheck disable=all
2
+ export infile={{in.infile | quote}}
3
+ export outfile={{out.outfile | quote}}
4
+ is_outdir={{envs.outdir | int}}
5
+ cmd_given={{envs.cmd | bool | int}}
6
+ {% set _ = out.outfile | dirname | joinpath: "cmd.sh" | as_path | attr: 'write_text' | call: envs.cmd %}
7
+ cmd="{{proc.lang}} {{out.outfile | dirname | joinpath: 'cmd.sh'}}"
8
+ if [[ "$cmd_given" -eq 0 ]]; then
9
+ echo "No command given." 1>&2
10
+ exit 1
11
+ fi
12
+ if [[ $is_outdir -eq 1 ]]; then
13
+ mkdir -p "$outfile"
14
+ fi
15
+ eval "$cmd"
@@ -0,0 +1,140 @@
1
+ source("{{biopipen_dir}}/utils/misc.R")
2
+ library(rlang)
3
+ library(ggmanh)
4
+
5
+ infile <- {{in.infile | r}}
6
+ outfile <- {{out.outfile | r}}
7
+ chrom_col <- {{envs.chrom_col | r}}
8
+ pos_col <- {{envs.pos_col | r}}
9
+ pval_col <- {{envs.pval_col | r}}
10
+ label_col <- {{envs.label_col | r}}
11
+ devpars <- {{envs.devpars | r}}
12
+ title <- {{envs.title | r}}
13
+ ylabel <- {{envs.ylabel | r}}
14
+ rescale <- {{envs.rescale | r}}
15
+ rescale_ratio_threshold <- {{envs.rescale_ratio_threshold | r}}
16
+ signif <- {{envs.signif | r}}
17
+ hicolors <- {{envs.hicolors | r}}
18
+ thin_n <- {{envs.thin_n | r}}
19
+ thin_bins <- {{envs.thin_bins | r}}
20
+ zoom <- {{envs.zoom | r}}
21
+ zoom_devpars <- {{envs.zoom_devpars | r}}
22
+ chroms <- {{envs.chroms | r}}
23
+ args <- {{envs.args | r: todot="-"}}
24
+
25
+ data <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
26
+
27
+ # normalize columns
28
+ cnames <- colnames(data)
29
+ if (is.numeric(chrom_col)) { chrom_col <- cnames[chrom_col] }
30
+ if (is.numeric(pos_col)) { pos_col <- cnames[pos_col] }
31
+ if (is.numeric(pval_col)) { pval_col <- cnames[pval_col] }
32
+ if (is.numeric(label_col)) { label_col <- cnames[label_col] }
33
+
34
+ # normalize chroms
35
+ norm_chroms <- function(chrs) {
36
+ chrs <- as.character(chrs)
37
+ if (length(chrs) == 1 && grepl(",", chrs)) {
38
+ chrs <- trimws(unlist(strsplit(chrs, ",")))
39
+ }
40
+ if (length(chrs) > 1) {
41
+ return(unique(unlist(sapply(chrs, function(chr) norm_chroms(chr)))))
42
+ }
43
+ if (!grepl("-", chrs)) { return(chrs) }
44
+
45
+ # expand chr1-22 -> chr1, chr2, ..., chr22
46
+ # chr1-22 -> 'chr1', '22'
47
+ chrs <- unlist(strsplit(chrs, "-"))
48
+ if (length(chrs) != 2) {
49
+ stop(paste0("Invalid chroms: ", chrs))
50
+ }
51
+ # detect prefix
52
+ prefix1 <- gsub("[0-9]", "", chrs[1])
53
+ prefix2 <- gsub("[0-9]", "", chrs[2])
54
+ if (nchar(prefix2) > 0 && prefix1 != prefix2) {
55
+ stop(paste0("Invalid chroms: ", chrs, " (prefix mismatch)"))
56
+ }
57
+ chr_a <- as.integer(substring(chrs[1], nchar(prefix1) + 1))
58
+ chr_b <- as.integer(substring(chrs[2], nchar(prefix2) + 1))
59
+ chr_min <- min(chr_a, chr_b)
60
+ chr_max <- max(chr_a, chr_b)
61
+ return(paste0(prefix1, chr_min:chr_max))
62
+ }
63
+
64
+ log_info("Preparing data for plotting ...")
65
+ if (length(chroms) == 1 && chroms == "auto") {
66
+ chroms <- unique(data[[chrom_col]])
67
+ } else {
68
+ chroms <- norm_chroms(chroms)
69
+ }
70
+
71
+ # prepare data
72
+ mp_prep_args = list()
73
+ if (length(signif) == 1 && is.character(signif)) {
74
+ signif <- as.numeric(trimws(unlist(strsplit(signif, ","))))
75
+ }
76
+ siglevel <- min(signif)
77
+ if (!is.null(label_col)) {
78
+ data$.label <- ifelse(data[[pval_col]] < siglevel, data[[label_col]], "")
79
+ }
80
+ if (!is.null(hicolors)) {
81
+ sig_str <- "Significant"
82
+ nsig_str <- "Not significant"
83
+ data$.highlight <- ifelse(data[[pval_col]] < siglevel, sig_str, nsig_str)
84
+ if (length(hicolors) == 1) { hicolors <- c(hicolors, "grey") }
85
+ names(hicolors) <- c(sig_str, nsig_str)
86
+ mp_prep_args$highlight.colname <- ".highlight"
87
+ mp_prep_args$highlight.col <- hicolors
88
+ }
89
+ mp_prep_args$x <- data
90
+ mp_prep_args$chr.colname <- chrom_col
91
+ mp_prep_args$pos.colname <- pos_col
92
+ mp_prep_args$pval.colname <- pval_col
93
+ mp_prep_args$chr.order <- chroms
94
+ if (!is.null(thin_n) && thin_n > 0) {
95
+ mp_prep_args$thin.n <- thin_n
96
+ mp_prep_args$thin.bins <- thin_bins
97
+ }
98
+
99
+ mpdata <- do_call(manhattan_data_preprocess, mp_prep_args)
100
+
101
+ # plot
102
+ log_info("Plotting Manhattan plot ...")
103
+ args$x <- mpdata
104
+ args$signif <- signif
105
+ args$plot.title <- title
106
+ args$rescale <- rescale
107
+ args$rescale.ratio.threshold <- rescale_ratio_threshold
108
+ if (!is.null(hicolors)) { args$color.by.highlight <- TRUE }
109
+ if (!is.null(label_col)) { args$label.colname <- ".label" }
110
+ g <- do_call(manhattan_plot, args)
111
+
112
+ png(outfile, width=devpars$width, height=devpars$height, res=devpars$res)
113
+ print(g)
114
+ dev.off()
115
+
116
+ # zoom into chromosomes
117
+ if (!is.null(zoom)) {
118
+ log_info("Zooming into chromosomes ...")
119
+ zoom <- norm_chroms(zoom)
120
+ for (z in zoom) {
121
+ log_info("- {z}")
122
+ args_z <- args
123
+ args_z$chromosome <- z
124
+ args_z$plot.title <- paste0(title, " (", z, ")")
125
+ args_z$x.label <- "Position"
126
+ g_z <- do_call(manhattan_plot, args_z)
127
+ outfile_z <- gsub("\\.png$", paste0("-", z, ".png"), outfile)
128
+ zm_devpars <- zoom_devpars
129
+ zm_devpars$res <- zm_devpars$res %||% devpars$res
130
+ zm_devpars$height <- zm_devpars$height %||% devpars$height
131
+ png(
132
+ outfile_z,
133
+ width=zm_devpars$width,
134
+ height=zm_devpars$height,
135
+ res=zm_devpars$res
136
+ )
137
+ print(g_z)
138
+ dev.off()
139
+ }
140
+ }
@@ -0,0 +1,62 @@
1
+ source("{{biopipen_dir}}/utils/misc.R")
2
+
3
+ library(ggplot2)
4
+ library(ggprism)
5
+ library(qqplotr)
6
+
7
+ theme_set(theme_prism())
8
+
9
+ infile <- {{in.infile | r}}
10
+ outfile <- {{out.outfile | r}}
11
+ val_col <- {{envs.val_col | r}}
12
+ devpars <- {{envs.devpars | r}}
13
+ title <- {{envs.title | r}}
14
+ xlabel <- {{envs.xlabel | r}}
15
+ ylabel <- {{envs.ylabel | r}}
16
+ kind <- {{envs.kind | r}}
17
+ trans <- {{envs.trans | r}}
18
+ band_args <- {{envs.band | r}}
19
+ line_args <- {{envs.line | r}}
20
+ point_args <- {{envs.point | r}}
21
+ ggs <- {{envs.ggs | r}}
22
+
23
+ indata <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
24
+ if (is.numeric(val_col)) { val_col <- colnames(indata)[val_col] }
25
+
26
+ band_fun <- ifelse(kind == "pp", stat_pp_band, stat_qq_band)
27
+ line_fun <- ifelse(kind == "pp", stat_pp_line, stat_qq_line)
28
+ point_fun <- ifelse(kind == "pp", stat_pp_point, stat_qq_point)
29
+
30
+ title <- title %||% waiver()
31
+ xlabel <- xlabel %||% waiver()
32
+ ylabel <- ylabel %||% waiver()
33
+
34
+ if (!is.null(trans)) {
35
+ trans <- trimws(trans)
36
+ if (trans == "-log10") {
37
+ trans <- function(x) -log10(x)
38
+ } else {
39
+ trans <- eval(parse(text = trans))
40
+ }
41
+
42
+ indata$.trans_val <- trans(indata[[val_col]])
43
+ val_col <- ".trans_val"
44
+ }
45
+
46
+ indata <- indata[!is.na(indata[[val_col]]), , drop=FALSE]
47
+
48
+ p <- ggplot(data = indata, mapping = aes(sample = !!sym(val_col))) +
49
+ do_call(band_fun, band_args) +
50
+ do_call(line_fun, line_args) +
51
+ do_call(point_fun, point_args) +
52
+ labs(title = title, x = xlabel, y = ylabel)
53
+
54
+ if (!is.null(ggs)) {
55
+ for (gg in ggs) {
56
+ p <- p + eval(parse(text = gg))
57
+ }
58
+ }
59
+
60
+ png(outfile, width=devpars$width, height=devpars$height, res=devpars$res)
61
+ print(p)
62
+ dev.off()