biopipen 0.28.1__py3-none-any.whl → 0.29.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (82) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +8 -0
  3. biopipen/ns/bam.py +0 -2
  4. biopipen/ns/bed.py +35 -0
  5. biopipen/ns/cellranger_pipeline.py +5 -5
  6. biopipen/ns/cnv.py +18 -2
  7. biopipen/ns/cnvkit_pipeline.py +16 -11
  8. biopipen/ns/gene.py +68 -23
  9. biopipen/ns/misc.py +2 -15
  10. biopipen/ns/plot.py +146 -0
  11. biopipen/ns/regulation.py +214 -0
  12. biopipen/ns/scrna.py +15 -3
  13. biopipen/ns/snp.py +516 -8
  14. biopipen/ns/stats.py +74 -2
  15. biopipen/ns/vcf.py +196 -0
  16. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  17. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  18. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  19. biopipen/reports/snp/PlinkHet.svelte +18 -0
  20. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  21. biopipen/scripts/bam/CNVpytor.py +144 -46
  22. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  23. biopipen/scripts/bed/BedtoolsMerge.py +1 -1
  24. biopipen/scripts/cnv/AneuploidyScore.R +30 -7
  25. biopipen/scripts/cnv/AneuploidyScoreSummary.R +5 -2
  26. biopipen/scripts/cnv/TMADScore.R +21 -5
  27. biopipen/scripts/cnv/TMADScoreSummary.R +6 -2
  28. biopipen/scripts/cnvkit/CNVkitAccess.py +2 -1
  29. biopipen/scripts/cnvkit/CNVkitAutobin.py +3 -2
  30. biopipen/scripts/cnvkit/CNVkitBatch.py +1 -1
  31. biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -1
  32. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +1 -1
  33. biopipen/scripts/cnvkit/CNVkitHeatmap.py +1 -1
  34. biopipen/scripts/cnvkit/CNVkitReference.py +2 -1
  35. biopipen/scripts/gene/GeneNameConversion.R +65 -0
  36. biopipen/scripts/gene/GenePromoters.R +61 -0
  37. biopipen/scripts/misc/Shell.sh +15 -0
  38. biopipen/scripts/plot/Manhattan.R +140 -0
  39. biopipen/scripts/plot/QQPlot.R +62 -0
  40. biopipen/scripts/regulation/MotifAffinityTest.R +226 -0
  41. biopipen/scripts/regulation/MotifAffinityTest_AtSNP.R +126 -0
  42. biopipen/scripts/regulation/MotifAffinityTest_MotifBreakR.R +96 -0
  43. biopipen/scripts/regulation/MotifScan.py +159 -0
  44. biopipen/scripts/regulation/atSNP.R +33 -0
  45. biopipen/scripts/regulation/motifBreakR.R +1594 -0
  46. biopipen/scripts/scrna/MarkersFinder.R +59 -67
  47. biopipen/scripts/scrna/SeuratClustering.R +63 -29
  48. biopipen/scripts/scrna/SeuratMap2Ref.R +20 -0
  49. biopipen/scripts/scrna/SeuratSubClustering.R +76 -27
  50. biopipen/scripts/snp/MatrixEQTL.R +84 -43
  51. biopipen/scripts/snp/Plink2GTMat.py +133 -0
  52. biopipen/scripts/snp/PlinkCallRate.R +190 -0
  53. biopipen/scripts/snp/PlinkFilter.py +100 -0
  54. biopipen/scripts/snp/PlinkFreq.R +298 -0
  55. biopipen/scripts/snp/PlinkFromVcf.py +78 -0
  56. biopipen/scripts/snp/PlinkHWE.R +80 -0
  57. biopipen/scripts/snp/PlinkHet.R +92 -0
  58. biopipen/scripts/snp/PlinkIBD.R +197 -0
  59. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  60. biopipen/scripts/stats/MetaPvalue.R +2 -1
  61. biopipen/scripts/stats/MetaPvalue1.R +70 -0
  62. biopipen/scripts/tcr/TCRClusterStats.R +12 -7
  63. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  64. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  65. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  66. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  67. biopipen/scripts/vcf/VcfFix_utils.py +1 -1
  68. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  69. biopipen/utils/gene.R +83 -37
  70. biopipen/utils/gene.py +108 -60
  71. biopipen/utils/misc.R +56 -0
  72. biopipen/utils/misc.py +5 -2
  73. biopipen/utils/reference.py +54 -10
  74. {biopipen-0.28.1.dist-info → biopipen-0.29.0.dist-info}/METADATA +2 -2
  75. {biopipen-0.28.1.dist-info → biopipen-0.29.0.dist-info}/RECORD +77 -49
  76. {biopipen-0.28.1.dist-info → biopipen-0.29.0.dist-info}/entry_points.txt +1 -1
  77. biopipen/ns/bcftools.py +0 -111
  78. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  79. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  80. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  81. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  82. {biopipen-0.28.1.dist-info → biopipen-0.29.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,197 @@
1
+ source("{{biopipen_dir}}/utils/misc.R")
2
+ source("{{biopipen_dir}}/utils/plot.R")
3
+ suppressPackageStartupMessages({
4
+ library(dplyr)
5
+ library(tidyr)
6
+ library(tibble)
7
+ })
8
+
9
+ indir <- {{in.indir | r}}
10
+ outdir <- {{out.outdir | r}}
11
+ plink <- {{envs.plink | r}}
12
+ indep <- {{envs.indep | r}}
13
+ highld <- {{envs.highld | r}}
14
+ devpars <- {{envs.devpars | r}}
15
+ pihat <- {{envs.pihat | r}}
16
+ samid <- {{envs.samid | r}}
17
+ annofile <- {{envs.anno | r}}
18
+ doplot <- {{envs.plot | r}}
19
+ seed <- {{envs.seed | r}}
20
+ ncores <- {{envs.ncores | r}}
21
+
22
# Locate the PLINK binary fileset. The first `*.bed` file found in `indir` is
# used; its path without the extension is what plink receives as --bfile, and
# the same base name (under `outdir`) is used as the output prefix.
bed_candidates <- Sys.glob(file.path(indir, '*.bed'))
n_bed <- length(bed_candidates)
if (n_bed == 0) {
    stop("No bed files found in the input directory.")
}
if (n_bed > 1) {
    log_warn("Multiple bed files found in the input directory. Using the first one.")
}
bedfile <- bed_candidates[1]
input <- tools::file_path_sans_ext(bedfile)
output <- file.path(outdir, basename(input))

# Step 1: LD pruning with `--indep-pairwise`.
# One should be mindful of running this with < 50 samples
# (the "--bad-ld" flag is intentionally left disabled here).
cmd <- c(
    plink,
    "--threads", ncores,
    "--bfile", input,
    "--indep-pairwise", indep,
    "--out", output
)
# Optionally exclude the regions listed in `highld` from pruning
exclude_highld <- !(is.null(highld) || isFALSE(highld))
if (exclude_highld) {
    cmd <- c(cmd, "--range", "--exclude", highld)
}
run_command(cmd, fg = TRUE)

# Step 2: pairwise IBD estimation (`--genome`), restricted to the variants
# listed in the `.prune.in` file from step 1.
prunein <- paste0(output, '.prune.in')
cmd <- c(
    plink,
    "--threads", ncores,
    "--bfile", input,
    "--extract", prunein,
    "--genome",
    "--out", output
)
run_command(cmd, fg = TRUE)
56
+
57
# Load the pairwise table written by `plink --genome`, one row per sample pair:
# FID1 IID1 FID2 IID2 RT EZ Z0     Z1     Z2     PI_HAT PHE DST      PPC    RATIO
# s1   s1   s2   s2   UN NA 0.4846 0.3724 0.1431 0.3293 -1  0.913945 0.7236 2.0375
# s1   s1   s3   s3   UN NA 1.0000 0.0000 0.0000 0.0000 -1  0.867186 0.0000 1.0791
genome <- read.table(
    paste0(output, '.genome'),
    header = TRUE,
    row.names = NULL,
    check.names = FALSE
)
# A sample is keyed by "FID<tab>IID"
genome$SAMPLE1 <- paste(genome$FID1, genome$IID1, sep = "\t")
genome$SAMPLE2 <- paste(genome$FID2, genome$IID2, sep = "\t")

# Every sample that appears on either side of a pair
samples <- unique(c(genome$SAMPLE1, genome$SAMPLE2))

# "Unmelt" the long pair list into a SAMPLE1 x SAMPLE2 table of PI_HAT values
pairs_long <- select(genome, SAMPLE1, SAMPLE2, PI_HAT)
pairs_wide <- pivot_wider(
    pairs_long,
    names_from = SAMPLE2,
    values_from = PI_HAT,
    values_fill = NA
)
similarity <- column_to_rownames(as.data.frame(pairs_wide), "SAMPLE1")
rm(genome, pairs_long, pairs_wide)

# Keep the row names of the table as it stands before padding
samids <- rownames(similarity)

# Samples that only ever appear on one side of a pair are missing as a row
# or as a column; pad them with NA so the table becomes square
missedrow <- setdiff(samples, rownames(similarity))
missedcol <- setdiff(samples, colnames(similarity))
similarity[missedrow, ] <- NA
similarity[, missedcol] <- NA

# Same sample order on both axes
similarity <- similarity[samples, samples, drop = FALSE]

# Each pair is present in one orientation only: fill every missing cell
# with the value of its mirrored cell
sim2 <- t(similarity)
isna <- is.na(similarity)
similarity[isna] <- sim2[isna]
rm(sim2)

# Cells that are still missing (including the diagonal at this point) get 0
similarity[is.na(similarity)] <- 0

# Marks: (row, column) positions of the cells above the pihat cutoff,
# recovered from the column-major linear indices returned by which()
nsams <- length(samples)
fails <- which(similarity > pihat)
marks <- data.frame(x = (fails - 1)%%nsams + 1, y = ceiling(fails/nsams))

# Set the diagonal only after the marks are taken, so self-pairs never fail
diag(similarity) <- 1
103
+
104
# Choose the samples to remove so that every failing pair (a row of `marks`)
# loses at least one member. Samples are taken in decreasing order of how
# often they occur in `marks`, until all failing pairs are covered.
involved <- as.integer(as.matrix(marks))
freq_tab <- table(involved)
# The names of `freq_tab` are the sample indices themselves. Convert them back
# to integers before indexing `samples`: indexing with a factor would use the
# factor's internal codes (the rank among the involved samples), which is
# not the sample index unless every sample is involved.
freqs <- as.integer(names(freq_tab))[order(as.vector(freq_tab), decreasing = TRUE)]

failflags <- rep(FALSE, nrow(marks))
# character(0) rather than c(): writeLines() rejects NULL, which is what c()
# yields when no pair exceeds the cutoff
ibd.fail <- character(0)
while (!all(failflags)) {
    samidx <- freqs[1]
    freqs <- freqs[-1]
    ibd.fail <- c(ibd.fail, samples[samidx])
    # a failing pair is covered once either of its members is removed
    failflags <- failflags | marks$x == samidx | marks$y == samidx
}

# One "FID<tab>IID" per line, passed to plink --remove
ibd_fail_file <- paste0(output, '.ibd.fail')
writeLines(ibd.fail, ibd_fail_file)

# Write the fileset without the selected samples
cmd <- c(
    plink,
    "--threads", ncores,
    "--bfile", input,
    "--remove", ibd_fail_file,
    "--make-bed",
    "--out", output
)
run_command(cmd, fg = TRUE)
129
+
130
# Optional heatmap of the PI_HAT matrix; cells above the cutoff get a cross.
if (doplot) {
    set.seed(seed)
    library(ComplexHeatmap)

    # Global ComplexHeatmap font/size settings
    fontsize8 <- gpar(fontsize = 8)
    fontsize9 <- gpar(fontsize = 9)
    ht_opt$heatmap_row_names_gp <- fontsize8
    ht_opt$heatmap_column_names_gp <- fontsize8
    ht_opt$legend_title_gp <- fontsize9
    ht_opt$legend_labels_gp <- fontsize8
    ht_opt$simple_anno_size <- unit(3, "mm")

    # Display labels: substitute {fid} and {iid} in the `samid` template
    # with the two halves of each "FID<tab>IID" key
    samids <- sapply(samples, function(sid) {
        parts <- unlist(strsplit(sid, "\t", fixed = TRUE))
        label <- gsub("{iid}", parts[2], samid, fixed = TRUE)
        gsub("{fid}", parts[1], label, fixed = TRUE)
    })
    rownames(similarity) <- samids
    colnames(similarity) <- samids

    # Optional top annotation: one track per column of the annotation file,
    # with rows looked up by the display labels
    annos <- list()
    has_annofile <- !(is.null(annofile) || isFALSE(annofile))
    if (has_annofile) {
        options(stringsAsFactors = TRUE)
        andata <- read.table(annofile, header = TRUE, row.names = 1, sep = "\t", check.names = FALSE)
        andata <- andata[samids, , drop = FALSE]
        for (anname in colnames(andata)) {
            annos[[anname]] <- as.matrix(andata[, anname])
        }
        annos$annotation_name_gp <- fontsize8
        annos <- do.call(HeatmapAnnotation, annos)
    }

    # Clustering distance derived from the similarity itself
    similarity_to_dist <- function(m) as.dist(1-m)
    # Put a cross on every off-diagonal cell above the cutoff
    mark_failed_cell <- function(j, i, x, y, width, height, fill) {
        if (similarity[i, j] > pihat && i != j)
            grid.points(x, y, pch = 4, size = unit(.5, "char"))
    }
    top_anno <- if (length(annos) == 0) NULL else annos

    args <- list(
        name = "PI_HAT",
        cell_fun = mark_failed_cell,
        clustering_distance_rows = similarity_to_dist,
        clustering_distance_columns = similarity_to_dist,
        top_annotation = top_anno
    )

    # Extra legend entry explaining the cross marker
    cutoff_legend <- Legend(
        labels = paste(">", pihat),
        title = "",
        type = "points",
        pch = 4,
        title_gp = fontsize9,
        labels_gp = fontsize8
    )

    plotHeatmap(
        similarity,
        outfile = paste0(output, '.ibd.png'),
        args = args,
        draw = list(
            annotation_legend_list = list(cutoff_legend),
            merge_legend = TRUE
        ),
        devpars = devpars
    )
}
@@ -0,0 +1,124 @@
1
+ from pathlib import Path
2
+ from biopipen.utils.misc import run_command, dict_to_cli_args, logger
3
+
4
+ indir = {{in.indir | repr}} # pyright: ignore # noqa: #999
5
+ namefile = {{in.namefile | repr}} # pyright: ignore
6
+ outdir = {{out.outdir | repr}} # pyright: ignore
7
+ plink = {{envs.plink | repr}} # pyright: ignore
8
+ bcftools = {{envs.bcftools | repr}} # pyright: ignore
9
+ ncores = {{envs.ncores | repr}} # pyright: ignore
10
+ match_alt = {{envs.match_alt | repr}} # pyright: ignore
11
+
12
# Locate the PLINK fileset: the first *.bed file found in `in.indir` is used.
bed_candidates = list(Path(indir).glob("*.bed"))
if not bed_candidates:
    raise FileNotFoundError("No .bed file found in `in.indir`")
if len(bed_candidates) > 1:
    logger.warning("Multiple .bed files found in `in.indir`, using the first one.")

bedfile = bed_candidates[0]
# plink takes the fileset prefix, i.e. the path without the .bed suffix
input = bedfile.with_suffix("")
output = Path(outdir) / bedfile.stem
21
+
22
+ if namefile.endswith(".vcf") or namefile.endswith(".vcf.gz"):
23
+ logger.info("VCF file received, extracting names")
24
def alt_matched(bim_alt, vcf_alt, match_alt):
    """Tell whether the ALT allele(s) of a .bim record match those of a VCF record.

    Args:
        bim_alt: ALT field of the .bim record (comma-separated if several).
        vcf_alt: ALT field of the VCF record (comma-separated if several).
        match_alt: Matching mode:
            "none" - always matched;
            "exact" - the two raw strings are equal;
            "all" - the two sets of alleles are equal;
            "any" - the two sets of alleles share at least one allele;
            "first_included" - the first .bim allele is among the VCF alleles;
            "first" - the first alleles of both records are equal.

    Returns:
        True if the alleles match under the given mode, False otherwise.

    Raises:
        ValueError: If `match_alt` is not one of the modes above.
    """
    # Modes that work on the raw strings
    if match_alt == "none":
        return True
    if match_alt == "exact":
        return bim_alt == vcf_alt

    # The remaining modes compare the individual alleles
    bim_alts = bim_alt.split(",")
    vcf_alts = vcf_alt.split(",")
    comparators = {
        "all": lambda: set(bim_alts) == set(vcf_alts),
        "any": lambda: bool(set(bim_alts) & set(vcf_alts)),
        "first_included": lambda: bim_alts[0] in vcf_alts,
        "first": lambda: bim_alts[0] == vcf_alts[0],
    }
    try:
        compare = comparators[match_alt]
    except (KeyError, TypeError):
        raise ValueError(f"Unknown match_alt: {match_alt}") from None
    return compare()
42
+
43
def readline(f):
    """Read the next non-blank record from a tab-delimited text file.

    Blank (whitespace-only) lines are skipped instead of being reported as
    end-of-file, so a stray empty line cannot cut the merge short.

    Args:
        f: A text file object opened for reading.

    Returns:
        The tab-separated fields of the next non-blank line (with surrounding
        whitespace stripped), or None once the end of the file is reached.
    """
    # f.readline() returns "" only at EOF; a blank line is at least "\n"
    for raw in iter(f.readline, ""):
        line = raw.strip()
        if line:
            return line.split("\t")
    return None
46
+
47
# Build a name file (old ID <tab> new ID) by matching the .bim records
# against the VCF records on chromosome, position, REF and (per `match_alt`)
# ALT. All working files are created under `outdir`.
namefile_tmp = Path(outdir) / "_namefile_from_vcf.txt"
infofile = Path(outdir) / "_information_from_vcf_unsorted.txt"
sorted_infofile = Path(outdir) / "_information_from_vcf_sorted.txt"
sorted_bim = Path(outdir) / "_sorted_bim.txt"


def _sort_by_locus(src, dest):
    """Sort a 6-column file by chromosome, numeric position, then REF."""
    run_command(
        [
            "sort",
            "-k1,1", "-k4,4n", "-k6,6",
            src,
            "--parallel", ncores,
            "-o", dest
        ],
        # byte-wise collation, so the order agrees with Python str comparison
        env={"LC_ALL": "C"},
        fg=True,
    )


def _locus(record):
    """Key that orders records the same way `_sort_by_locus` sorts them.

    The position must be compared as a number: the files are sorted with
    `-k4,4n`, and comparing the positions as strings ("100" < "99") would
    advance the wrong side of the merge and silently drop matches.
    """
    return record[0], int(record[3]), record[5]


## infofile: the VCF records laid out in .bim column order
# 1 rs10492 0 10492 T C
logger.info("- Extracting information from VCF file ...")
run_command(
    [
        bcftools, "query",
        "-f", "%CHROM\\t%ID\\t0\\t%POS\\t%ALT\\t%REF\\n",
        "-o", infofile,
        namefile,
    ],
    fg=True,
)

logger.info("- Sorting the information from VCF file ...")
_sort_by_locus(infofile, sorted_infofile)

## .bim file
# 1 1_10492 0 10492 T C
logger.info("- Sorting the .bim file ...")
_sort_by_locus(input.with_suffix(".bim"), sorted_bim)

# Merge-join the two sorted files
logger.info("- Matching and generating the name file ...")
with sorted_bim.open() as fbim, sorted_infofile.open() as finfo, namefile_tmp.open("w") as fout:  # noqa: E501
    bim = readline(fbim)
    info = readline(finfo)
    while bim and info:
        bim_key = _locus(bim)
        info_key = _locus(info)
        if bim_key == info_key and alt_matched(bim[4], info[4], match_alt):
            fout.write(f"{bim[1]}\t{info[1]}\n")
            bim = readline(fbim)
            info = readline(finfo)
        elif bim_key < info_key:
            # the .bim record is behind: no VCF record left for it
            bim = readline(fbim)
        else:
            # the VCF record is behind, or same locus with unmatched ALT:
            # try the next VCF record
            info = readline(finfo)

# From here on, the generated file is the name file
namefile = namefile_tmp
115
+
116
# Run plink on the input fileset with --update-name, writing the renamed
# fileset (--make-bed) to the `output` prefix.
args = {
    "": plink,
    "bfile": input,
    "out": output,
    "make_bed": True,
    "update_name": namefile,
}
plink_cmd = dict_to_cli_args(args, dashify=True)
run_command(plink_cmd, fg=True)
@@ -11,6 +11,7 @@ id_exprs <- {{envs.id_exprs | r}}
11
11
  pval_cols <- {{envs.pval_cols | r}}
12
12
  method <- {{envs.method | r}}
13
13
  na <- {{envs.na | r}}
14
+ keep_single <- {{envs.keep_single | r}}
14
15
  padj <- {{envs.padj | r}}
15
16
 
16
17
  if (method == "fisher") { method = "sumlog" }
@@ -102,7 +103,7 @@ if (length(infiles) == 1 && padj == "none") {
102
103
  if (length(ps) == 0) {
103
104
  metaps <- c(metaps, NA)
104
105
  ns <- c(ns, NA)
105
- } else if (length(ps) == 1) {
106
+ } else if (length(ps) == 1 && keep_single) {
106
107
  metaps <- c(metaps, ps)
107
108
  ns <- c(ns, 1)
108
109
  } else {
@@ -0,0 +1,70 @@
1
+ source("{{biopipen_dir}}/utils/misc.R")
2
+
3
+ library(metap)
4
+ library(rlang)
5
+ library(dplyr)
6
+
7
+ infile <- {{in.infile | r}}
8
+ outfile <- {{out.outfile | r}}
9
+ id_cols <- {{envs.id_cols | r}}
10
+ pval_col <- {{envs.pval_col | r}}
11
+ method <- {{envs.method | r}}
12
+ na <- {{envs.na | r}}
13
+ keep_single <- {{envs.keep_single | r}}
14
+ padj <- {{envs.padj | r}}
15
+
16
# The combining function is looked up by name (do.call below);
# "fisher" is accepted as an alias of `sumlog`.
if (method == "fisher") { method <- "sumlog" }

# Check pval_col
if (is.null(pval_col)) { stop("Must provide envs.pval_col") }

# Check id_cols: a single string may hold several comma-separated columns
if (is.null(id_cols)) { stop("Must provide envs.id_cols") }
if (length(id_cols) == 1) {
    id_cols <- trimws(strsplit(id_cols, ",")[[1]])
}

log_info("Reading input and performing meta-analysis ...")
# One row per group of `id_cols`, with the group's p-values in a list column
outdata <- read.table(
    infile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE
) %>%
    group_by(!!!syms(id_cols)) %>%
    summarise(
        N = n(),
        .pvals = list(!!sym(pval_col)),
        .groups = "drop"
    )

# Pre-allocated with NA, so groups that cannot be combined need no special
# handling and an empty input still yields (empty) result columns
n_groups <- nrow(outdata)
metaps <- rep(NA_real_, n_groups)
ns <- rep(NA_real_, n_groups)
for (i in seq_len(n_groups)) {
    ps <- outdata$.pvals[[i]]
    if (na == -1) {
        # na == -1: drop missing p-values
        ps <- ps[!is.na(ps)]
    } else {
        # otherwise replace them with the given value
        ps[is.na(ps)] <- na
    }

    n_ps <- length(ps)
    if (n_ps == 0) { next }  # nothing usable: MetaPval and N stay NA

    ns[i] <- n_ps
    if (n_ps == 1) {
        # A single p-value cannot be combined, so it is never passed to the
        # combining function: it is either carried over as is (keep_single)
        # or the meta p-value is left as NA.
        if (keep_single) { metaps[i] <- ps }
    } else {
        metaps[i] <- do.call(method, list(ps))$p
    }
}
outdata$MetaPval <- metaps
outdata$N <- ns
outdata$.pvals <- NULL
outdata <- outdata %>% arrange(MetaPval)

if (padj != "none") {
    log_info("Calculating adjusted p-values ...")
    outdata$MetaPadj <- p.adjust(outdata$MetaPval, method = padj)
}

log_info("Writing output ...")
write.table(outdata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
@@ -130,13 +130,6 @@ shared_clusters = function(name) {
130
130
  row.names=TRUE, col.names=TRUE, quote=FALSE, sep="\t"
131
131
  )
132
132
 
133
- if (is.null(case$heatmap_meta) || length(case$heatmap_meta) == 0) {
134
- anno = NULL
135
- } else {
136
- anno = as.list(immdata$meta[, case$heatmap_meta, drop=FALSE])
137
- anno = do_call(ComplexHeatmap::HeatmapAnnotation, anno)
138
- }
139
-
140
133
  if (!is.null(case$sample_order) && length(case$sample_order) > 0) {
141
134
  if (length(case$sample_order) == 1) {
142
135
  case$sample_order = trimws(strsplit(case$sample_order, ",")[[1]])
@@ -148,6 +141,18 @@ shared_clusters = function(name) {
148
141
  plotdata = plotdata[, case$sample_order, drop=FALSE]
149
142
  }
150
143
 
144
+ if (is.null(case$heatmap_meta) || length(case$heatmap_meta) == 0) {
145
+ anno = NULL
146
+ } else {
147
+ anno = as.list(
148
+ immdata$meta[
149
+ match(colnames(plotdata), immdata$meta$Sample),
150
+ case$heatmap_meta,
151
+ drop=FALSE
152
+ ])
153
+ anno = do_call(ComplexHeatmap::HeatmapAnnotation, anno)
154
+ }
155
+
151
156
  cluster_rows = case$cluster_rows && nrow(plotdata) > 2
152
157
  col_samples = colnames(plotdata)
153
158
  if (!cluster_rows) {
@@ -0,0 +1,91 @@
1
+ from os import path
2
+ from contextlib import suppress
3
+ from pathlib import PosixPath # noqa: F401
4
+
5
+ from biopipen.utils.reference import tabix_index
6
+ from biopipen.utils.misc import logger
7
+ from biopipen.scripts.vcf.bcftools_utils import run_bcftools
8
+
9
+ infile = {{in.infile | repr}} # pyright: ignore # noqa: E999
10
+ annfile = {{in.annfile | repr}} # pyright: ignore
11
+ outfile = {{out.outfile | repr}} # pyright: ignore
12
+ joboutdir = {{job.outdir | repr}} # pyright: ignore
13
+ envs = {{envs | dict | repr}} # pyright: ignore
14
+
15
# Options handled by this script; whatever remains in `envs` afterwards is
# handed to run_bcftools() as the `bcftools annotate` arguments.
bcftools = envs.pop("bcftools")
tabix = envs.pop("tabix")
ncores = envs.pop("ncores")
columns = envs.pop("columns")
remove = envs.pop("remove")
header = envs.pop("header")
gz = envs.pop("gz")
index = envs.pop("index")

# columns/remove may be given as lists; bcftools takes comma-separated strings.
# The short forms are dropped so the same option is not passed twice.
# (`\\[` keeps a literal backslash before the bracket in the logged message.)
if isinstance(columns, list):
    columns = ",".join(columns)

if "c" in envs:
    logger.warning("Ignoring envs\\[c], use envs\\[columns] instead.")
    del envs["c"]

if isinstance(remove, list):
    remove = ",".join(remove)

if "x" in envs:
    logger.warning("Ignoring envs\\[x], use envs\\[remove] instead.")
    del envs["x"]

# Extra header lines are passed to bcftools through a file
headerfile = path.join(joboutdir, "header.txt")
if header:
    header_lines = [header] if isinstance(header, str) else list(header)
    with open(headerfile, "w") as fh:
        # one header line per file line, whether or not the items already
        # end with a newline
        fh.writelines(line.rstrip("\n") + "\n" for line in header_lines)

# Annotation file: in.annfile wins over envs.annotations / envs.a.
# Both keys are always removed from `envs`, so neither can be passed along
# with the "annotations" argument set below.
envs_has_annfile = "a" in envs or "annotations" in envs
env_annotations = envs.pop("annotations", None)
env_a = envs.pop("a", None)
if annfile:
    if envs_has_annfile:
        logger.warning(
            "Ignoring envs\\[a/annotations] because in.annfile is provided."
        )
else:
    annfile = env_annotations or env_a

if index and not gz:
    logger.warning("Forcing envs.gz to True because envs.index is True.")
    gz = True

envs[""] = [bcftools, "annotate"]
envs["o"] = outfile
envs["threads"] = ncores

if "O" not in envs and "output-type" not in envs and "output_type" not in envs:
    envs["O"] = "z" if gz else "v"

if columns:
    if not annfile:
        raise ValueError(
            "envs.columns specified but no in.annfile/envs.annotations provided."
        )
    envs["columns"] = columns

if remove:
    envs["remove"] = remove

if not columns and not remove:
    logger.warning(
        "No columns/remove specified, no columns will be carried over or removed."
    )

# The input file is always passed: indexed when columns are carried over from
# an annotation file (even if `remove` is also given), as is otherwise.
envs["_"] = tabix_index(infile, "vcf", tabix=tabix) if columns else infile

if annfile:
    envs["annotations"] = tabix_index(annfile, "vcf", tabix=tabix)

if header:
    envs["header_lines"] = headerfile

run_bcftools(envs, bcftools=bcftools, index=index, tabix=tabix)
@@ -0,0 +1,90 @@
1
+ from pathlib import Path, PosixPath # noqa: F401
2
+
3
+ from biopipen.utils.misc import logger
4
+ from biopipen.scripts.vcf.bcftools_utils import run_bcftools
5
+
6
+ infile = {{in.infile | repr}} # pyright: ignore # noqa: #999
7
+ outfile = {{out.outfile | repr}} # pyright: ignore
8
+ outdir = Path(outfile).parent
9
+
10
+ envs = {{envs | dict | repr}} # pyright: ignore
11
# Options handled by this script; the rest of `envs` goes to bcftools
_script_options = (
    "bcftools", "tabix", "keep", "ncores", "includes", "excludes", "gz", "index"
)
bcftools, tabix, keep, ncores, includes, excludes, gz, index = (
    envs.pop(option) for option in _script_options
)

# Base name of the input without its VCF extension(s):
# a.vcf.gz -> a
# a.vcf -> a
stem = Path(infile).stem
stem = stem[:-4] if stem.endswith(".vcf") else stem

# Extension of the per-filter files: compressed whenever gz or index is on
ext = ".vcf" if not (index or gz) else ".vcf.gz"
28
+
29
+
30
def normalize_expr(expr, flag, prev_n_filters=0):
    """Normalize filter expressions into a `{name: (expression, flag)}` dict.

    Args:
        expr: A single expression, a list of expressions, or a dict mapping
            filter names to expressions. Anything falsy yields an empty dict.
        flag: The flag attached to every expression; its upper-cased form is
            also used in auto-generated names.
        prev_n_filters: Offset for auto-generated names, so that numbering
            can continue after previously normalized filters.

    Returns:
        A dict in input order. Dict input keeps its own names; other input
        is named `FILTER_<FLAG>_<n>`, with n starting at prev_n_filters + 1.
    """
    if not expr:
        return {}

    # Named filters: keep the given names
    if isinstance(expr, dict):
        return {name: (ex, flag) for name, ex in expr.items()}

    # Unnamed filters: a lone expression is treated as a one-item list
    exprs = expr if isinstance(expr, list) else [expr]
    prefix = f"FILTER_{flag.upper()}_"
    return {
        f"{prefix}{number}": (ex, flag)
        for number, ex in enumerate(exprs, start=prev_n_filters + 1)
    }
43
+
44
+
45
def handle_filter(vcf, fname, filt, flag, final):
    """Apply a single filter expression with one `bcftools filter` run.

    Args:
        vcf: Input VCF of this step (the previous step's output when chained).
        fname: Name of the filter; used in the per-filter file name and,
            when `keep` is on, as the soft-filter name.
        filt: The filter expression.
        flag: The option receiving the expression ("include" or "exclude").
        final: Whether this is the last filter, i.e. the one that writes
            `outfile`.

    Returns:
        The path of the file written by this step.
    """
    logger.info("- Handling filter %s: %s ...", fname, filt)

    step_file = outdir / f"{stem}.{fname}{ext}"

    arguments = envs.copy()
    arguments[flag] = filt
    arguments["_"] = vcf
    arguments["o"] = outfile if final else step_file
    if keep:
        # Pass the soft-filter option exactly once, with this filter's name,
        # instead of passing it alongside the generic "+" placeholder
        arguments.pop("soft_filter", None)
        arguments.pop("soft-filter", None)
        arguments["s"] = fname

    # Only the final output is indexed
    run_bcftools(arguments, bcftools=bcftools, index=index and final, tabix=tabix)

    # Make the final output also reachable under its per-filter name.
    # Skipped if that name is the output itself, which must not be unlinked.
    if final and step_file != Path(outfile):
        # Remove a stale link or file left by a previous run, otherwise
        # symlink_to() raises FileExistsError
        if step_file.is_symlink() or step_file.exists():
            step_file.unlink()
        step_file.symlink_to(outfile)

    return arguments["o"]
64
+
65
+
66
# Collect all filters as {name: (expression, flag)}: includes first, then
# excludes, with the auto-numbering of the excludes continuing after the
# includes.
includes = normalize_expr(includes, "include")
excludes = normalize_expr(excludes, "exclude", len(includes))
includes.update(excludes)

if not includes:
    # Without any filter the loop below would never run bcftools and no
    # output would be written; fail with an explicit message instead.
    raise ValueError(
        "No filters specified, provide envs.includes and/or envs.excludes."
    )

if index and not gz:
    logger.warning("Forcing envs.gz to True because envs.index is True.")
    gz = True

# Arguments shared by all filter runs; handle_filter() overrides the
# input/output per step
envs[""] = [bcftools, "filter"]
envs["_"] = infile
envs["o"] = outfile
envs["threads"] = ncores

if "O" not in envs and "output-type" not in envs and "output_type" not in envs:
    envs["O"] = "z" if gz else "v"

if keep:
    envs["soft_filter"] = "+"

if "m" not in envs and "mode" not in envs:
    envs["m"] = "+"

# bcftools takes one expression per run, so the filters are chained:
# each run reads the output of the previous one, and the last writes `outfile`
last_filter = len(includes) - 1
for i, (fname, (filt, flag)) in enumerate(includes.items()):
    infile = handle_filter(infile, fname, filt, flag, i == last_filter)