biopipen 0.31.5__py3-none-any.whl → 0.31.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

biopipen/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.31.5"
1
+ __version__ = "0.31.6"
biopipen/ns/regulatory.py CHANGED
@@ -212,3 +212,75 @@ class MotifAffinityTest(Proc):
212
212
  "atsnp_args": {"padj_cutoff": True, "padj": "BH", "p": "pval_diff"},
213
213
  }
214
214
  script = "file://../scripts/regulatory/MotifAffinityTest.R"
215
+
216
+
217
+ class VariantMotifPlot(Proc):
218
+ """A plot with a genomic region surrounding a genomic variant, and
219
+ potentially disrupted motifs.
220
+
221
+ Currently only SNVs are supported.
222
+
223
+ Input:
224
+ infile: File containing the variants and motifs.
225
+ It is a TAB-delimited file with the following columns:
226
+ - chrom: The chromosome of the SNV. Alias: chr, seqnames.
227
+ - start: The start position of the SNV, regardless of whether it is 0- or 1-based.
228
+ - end: The end position of the SNV, which will be used as the position of the SNV.
229
+ - strand: Indicating the direction of the surrounding sequence matching the motif.
230
+ - SNP_id: The name of the SNV.
231
+ - REF: The reference allele of the SNV.
232
+ - ALT: The alternative allele of the SNV.
233
+ - providerId: The motif id. It can be specified by `envs.motif_col`.
234
+ - providerName: The name of the motif provider. Optional.
235
+ - Regulator: The regulator name. Optional, can be specified by `envs.regulator_col`.
236
+ - motifPos: The position of the motif, relative to the position of the SNV.
237
+ For example, '-8, 4' means the motif spans from 8 bp upstream to 4 bp downstream of the SNV.
238
+
239
+ Envs:
240
+ genome: The genome assembly.
241
+ Used to fetch the sequences around the variants from a BSgenome package. For example, `BSgenome.Hsapiens.UCSC.hg19` is required if
242
+ `hg19` is given. If it is an organism other than human, please specify the full name of the package, for example, `BSgenome.Mmusculus.UCSC.mm10`.
243
+ motifdb: The path to the motif database. This is required.
244
+ It should be in the format of MEME motif database.
245
+ Databases can be downloaded here: <https://meme-suite.org/meme/doc/download.html>.
246
+ See also introduction to the databases: <https://meme-suite.org/meme/db/motifs>.
247
+ [universalmotif](https://github.com/bjmt/universalmotif) is required to read the motif database.
248
+ motif_col: The column name in the input file (`in.infile`) containing the motif names.
249
+ If this is not provided, `envs.regulator_col` and `envs.regmotifs` are required,
250
+ which are used to infer the motif names from the regulator names.
251
+ regulator_col: The column name in the input file (`in.infile`) containing the regulator names.
252
+ Both `motif_col` and `regulator_col` should be the direct column names or
253
+ the index (1-based) of the columns.
254
+ If no `regulator_col` is provided, no regulator information is written in
255
+ the output. Otherwise, the regulator information is written in the output in
256
+ the `Regulator` column.
257
+ regmotifs: The path to the regulator-motif mapping file.
258
+ It must have a header and the columns `Motif` or `Model` for motif names and
259
+ `TF`, `Regulator` or `Transcription factor` for regulator names.
260
+ notfound (choice): What to do if a motif is not found in the database,
261
+ or a regulator is not found in the regulator-motif mapping (envs.regmotifs)
262
+ file.
263
+ - error: Report error and stop the process.
264
+ - ignore: Ignore the motif and continue.
265
+ devpars (ns): The default device parameters for the plot.
266
+ - width (type=int): The width of the plot.
267
+ - height (type=int): The height of the plot.
268
+ - res (type=int): The resolution of the plot.
269
+ plot_vars (type=auto): The variants (SNP_id) to plot.
270
+ A list of variant names to plot, or a single string with the variant names separated by commas.
271
+ When not specified, all variants are plotted.
272
+ """ # noqa: E501
273
+ input = "infile:file"
274
+ output = "outdir:dir:{{in.infile | stem}}.vmplots"
275
+ lang = config.lang.rscript
276
+ envs = {
277
+ "genome": config.ref.genome,
278
+ "motifdb": config.ref.tf_motifdb,
279
+ "motif_col": "providerId",
280
+ "regulator_col": None,
281
+ "regmotifs": config.ref.tf_motifs,
282
+ "notfound": "error",
283
+ "devpars": {"width": 800, "height": None, "res": 100},
284
+ "plot_vars": None,
285
+ }
286
+ script = "file://../scripts/regulatory/VariantMotifPlot.R"
biopipen/ns/vcf.py CHANGED
@@ -335,6 +335,8 @@ class TruvariBench(Proc):
335
335
  """Run `truvari bench` to compare a VCF with CNV calls and
336
336
  base CNV standards
337
337
 
338
+ Requires truvari v4+
339
+
338
340
  See https://github.com/ACEnglish/truvari/wiki/bench
339
341
 
340
342
  Input:
@@ -358,7 +360,7 @@ class TruvariBench(Proc):
358
360
  "truvari": config.exe.truvari,
359
361
  "ref": config.ref.reffa,
360
362
  "refdist": 500,
361
- "pctsim": 0.7,
363
+ "pctseq": 0.7,
362
364
  "pctsize": 0.7,
363
365
  "pctovl": 0.0,
364
366
  "typeignore": False,
@@ -402,7 +404,7 @@ class TruvariBenchSummary(Proc):
402
404
  output = "outdir:dir:truvari_bench.summary"
403
405
  lang = config.lang.rscript
404
406
  envs = {
405
- "plots": ["call cnt", "base cnt", "precision", "recall", "f1"],
407
+ "plots": ["comp cnt", "base cnt", "precision", "recall", "f1"],
406
408
  "devpars": None,
407
409
  }
408
410
  script = "file://../scripts/vcf/TruvariBenchSummary.R"
@@ -414,6 +416,8 @@ class TruvariConsistency(Proc):
414
416
 
415
417
  See https://github.com/ACEnglish/truvari/wiki/consistency
416
418
 
419
+ Requires truvari v4+
420
+
417
421
  Input:
418
422
  vcfs: The vcf files with CNV calls
419
423
 
@@ -1,9 +1,9 @@
1
1
  # Script for regulatory.MotifAffinityTest
2
2
  {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
3
+ {{ biopipen_dir | joinpaths: "scripts", "regulatory", "motifs-common.R" | source_r }}
3
4
 
4
5
  library(BiocParallel)
5
6
  library(BSgenome)
6
- library(universalmotif)
7
7
 
8
8
  motiffile <- {{in.motiffile | r}}
9
9
  varfile <- {{in.varfile | r}}
@@ -45,63 +45,9 @@ if (is.null(motif_col) && is.null(regulator_col)) {
45
45
  log_info("Reading input regulator/motif file ...")
46
46
  in_motifs <- read.table(motiffile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
47
47
 
48
- if (is.null(motif_col)) {
49
- log_info("Inferring motifs from regulators ...")
50
- if (is.null(regmotifs) || !file.exists(regmotifs)) {
51
- stop("Regulator motifs (envs.regmotifs) is required and must exist when no motif column (envs.motif_col) is provided")
52
- }
53
- regmotifs <- read.table(regmotifs, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
54
- rm_motif_col <- c('Motif', 'motif', 'MOTIF', 'Model', 'model', 'MODEL')
55
- rm_reg_col <- c('Regulator', 'regulator', 'REGULATOR', 'TF', 'tf', 'TF', 'Transcription factor', 'transcription factor', 'Transcription Factor')
56
- rm_motif_col <- intersect(rm_motif_col, colnames(regmotifs))
57
- rm_reg_col <- intersect(rm_reg_col, colnames(regmotifs))
58
- if (length(rm_motif_col) == 0) {
59
- stop("No motif column found in envs.regmotifs, provide one of: ", paste(rm_motif_col, collapse = ", "))
60
- }
61
- if (length(rm_reg_col) == 0) {
62
- stop("No regulator column found in envs.regmotifs, provide one of: ", paste(rm_reg_col, collapse = ", "))
63
- }
64
- rm_motif_col <- rm_motif_col[1]
65
- rm_reg_col <- rm_reg_col[1]
66
- # check regulators
67
- rm_regs <- regmotifs[, rm_reg_col, drop = TRUE]
68
- regulators <- in_motifs[, regulator_col, drop = TRUE]
69
- notfound_regs <- setdiff(regulators, rm_regs)
70
- if (length(notfound_regs) > 0 && notfound == "error") {
71
- first_notfound <- head(notfound_regs, 3)
72
- if (length(notfound_regs) > 3) {
73
- first_notfound <- c(first_notfound, "...")
74
- notfound_file <- file.path(outdir, "notfound_regulators.txt")
75
- writeLines(notfound_regs, notfound_file)
76
- msg1 <- paste0("The following regulators were not found in the envs.regmotifs file: ", paste(first_notfound, collapse = ", "))
77
- msg2 <- paste0("Check the full list in ", notfound_file)
78
- stop(msg1, "\n", msg2)
79
- } else {
80
- msg <- paste0("The following regulators were not found in the regmotifs file: ", paste(first_notfound, collapse = ", "))
81
- stop(msg)
82
- }
83
- }
84
- in_motifs <- in_motifs[in_motifs[, regulator_col] %in% rm_regs, , drop = FALSE]
85
- # add motif column
86
- in_motifs <- merge(in_motifs, regmotifs, by.x = regulator_col, by.y = rm_reg_col, all.x = TRUE, suffixes = c("", "_db"))
87
- motif_col <- rm_motif_col
88
- }
89
- if (is.null(regulator_col)) {
90
- # make motifs unique
91
- in_moitfs <- in_motifs[!duplicated(in_motifs[, motif_col]), , drop = FALSE]
92
- } else {
93
- in_motifs <- in_motifs[!duplicated(in_motifs[, c(regulator_col, motif_col)]), , drop = FALSE]
94
- }
95
-
96
-
97
- if (!grepl(".", genome, fixed = TRUE)) {
98
- genome_pkg = sprintf("BSgenome.Hsapiens.UCSC.%s", genome)
99
- } else {
100
- genome_pkg = genome
101
- }
102
- if (!requireNamespace(genome_pkg, quietly = TRUE)) {
103
- stop(sprintf("Genome package %s is not installed", genome_pkg))
104
- }
48
+ log_info("Ensuring motifs and regulators in the input data ...")
49
+ in_motifs <- ensure_regulator_motifs(in_motifs, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
50
+ genome_pkg <- get_genome_pkg(genome)
105
51
 
106
52
  log_info("Reading variant file ...")
107
53
  if (grepl("\\.vcf$", varfile) || grepl("\\.vcf\\.gz$", varfile)) {
@@ -124,91 +70,7 @@ snpinfo <- read.table(varfile, header=FALSE, stringsAsFactors=FALSE)
124
70
  colnames(snpinfo) <- c("chrom", "start", "end", "name", "score", "strand", "ref", "alt")
125
71
 
126
72
  log_info("Reading motif database ...")
127
- meme <- read_meme(motifdb)
128
-
129
- check_motifs <- function(motifdb_names) {
130
- motifs <- in_motifs[, motif_col, drop = TRUE]
131
- notfound_motifs <- setdiff(motifs, motifdb_names)
132
- if (length(notfound_motifs) > 0) {
133
- first_notfound <- head(notfound_motifs, 3)
134
- if (length(notfound_motifs) > 3) {
135
- first_notfound <- c(first_notfound, "...")
136
- notfound_file <- file.path(outdir, "notfound_motifs.txt")
137
- writeLines(notfound_motifs, notfound_file)
138
- msg1 <- paste0("The following motifs were not found in the motif database: ", paste(first_notfound, collapse = ", "))
139
- msg2 <- paste0("Check the full list in ", notfound_file)
140
-
141
- if (notfound == "error") {
142
- stop(msg1, "\n", msg2)
143
- } else if (notfound == "ignore") {
144
- log_warn(msg1)
145
- log_warn(msg2)
146
- }
147
- } else {
148
- msg <- paste0("The following motifs were not found in the motif database: ", paste(first_notfound, collapse = ", "))
149
- if (notfound == "error") {
150
- stop(msg)
151
- } else if (notfound == "ignore") {
152
- log_warn(msg)
153
- }
154
- }
155
-
156
- motifs <- setdiff(motifs, notfound_motifs)
157
- }
158
- return(motifs)
159
- }
160
-
161
- plot_variant <- function(motifbreakr_results) {
162
- log_info("Plotting variants ...")
163
- plotdir <- file.path(outdir, "plots")
164
- dir.create(plotdir, showWarnings = FALSE)
165
- results <- motifbreakr_results
166
- if (is.null(plots) || length(plots) == 0) {
167
- results <- results[order(-abs(results$alleleDiff)), , drop = FALSE]
168
- results <- results[1:min(plot_nvars, length(results)), , drop = FALSE]
169
- variants <- unique(results$SNP_id)
170
- } else {
171
- variants <- names(plots)
172
- }
173
- for (variant in variants) {
174
- log_info("- Variant: {variant}")
175
- if (is.null(plots[[variant]])) {
176
- plots[[variant]] <- list(devpars = devpars, which = "TRUE")
177
- }
178
- if (is.null(plots[[variant]]$which)) {
179
- plots[[variant]]$which <- "TRUE"
180
- }
181
- if (is.null(plots[[variant]]$devpars)) {
182
- plots[[variant]]$devpars <- devpars
183
- }
184
- if (is.null(plots[[variant]]$devpars$res)) {
185
- plots[[variant]]$devpars$res <- 100
186
- }
187
- res <- results[results$SNP_id == variant, , drop = FALSE]
188
- if (length(res) == 0) {
189
- stop(sprintf("Variant %s not found in results", variant))
190
- }
191
- res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))
192
- if (length(res) == 0) {
193
- stop(sprintf("No variants to plot for %s", variant))
194
- }
195
- plotfile <- file.path(plotdir, sprintf("%s.png", slugify(variant)))
196
- # fix motifBreakR 2.12 using names to filter in plotMB
197
- names(res) <- res$SNP_id
198
- dv <- plots[[variant]]$devpars
199
- if (is.null(dv$height)) {
200
- dv$height <- 2.4 * dv$res + length(res) * 1.2 * dv$res
201
- }
202
- if (is.null(dv$width)) {
203
- left <- min(sapply(res$motifPos, `[`, 1))
204
- right <- max(sapply(res$motifPos, `[`, 2))
205
- dv$width <- 1.5 * dv$res + (right - left) * 0.3 * dv$res
206
- }
207
- png(plotfile, width = dv$width, height = dv$height, res = dv$res)
208
- motifbreakR::plotMB(res, variant)
209
- dev.off()
210
- }
211
- }
73
+ mdb <- read_meme_to_motifdb(motifdb, in_motifs, motif_col, regulator_col, notfound, outdir)
212
74
 
213
75
  tool <- tolower(tool)
214
76
  tool <- match.arg(tool, c("motifbreakr", "atsnp"))
@@ -1,36 +1,6 @@
1
1
  library(atSNP)
2
2
  library(rtracklayer)
3
3
 
4
- log_info("Converting universalmotif object to motif_library ...")
5
-
6
- motifdb_names <- sapply(meme, function(m) m@name)
7
- motifs <- check_motifs(motifdb_names)
8
- meme <- filter_motifs(meme, name = motifs)
9
- # Get the right order of motif names
10
- motifs <- sapply(meme, function(m) m@name)
11
-
12
- # used for atSNP
13
- mdb <- lapply(meme, function(m) t(m@motif))
14
- names(mdb) <- motifs
15
-
16
- # compose one used for plotting using motifbreakR
17
- motifdb_matrices <- lapply(meme, function(m) m@motif)
18
- names(motifdb_matrices) <- motifs
19
- motifdb_meta <- do.call(rbind, lapply(meme, function(m) {
20
- ats <- attributes(m)
21
- ats$dataSource <- basename(motifdb)
22
- ats$class <- NULL
23
- ats$motif <- NULL
24
- ats$gapinfo <- NULL
25
- ats$sequenceCount <- ats$nsites
26
- ats$providerId <- ats$name
27
- ats$providerName <- ats$name
28
- ats$organism <- if (is.null(ats$organism) || length(ats$organism) == 0) "Unknown" else ats$organism
29
- unlist(ats)
30
- }))
31
- rownames(motifdb_meta) <- motifs
32
- pmotifs <- MotifDb:::MotifList(motifdb_matrices, tbl.metadata = motifdb_meta)
33
-
34
4
  log_info("Converting snpinfo to atSNP object ...")
35
5
 
36
6
  # c("chrom", "start", "end", "name", "score", "strand", "ref", "alt", "ref_seq", "alt_seq")
@@ -53,7 +23,9 @@ write.table(
53
23
  file = atsnp_bed,
54
24
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE
55
25
  )
56
- k <- max(sapply(mdb, nrow))
26
+
27
+ motif_lib <- motifdb_to_motiflib(mdb)
28
+ k <- max(sapply(motif_lib, nrow))
57
29
  snps <- LoadSNPData(
58
30
  atsnp_bed,
59
31
  genome.lib = genome_pkg,
@@ -62,13 +34,12 @@ snps <- LoadSNPData(
62
34
  half.window.size = k
63
35
  )
64
36
 
65
- # run motifbreakR
66
37
  log_info("Running atSNP ...")
67
- atsnp_scores <- ComputeMotifScore(mdb, snps, ncores = ncores)
38
+ atsnp_scores <- ComputeMotifScore(motif_lib, snps, ncores = ncores)
68
39
 
69
40
  log_info("Calculating p values ...")
70
41
  atsnp_result <- ComputePValues(
71
- motif.lib = mdb,
42
+ motif.lib = motif_lib,
72
43
  snp.info = snps,
73
44
  motif.scores = atsnp_scores$motif.scores,
74
45
  ncores = ncores,
@@ -101,7 +72,7 @@ atsnp_result$motifPos <- sapply(1:nrow(atsnp_result), function(i) {
101
72
  paste(c(atsnp_result$ref_start[i] - k, atsnp_result$ref_end[i] - k), collapse = ",")
102
73
  })
103
74
  if (!is.null(regulator_col)) {
104
- atsnp_result$Regulator <- in_motifs[
75
+ atsnp_result$geneSymbol <- atsnp_result$Regulator <- in_motifs[
105
76
  match(atsnp_result$providerId, in_motifs[[motif_col]]),
106
77
  regulator_col,
107
78
  drop = TRUE
@@ -120,7 +91,30 @@ atsnp_result$alleleDiff <- -atsnp_result[[cutoff_col]]
120
91
  atsnp_result$effect <- "strong"
121
92
  atsnp_result$motifPos <- lapply(atsnp_result$motifPos, function(x) as.integer(unlist(strsplit(x, ","))))
122
93
  atsnp_result <- makeGRangesFromDataFrame(atsnp_result, keep.extra.columns = TRUE, starts.in.df.are.0based = TRUE)
94
+ genome(atsnp_result) <- genome
123
95
  attributes(atsnp_result)$genome.package <- genome_pkg
124
- attributes(atsnp_result)$motifs <- pmotifs
96
+ attributes(atsnp_result)$motifs <- mdb
97
+
98
+ if (is.null(plots) || length(plots) == 0) {
99
+ atsnp_result <- atsnp_result[order(-abs(atsnp_result$alleleDiff)), , drop = FALSE]
100
+ atsnp_result <- atsnp_result[1:min(plot_nvars, length(atsnp_result)), , drop = FALSE]
101
+ variants <- unique(atsnp_result$SNP_id)
102
+ } else {
103
+ variants <- names(plots)
104
+ }
105
+ for (variant in variants) {
106
+ log_info("- Variant: {variant}")
107
+ if (is.null(plots[[variant]])) {
108
+ plots[[variant]] <- list(devpars = devpars, which = "TRUE")
109
+ }
110
+ if (is.null(plots[[variant]]$which)) {
111
+ plots[[variant]]$which <- "TRUE"
112
+ }
113
+ if (is.null(plots[[variant]]$devpars)) {
114
+ plots[[variant]]$devpars <- devpars
115
+ }
116
+ res <- atsnp_result[atsnp_result$SNP_id == variant, , drop = FALSE]
117
+ res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))
125
118
 
126
- plot_variant(atsnp_result)
119
+ plot_variant_motifs(res, variant, plots[[variant]]$devpars, outdir)
120
+ }
@@ -1,30 +1,6 @@
1
1
  library(motifbreakR)
2
- bsgenome <- getBSgenome(genome_pkg)
3
-
4
- log_info("Converting universalmotif object to MotifDb object ...")
5
-
6
- motifdb_names <- sapply(meme, function(m) m@name)
7
- motifs <- check_motifs(motifdb_names)
8
- meme <- filter_motifs(meme, name = motifs)
9
- # Get the right order of motif names
10
- motifs <- sapply(meme, function(m) m@name)
11
- motifdb_matrices <- lapply(meme, function(m) m@motif)
12
- names(motifdb_matrices) <- motifs
13
2
 
14
- motifdb_meta <- do.call(rbind, lapply(meme, function(m) {
15
- ats <- attributes(m)
16
- ats$dataSource <- basename(motifdb)
17
- ats$class <- NULL
18
- ats$motif <- NULL
19
- ats$gapinfo <- NULL
20
- ats$sequenceCount <- ats$nsites
21
- ats$providerId <- ats$name
22
- ats$providerName <- ats$name
23
- ats$organism <- if (is.null(ats$organism) || length(ats$organism) == 0) "Unknown" else ats$organism
24
- unlist(ats)
25
- }))
26
- rownames(motifdb_meta) <- motifs
27
- mdb <- MotifDb:::MotifList(motifdb_matrices, tbl.metadata = motifdb_meta)
3
+ bsgenome <- getBSgenome(genome_pkg)
28
4
 
29
5
  # `chrom`, `start`, `end`, `name`, `score`, `strand`, `ref`, `alt`.
30
6
  is_indel <- nchar(snpinfo$ref) != 1 | nchar(snpinfo$alt) != 1
@@ -93,4 +69,27 @@ write.table(
93
69
  )
94
70
  rm(results_to_save)
95
71
 
96
- plot_variant(results)
72
+ log_info("Plotting variants ...")
73
+ if (is.null(plots) || length(plots) == 0) {
74
+ results <- results[order(-abs(results$alleleDiff)), , drop = FALSE]
75
+ results <- results[1:min(plot_nvars, length(results)), , drop = FALSE]
76
+ variants <- unique(results$SNP_id)
77
+ } else {
78
+ variants <- names(plots)
79
+ }
80
+ for (variant in variants) {
81
+ log_info("- Variant: {variant}")
82
+ if (is.null(plots[[variant]])) {
83
+ plots[[variant]] <- list(devpars = devpars, which = "TRUE")
84
+ }
85
+ if (is.null(plots[[variant]]$which)) {
86
+ plots[[variant]]$which <- "TRUE"
87
+ }
88
+ if (is.null(plots[[variant]]$devpars)) {
89
+ plots[[variant]]$devpars <- devpars
90
+ }
91
+ res <- results[results$SNP_id == variant, , drop = FALSE]
92
+ res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))
93
+
94
+ plot_variant_motifs(res, variant, plots[[variant]]$devpars, outdir)
95
+ }
@@ -0,0 +1,76 @@
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+ {{ biopipen_dir | joinpaths: "scripts", "regulatory", "motifs-common.R" | source_r }}
3
+
4
+ library(BSgenome)
5
+ library(GenomicRanges)
6
+
7
+ infile <- {{in.infile | r}}
8
+ outdir <- {{out.outdir | r}}
9
+ genome <- {{envs.genome | r}}
10
+ motifdb <- {{envs.motifdb | r}}
11
+ motif_col <- {{envs.motif_col | r}}
12
+ regulator_col <- {{envs.regulator_col | r}}
13
+ regmotifs <- {{envs.regmotifs | r}}
14
+ notfound <- {{envs.notfound | r}}
15
+ devpars <- {{envs.devpars | r}}
16
+ plot_vars <- {{envs.plot_vars | r}}
17
+
18
+ if (is.null(motifdb) || !file.exists(motifdb)) {
19
+ stop("Motif database (envs.motifdb) is required and must exist")
20
+ }
21
+
22
+ if (is.null(genome)) {
23
+ stop("Reference genome (envs.ref) is required and must exist")
24
+ }
25
+
26
+ if (is.null(motif_col) && is.null(regulator_col)) {
27
+ stop("Either motif (envs.motif_col) or regulator (envs.regulator_col) column must be provided")
28
+ }
29
+
30
+ log_info("Reading input data ...")
31
+ indata <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
32
+
33
+ log_info("Ensuring regulators in the input data ...")
34
+ indata <- ensure_regulator_motifs(indata, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
35
+ genome_pkg <- get_genome_pkg(genome)
36
+
37
+ log_info("Reading motif database ...")
38
+ meme <- read_meme_to_motifdb(motifdb, indata, motif_col, regulator_col, notfound, outdir)
39
+
40
+ log_info("Composing motifbreakR results from input data ...")
41
+ indata$chr <- indata$chrom %||% indata$chr %||% indata$seqnames
42
+ indata$seqnames <- NULL
43
+ indata$strand <- indata$strand %||% "+"
44
+ indata$varType <- indata$varType %||% "SNV"
45
+ indata$geneSymbol <- indata$geneSymbol %||% indata$Regulator
46
+ indata$providerId <- indata$providerId %||% indata$motif
47
+ indata$providerName <- indata$providerName %||% indata$providerId
48
+ indata$dataSource <- indata$dataSource %||% strsplit(basename(motifdb), "\\.")[[1]][1]
49
+ indata$effect <- indata$effect %||% "strong"
50
+ indata$altPos <- indata$altPos %||% 1
51
+ indata$alleleDiff <- indata$alleleDiff %||% indata$score %||% 0
52
+
53
+ # check other required columns
54
+ for (col in c("start", "end", "SNP_id", "REF", "ALT", "motifPos")) {
55
+ if (!(col %in% colnames(indata))) {
56
+ stop("Column '", col, "' is required in the input data")
57
+ }
58
+ }
59
+ indata$motifPos <- lapply(indata$motifPos, function(x) as.integer(unlist(strsplit(x, ","))))
60
+ indata <- makeGRangesFromDataFrame(indata, keep.extra.columns = TRUE, starts.in.df.are.0based = TRUE)
61
+ genome(indata) <- genome
62
+ attributes(indata)$genome.package <- genome_pkg
63
+ attributes(indata)$motifs <- meme
64
+
65
+ log_info("Plotting variants ...")
66
+ if (is.null(plot_vars)) {
67
+ plot_vars <- unique(indata$SNP_id)
68
+ } else if (length(plot_vars) > 1) {
69
+ plot_vars <- unique(plot_vars)
70
+ } else {
71
+ plot_vars <- strsplit(plot_vars, ",")[[1]]
72
+ }
73
+ for (pvar in plot_vars) {
74
+ log_info("- Variant: {pvar}")
75
+ plot_variant_motifs(indata, pvar, devpars, outdir)
76
+ }