biopipen 0.31.5__py3-none-any.whl → 0.31.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

biopipen/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.31.5"
1
+ __version__ = "0.31.7"
biopipen/ns/bam.py CHANGED
@@ -301,3 +301,31 @@ class BamSampling(Proc):
301
301
  "sort_args": [],
302
302
  }
303
303
  script = "file://../scripts/bam/BamSampling.py"
304
+
305
+
306
+ class BamSubsetByBed(Proc):
307
+ """Subset bam file by the regions in a bed file
308
+
309
+ Input:
310
+ bamfile: The bam file
311
+ bedfile: The bed file
312
+
313
+ Output:
314
+ outfile: The output bam file
315
+
316
+ Envs:
317
+ ncores: Number of cores to use
318
+ samtools: Path to samtools executable
319
+ tool: The tool to use, currently only "samtools" is supported
320
+ index: Whether to index the output bam file
321
+ """
322
+ input = "bamfile:file, bedfile:file"
323
+ output = "outfile:file:{{in.bamfile | stem}}-subset.bam"
324
+ lang = config.lang.python
325
+ envs = {
326
+ "ncores": config.misc.ncores,
327
+ "samtools": config.exe.samtools,
328
+ "tool": "samtools",
329
+ "index": True,
330
+ }
331
+ script = "file://../scripts/bam/BamSubsetByBed.py"
biopipen/ns/bed.py CHANGED
@@ -198,3 +198,43 @@ class BedtoolsIntersect(Proc):
198
198
  "postcmd": None,
199
199
  }
200
200
  script = "file://../scripts/bed/BedtoolsIntersect.py"
201
+
202
+
203
+ class BedtoolsMakeWindows(Proc):
204
+ """Make windows from a BED file or genome size file, using `bedtools makewindows`.
205
+
206
+ Input:
207
+ infile: The input BED file or a genome size file
208
+ Type will be detected by the number of columns in the file.
209
+ If it has 3+ columns, it is treated as a BED file, otherwise
210
+ a genome size file.
211
+
212
+ Output:
213
+ outfile: The output BED file
214
+
215
+ Envs:
216
+ bedtools: The path to bedtools
217
+ window (type=int): The size of the windows
218
+ step (type=int): The step size of the windows
219
+ nwin (type=int): The number of windows to be generated
220
+ Exclusive with `window` and `step`.
221
+ Either `nwin` or `window` and `step` should be provided.
222
+ reverse (flag): Reverse numbering of windows in the output
223
+ name (choice): How to name the generated windows/regions
224
+ - none: Do not add any name
225
+ - src: Use the source interval's name
226
+ - winnum: Use the window number
227
+ - srcwinnum: Use the source interval's name and window number
228
+ """ # noqa: E501
229
+ input = "infile:file"
230
+ output = "outfile:file:{{in.infile | stem}}_windows.bed"
231
+ lang = config.lang.python
232
+ envs = {
233
+ "bedtools": config.exe.bedtools,
234
+ "window": None,
235
+ "step": None,
236
+ "nwin": None,
237
+ "reverse": False,
238
+ "name": "none",
239
+ }
240
+ script = "file://../scripts/bed/BedtoolsMakeWindows.py"
biopipen/ns/regulatory.py CHANGED
@@ -212,3 +212,75 @@ class MotifAffinityTest(Proc):
212
212
  "atsnp_args": {"padj_cutoff": True, "padj": "BH", "p": "pval_diff"},
213
213
  }
214
214
  script = "file://../scripts/regulatory/MotifAffinityTest.R"
215
+
216
+
217
+ class VariantMotifPlot(Proc):
218
+ """A plot with a genomic region surrounding a genomic variant, and
219
+ potentially disrupted motifs.
220
+
221
+ Currently only SNVs are supported.
222
+
223
+ Input:
224
+ infile: File containing the variants and motifs.
225
+ It is a TAB-delimited file with the following columns:
226
+ - chrom: The chromosome of the SNV. Alias: chr, seqnames.
227
+ - start: The start position of the SNV, no matter 0- or 1-based.
228
+ - end: The end position of the SNV, which will be used as the position of the SNV.
229
+ - strand: Indicating the direction of the surrounding sequence matching the motif.
230
+ - SNP_id: The name of the SNV.
231
+ - REF: The reference allele of the SNV.
232
+ - ALT: The alternative allele of the SNV.
233
+ - providerId: The motif id. It can be specified by `envs.motif_col`.
234
+ - providerName: The name of the motif provider. Optional.
235
+ - Regulator: The regulator name. Optional, can be specified by `envs.regulator_col`.
236
+ - motifPos: The position of the motif, relative to the position of the SNV.
237
+ For example, '-8, 4' means the motif is 8 bp upstream and 4 bp downstream of the SNV.
238
+
239
+ Envs:
240
+ genome: The genome assembly.
241
+ Used to fetch the sequences around the variants by package, for example, `BSgenome.Hsapiens.UCSC.hg19` is required if
242
+ `hg19`. If it is an organism other than human, please specify the full name of the package, for example, `BSgenome.Mmusculus.UCSC.mm10`.
243
+ motifdb: The path to the motif database. This is required.
244
+ It should be in the format of MEME motif database.
245
+ Databases can be downloaded here: <https://meme-suite.org/meme/doc/download.html>.
246
+ See also introduction to the databases: <https://meme-suite.org/meme/db/motifs>.
247
+ [universalmotif](https://github.com/bjmt/universalmotif) is required to read the motif database.
248
+ motif_col: The column name in the motif file containing the motif names.
249
+ If this is not provided, `envs.regulator_col` and `envs.regmotifs` are required,
250
+ which are used to infer the motif names from the regulator names.
251
+ regulator_col: The column name in the motif file containing the regulator names.
252
+ Both `motif_col` and `regulator_col` should be the direct column names or
253
+ the index (1-based) of the columns.
254
+ If no `regulator_col` is provided, no regulator information is written in
255
+ the output. Otherwise, the regulator information is written in the output in
256
+ the `Regulator` column.
257
+ regmotifs: The path to the regulator-motif mapping file.
258
+ It must have header and the columns `Motif` or `Model` for motif names and
259
+ `TF`, `Regulator` or `Transcription factor` for regulator names.
260
+ notfound (choice): What to do if a motif is not found in the database,
261
+ or a regulator is not found in the regulator-motif mapping (envs.regmotifs)
262
+ file.
263
+ - error: Report error and stop the process.
264
+ - ignore: Ignore the motif and continue.
265
+ devpars (ns): The default device parameters for the plot.
266
+ - width (type=int): The width of the plot.
267
+ - height (type=int): The height of the plot.
268
+ - res (type=int): The resolution of the plot.
269
+ plot_vars (type=auto): The variants (SNP_id) to plot.
270
+ A list of variant names to plot or a string with the variant names separated by comma.
271
+ When not specified, all variants are plotted.
272
+ """ # noqa: E501
273
+ input = "infile:file"
274
+ output = "outdir:dir:{{in.infile | stem}}.vmplots"
275
+ lang = config.lang.rscript
276
+ envs = {
277
+ "genome": config.ref.genome,
278
+ "motifdb": config.ref.tf_motifdb,
279
+ "motif_col": "providerId",
280
+ "regulator_col": None,
281
+ "regmotifs": config.ref.tf_motifs,
282
+ "notfound": "error",
283
+ "devpars": {"width": 800, "height": None, "res": 100},
284
+ "plot_vars": None,
285
+ }
286
+ script = "file://../scripts/regulatory/VariantMotifPlot.R"
biopipen/ns/vcf.py CHANGED
@@ -335,6 +335,8 @@ class TruvariBench(Proc):
335
335
  """Run `truvari bench` to compare a VCF with CNV calls and
336
336
  base CNV standards
337
337
 
338
+ Requires truvari v4+
339
+
338
340
  See https://github.com/ACEnglish/truvari/wiki/bench
339
341
 
340
342
  Input:
@@ -358,7 +360,7 @@ class TruvariBench(Proc):
358
360
  "truvari": config.exe.truvari,
359
361
  "ref": config.ref.reffa,
360
362
  "refdist": 500,
361
- "pctsim": 0.7,
363
+ "pctseq": 0.7,
362
364
  "pctsize": 0.7,
363
365
  "pctovl": 0.0,
364
366
  "typeignore": False,
@@ -402,7 +404,7 @@ class TruvariBenchSummary(Proc):
402
404
  output = "outdir:dir:truvari_bench.summary"
403
405
  lang = config.lang.rscript
404
406
  envs = {
405
- "plots": ["call cnt", "base cnt", "precision", "recall", "f1"],
407
+ "plots": ["comp cnt", "base cnt", "precision", "recall", "f1"],
406
408
  "devpars": None,
407
409
  }
408
410
  script = "file://../scripts/vcf/TruvariBenchSummary.R"
@@ -414,6 +416,8 @@ class TruvariConsistency(Proc):
414
416
 
415
417
  See https://github.com/ACEnglish/truvari/wiki/consistency
416
418
 
419
+ Requires truvari v4+
420
+
417
421
  Input:
418
422
  vcfs: The vcf files with CNV calls
419
423
 
@@ -0,0 +1,38 @@
1
+ from pathlib import Path
2
+ from biopipen.utils.misc import run_command, logger
3
+
4
+ # using:
5
+ # samtools view --target-file regions.bed -b --threads 4 -o out.bam in.bam
6
+
7
+ bamfile = {{ in.bamfile | repr }} # pyright: ignore # noqa
8
+ bedfile = {{ in.bedfile | repr }} # pyright: ignore # noqa
9
+ outfile = Path({{ out.outfile | repr }}) # pyright: ignore
10
+ ncores = {{ envs.ncores | int }} # pyright: ignore
11
+ samtools = {{ envs.samtools | repr }} # pyright: ignore
12
+ tool = {{ envs.tool | repr }} # pyright: ignore
13
+ should_index = {{ envs.index | repr }} # pyright: ignore
14
+
15
+ if tool != "samtools":
16
+ raise ValueError(
17
+ f"Tool {tool} is not supported. "
18
+ "Currently only samtools is supported."
19
+ )
20
+
21
+ cmd = [
22
+ samtools,
23
+ "view",
24
+ "--target-file",
25
+ bedfile,
26
+ "-b",
27
+ "--threads",
28
+ ncores,
29
+ "-o",
30
+ outfile,
31
+ bamfile
32
+ ]
33
+ run_command(cmd, fg=True)
34
+
35
+ if should_index:
36
+ logger.info("Indexing the output bam file.")
37
+ cmd = [samtools, "index", "-@", ncores, outfile]
38
+ run_command(cmd, fg=True)
@@ -0,0 +1,47 @@
1
+ from pathlib import Path
2
+ from biopipen.utils.misc import run_command, logger
3
+
4
+ infile = Path({{in.infile | repr}}) # pyright: ignore # noqa: #999
5
+ outfile = Path({{out.outfile | repr}}) # pyright: ignore
6
+ bedtools = {{envs.bedtools | repr}} # pyright: ignore
7
+ window = {{envs.window | repr}} # pyright: ignore
8
+ step = {{envs.step | repr}} # pyright: ignore
9
+ nwin = {{envs.nwin | repr}} # pyright: ignore
10
+ reverse = {{envs.reverse | repr}} # pyright: ignore
11
+ name = {{envs.name | repr}} # pyright: ignore
12
+
13
+ if nwin is None and window is None:
14
+ raise ValueError("Either `nwin` or `window` should be provided.")
15
+
16
+ if nwin is not None and window is not None:
17
+ raise ValueError("Either `nwin` or `window` should be provided, not both.")
18
+
19
+ # only the first line is inspected: 3+ tab-separated columns means BED, otherwise a genome size file
20
+ with infile.open() as f:
21
+ line = f.readline().strip()
22
+ if len(line.split("\t")) > 2:
23
+ is_bed = True
24
+ else:
25
+ is_bed = False
26
+
27
+ if is_bed:
28
+ logger.info("BED file is detected as input.")
29
+ cmd = [bedtools, "makewindows", "-b", infile]
30
+ else:
31
+ logger.info("Genome size file is detected as input.")
32
+ cmd = [bedtools, "makewindows", "-g", infile]
33
+
34
+ if nwin is not None: # same None test as the validation above, so nwin=0 is not mistaken for "unset"
35
+ cmd.extend(["-n", nwin])
36
+ elif step is not None:
37
+ cmd.extend(["-w", window, "-s", step])
38
+ else:
39
+ cmd.extend(["-w", window])
40
+
41
+ if reverse:
42
+ cmd.append("-reverse")
43
+
44
+ if name != "none":
45
+ cmd.extend(["-name", name])
46
+
47
+ run_command(cmd, stdout=outfile)
@@ -1,9 +1,9 @@
1
1
  # Script for regulatory.MotifAffinityTest
2
2
  {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
3
+ {{ biopipen_dir | joinpaths: "scripts", "regulatory", "motifs-common.R" | source_r }}
3
4
 
4
5
  library(BiocParallel)
5
6
  library(BSgenome)
6
- library(universalmotif)
7
7
 
8
8
  motiffile <- {{in.motiffile | r}}
9
9
  varfile <- {{in.varfile | r}}
@@ -45,63 +45,9 @@ if (is.null(motif_col) && is.null(regulator_col)) {
45
45
  log_info("Reading input regulator/motif file ...")
46
46
  in_motifs <- read.table(motiffile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
47
47
 
48
- if (is.null(motif_col)) {
49
- log_info("Inferring motifs from regulators ...")
50
- if (is.null(regmotifs) || !file.exists(regmotifs)) {
51
- stop("Regulator motifs (envs.regmotifs) is required and must exist when no motif column (envs.motif_col) is provided")
52
- }
53
- regmotifs <- read.table(regmotifs, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
54
- rm_motif_col <- c('Motif', 'motif', 'MOTIF', 'Model', 'model', 'MODEL')
55
- rm_reg_col <- c('Regulator', 'regulator', 'REGULATOR', 'TF', 'tf', 'TF', 'Transcription factor', 'transcription factor', 'Transcription Factor')
56
- rm_motif_col <- intersect(rm_motif_col, colnames(regmotifs))
57
- rm_reg_col <- intersect(rm_reg_col, colnames(regmotifs))
58
- if (length(rm_motif_col) == 0) {
59
- stop("No motif column found in envs.regmotifs, provide one of: ", paste(rm_motif_col, collapse = ", "))
60
- }
61
- if (length(rm_reg_col) == 0) {
62
- stop("No regulator column found in envs.regmotifs, provide one of: ", paste(rm_reg_col, collapse = ", "))
63
- }
64
- rm_motif_col <- rm_motif_col[1]
65
- rm_reg_col <- rm_reg_col[1]
66
- # check regulators
67
- rm_regs <- regmotifs[, rm_reg_col, drop = TRUE]
68
- regulators <- in_motifs[, regulator_col, drop = TRUE]
69
- notfound_regs <- setdiff(regulators, rm_regs)
70
- if (length(notfound_regs) > 0 && notfound == "error") {
71
- first_notfound <- head(notfound_regs, 3)
72
- if (length(notfound_regs) > 3) {
73
- first_notfound <- c(first_notfound, "...")
74
- notfound_file <- file.path(outdir, "notfound_regulators.txt")
75
- writeLines(notfound_regs, notfound_file)
76
- msg1 <- paste0("The following regulators were not found in the envs.regmotifs file: ", paste(first_notfound, collapse = ", "))
77
- msg2 <- paste0("Check the full list in ", notfound_file)
78
- stop(msg1, "\n", msg2)
79
- } else {
80
- msg <- paste0("The following regulators were not found in the regmotifs file: ", paste(first_notfound, collapse = ", "))
81
- stop(msg)
82
- }
83
- }
84
- in_motifs <- in_motifs[in_motifs[, regulator_col] %in% rm_regs, , drop = FALSE]
85
- # add motif column
86
- in_motifs <- merge(in_motifs, regmotifs, by.x = regulator_col, by.y = rm_reg_col, all.x = TRUE, suffixes = c("", "_db"))
87
- motif_col <- rm_motif_col
88
- }
89
- if (is.null(regulator_col)) {
90
- # make motifs unique
91
- in_moitfs <- in_motifs[!duplicated(in_motifs[, motif_col]), , drop = FALSE]
92
- } else {
93
- in_motifs <- in_motifs[!duplicated(in_motifs[, c(regulator_col, motif_col)]), , drop = FALSE]
94
- }
95
-
96
-
97
- if (!grepl(".", genome, fixed = TRUE)) {
98
- genome_pkg = sprintf("BSgenome.Hsapiens.UCSC.%s", genome)
99
- } else {
100
- genome_pkg = genome
101
- }
102
- if (!requireNamespace(genome_pkg, quietly = TRUE)) {
103
- stop(sprintf("Genome package %s is not installed", genome_pkg))
104
- }
48
+ log_info("Ensuring motifs and regulators in the input data ...")
49
+ in_motifs <- ensure_regulator_motifs(in_motifs, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
50
+ genome_pkg <- get_genome_pkg(genome)
105
51
 
106
52
  log_info("Reading variant file ...")
107
53
  if (grepl("\\.vcf$", varfile) || grepl("\\.vcf\\.gz$", varfile)) {
@@ -124,91 +70,7 @@ snpinfo <- read.table(varfile, header=FALSE, stringsAsFactors=FALSE)
124
70
  colnames(snpinfo) <- c("chrom", "start", "end", "name", "score", "strand", "ref", "alt")
125
71
 
126
72
  log_info("Reading motif database ...")
127
- meme <- read_meme(motifdb)
128
-
129
- check_motifs <- function(motifdb_names) {
130
- motifs <- in_motifs[, motif_col, drop = TRUE]
131
- notfound_motifs <- setdiff(motifs, motifdb_names)
132
- if (length(notfound_motifs) > 0) {
133
- first_notfound <- head(notfound_motifs, 3)
134
- if (length(notfound_motifs) > 3) {
135
- first_notfound <- c(first_notfound, "...")
136
- notfound_file <- file.path(outdir, "notfound_motifs.txt")
137
- writeLines(notfound_motifs, notfound_file)
138
- msg1 <- paste0("The following motifs were not found in the motif database: ", paste(first_notfound, collapse = ", "))
139
- msg2 <- paste0("Check the full list in ", notfound_file)
140
-
141
- if (notfound == "error") {
142
- stop(msg1, "\n", msg2)
143
- } else if (notfound == "ignore") {
144
- log_warn(msg1)
145
- log_warn(msg2)
146
- }
147
- } else {
148
- msg <- paste0("The following motifs were not found in the motif database: ", paste(first_notfound, collapse = ", "))
149
- if (notfound == "error") {
150
- stop(msg)
151
- } else if (notfound == "ignore") {
152
- log_warn(msg)
153
- }
154
- }
155
-
156
- motifs <- setdiff(motifs, notfound_motifs)
157
- }
158
- return(motifs)
159
- }
160
-
161
- plot_variant <- function(motifbreakr_results) {
162
- log_info("Plotting variants ...")
163
- plotdir <- file.path(outdir, "plots")
164
- dir.create(plotdir, showWarnings = FALSE)
165
- results <- motifbreakr_results
166
- if (is.null(plots) || length(plots) == 0) {
167
- results <- results[order(-abs(results$alleleDiff)), , drop = FALSE]
168
- results <- results[1:min(plot_nvars, length(results)), , drop = FALSE]
169
- variants <- unique(results$SNP_id)
170
- } else {
171
- variants <- names(plots)
172
- }
173
- for (variant in variants) {
174
- log_info("- Variant: {variant}")
175
- if (is.null(plots[[variant]])) {
176
- plots[[variant]] <- list(devpars = devpars, which = "TRUE")
177
- }
178
- if (is.null(plots[[variant]]$which)) {
179
- plots[[variant]]$which <- "TRUE"
180
- }
181
- if (is.null(plots[[variant]]$devpars)) {
182
- plots[[variant]]$devpars <- devpars
183
- }
184
- if (is.null(plots[[variant]]$devpars$res)) {
185
- plots[[variant]]$devpars$res <- 100
186
- }
187
- res <- results[results$SNP_id == variant, , drop = FALSE]
188
- if (length(res) == 0) {
189
- stop(sprintf("Variant %s not found in results", variant))
190
- }
191
- res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))
192
- if (length(res) == 0) {
193
- stop(sprintf("No variants to plot for %s", variant))
194
- }
195
- plotfile <- file.path(plotdir, sprintf("%s.png", slugify(variant)))
196
- # fix motifBreakR 2.12 using names to filter in plotMB
197
- names(res) <- res$SNP_id
198
- dv <- plots[[variant]]$devpars
199
- if (is.null(dv$height)) {
200
- dv$height <- 2.4 * dv$res + length(res) * 1.2 * dv$res
201
- }
202
- if (is.null(dv$width)) {
203
- left <- min(sapply(res$motifPos, `[`, 1))
204
- right <- max(sapply(res$motifPos, `[`, 2))
205
- dv$width <- 1.5 * dv$res + (right - left) * 0.3 * dv$res
206
- }
207
- png(plotfile, width = dv$width, height = dv$height, res = dv$res)
208
- motifbreakR::plotMB(res, variant)
209
- dev.off()
210
- }
211
- }
73
+ mdb <- read_meme_to_motifdb(motifdb, in_motifs, motif_col, regulator_col, notfound, outdir)
212
74
 
213
75
  tool <- tolower(tool)
214
76
  tool <- match.arg(tool, c("motifbreakr", "atsnp"))
@@ -1,36 +1,6 @@
1
1
  library(atSNP)
2
2
  library(rtracklayer)
3
3
 
4
- log_info("Converting universalmotif object to motif_library ...")
5
-
6
- motifdb_names <- sapply(meme, function(m) m@name)
7
- motifs <- check_motifs(motifdb_names)
8
- meme <- filter_motifs(meme, name = motifs)
9
- # Get the right order of motif names
10
- motifs <- sapply(meme, function(m) m@name)
11
-
12
- # used for atSNP
13
- mdb <- lapply(meme, function(m) t(m@motif))
14
- names(mdb) <- motifs
15
-
16
- # compose one used for plotting using motifbreakR
17
- motifdb_matrices <- lapply(meme, function(m) m@motif)
18
- names(motifdb_matrices) <- motifs
19
- motifdb_meta <- do.call(rbind, lapply(meme, function(m) {
20
- ats <- attributes(m)
21
- ats$dataSource <- basename(motifdb)
22
- ats$class <- NULL
23
- ats$motif <- NULL
24
- ats$gapinfo <- NULL
25
- ats$sequenceCount <- ats$nsites
26
- ats$providerId <- ats$name
27
- ats$providerName <- ats$name
28
- ats$organism <- if (is.null(ats$organism) || length(ats$organism) == 0) "Unknown" else ats$organism
29
- unlist(ats)
30
- }))
31
- rownames(motifdb_meta) <- motifs
32
- pmotifs <- MotifDb:::MotifList(motifdb_matrices, tbl.metadata = motifdb_meta)
33
-
34
4
  log_info("Converting snpinfo to atSNP object ...")
35
5
 
36
6
  # c("chrom", "start", "end", "name", "score", "strand", "ref", "alt", "ref_seq", "alt_seq")
@@ -53,7 +23,9 @@ write.table(
53
23
  file = atsnp_bed,
54
24
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE
55
25
  )
56
- k <- max(sapply(mdb, nrow))
26
+
27
+ motif_lib <- motifdb_to_motiflib(mdb)
28
+ k <- max(sapply(motif_lib, nrow))
57
29
  snps <- LoadSNPData(
58
30
  atsnp_bed,
59
31
  genome.lib = genome_pkg,
@@ -62,13 +34,12 @@ snps <- LoadSNPData(
62
34
  half.window.size = k
63
35
  )
64
36
 
65
- # run motifbreakR
66
37
  log_info("Running atSNP ...")
67
- atsnp_scores <- ComputeMotifScore(mdb, snps, ncores = ncores)
38
+ atsnp_scores <- ComputeMotifScore(motif_lib, snps, ncores = ncores)
68
39
 
69
40
  log_info("Calculating p values ...")
70
41
  atsnp_result <- ComputePValues(
71
- motif.lib = mdb,
42
+ motif.lib = motif_lib,
72
43
  snp.info = snps,
73
44
  motif.scores = atsnp_scores$motif.scores,
74
45
  ncores = ncores,
@@ -101,7 +72,7 @@ atsnp_result$motifPos <- sapply(1:nrow(atsnp_result), function(i) {
101
72
  paste(c(atsnp_result$ref_start[i] - k, atsnp_result$ref_end[i] - k), collapse = ",")
102
73
  })
103
74
  if (!is.null(regulator_col)) {
104
- atsnp_result$Regulator <- in_motifs[
75
+ atsnp_result$geneSymbol <- atsnp_result$Regulator <- in_motifs[
105
76
  match(atsnp_result$providerId, in_motifs[[motif_col]]),
106
77
  regulator_col,
107
78
  drop = TRUE
@@ -120,7 +91,30 @@ atsnp_result$alleleDiff <- -atsnp_result[[cutoff_col]]
120
91
  atsnp_result$effect <- "strong"
121
92
  atsnp_result$motifPos <- lapply(atsnp_result$motifPos, function(x) as.integer(unlist(strsplit(x, ","))))
122
93
  atsnp_result <- makeGRangesFromDataFrame(atsnp_result, keep.extra.columns = TRUE, starts.in.df.are.0based = TRUE)
94
+ genome(atsnp_result) <- genome
123
95
  attributes(atsnp_result)$genome.package <- genome_pkg
124
- attributes(atsnp_result)$motifs <- pmotifs
96
+ attributes(atsnp_result)$motifs <- mdb
97
+
98
+ if (is.null(plots) || length(plots) == 0) {
99
+ atsnp_result <- atsnp_result[order(-abs(atsnp_result$alleleDiff)), , drop = FALSE]
100
+ atsnp_result <- atsnp_result[1:min(plot_nvars, length(atsnp_result)), , drop = FALSE]
101
+ variants <- unique(atsnp_result$SNP_id)
102
+ } else {
103
+ variants <- names(plots)
104
+ }
105
+ for (variant in variants) {
106
+ log_info("- Variant: {variant}")
107
+ if (is.null(plots[[variant]])) {
108
+ plots[[variant]] <- list(devpars = devpars, which = "TRUE")
109
+ }
110
+ if (is.null(plots[[variant]]$which)) {
111
+ plots[[variant]]$which <- "TRUE"
112
+ }
113
+ if (is.null(plots[[variant]]$devpars)) {
114
+ plots[[variant]]$devpars <- devpars
115
+ }
116
+ res <- atsnp_result[atsnp_result$SNP_id == variant, , drop = FALSE]
117
+ res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))
125
118
 
126
- plot_variant(atsnp_result)
119
+ plot_variant_motifs(res, variant, plots[[variant]]$devpars, outdir)
120
+ }
@@ -1,30 +1,6 @@
1
1
  library(motifbreakR)
2
- bsgenome <- getBSgenome(genome_pkg)
3
-
4
- log_info("Converting universalmotif object to MotifDb object ...")
5
-
6
- motifdb_names <- sapply(meme, function(m) m@name)
7
- motifs <- check_motifs(motifdb_names)
8
- meme <- filter_motifs(meme, name = motifs)
9
- # Get the right order of motif names
10
- motifs <- sapply(meme, function(m) m@name)
11
- motifdb_matrices <- lapply(meme, function(m) m@motif)
12
- names(motifdb_matrices) <- motifs
13
2
 
14
- motifdb_meta <- do.call(rbind, lapply(meme, function(m) {
15
- ats <- attributes(m)
16
- ats$dataSource <- basename(motifdb)
17
- ats$class <- NULL
18
- ats$motif <- NULL
19
- ats$gapinfo <- NULL
20
- ats$sequenceCount <- ats$nsites
21
- ats$providerId <- ats$name
22
- ats$providerName <- ats$name
23
- ats$organism <- if (is.null(ats$organism) || length(ats$organism) == 0) "Unknown" else ats$organism
24
- unlist(ats)
25
- }))
26
- rownames(motifdb_meta) <- motifs
27
- mdb <- MotifDb:::MotifList(motifdb_matrices, tbl.metadata = motifdb_meta)
3
+ bsgenome <- getBSgenome(genome_pkg)
28
4
 
29
5
  # `chrom`, `start`, `end`, `name`, `score`, `strand`, `ref`, `alt`.
30
6
  is_indel <- nchar(snpinfo$ref) != 1 | nchar(snpinfo$alt) != 1
@@ -93,4 +69,27 @@ write.table(
93
69
  )
94
70
  rm(results_to_save)
95
71
 
96
- plot_variant(results)
72
+ log_info("Plotting variants ...")
73
+ if (is.null(plots) || length(plots) == 0) {
74
+ results <- results[order(-abs(results$alleleDiff)), , drop = FALSE]
75
+ results <- results[1:min(plot_nvars, length(results)), , drop = FALSE]
76
+ variants <- unique(results$SNP_id)
77
+ } else {
78
+ variants <- names(plots)
79
+ }
80
+ for (variant in variants) {
81
+ log_info("- Variant: {variant}")
82
+ if (is.null(plots[[variant]])) {
83
+ plots[[variant]] <- list(devpars = devpars, which = "TRUE")
84
+ }
85
+ if (is.null(plots[[variant]]$which)) {
86
+ plots[[variant]]$which <- "TRUE"
87
+ }
88
+ if (is.null(plots[[variant]]$devpars)) {
89
+ plots[[variant]]$devpars <- devpars
90
+ }
91
+ res <- results[results$SNP_id == variant, , drop = FALSE]
92
+ res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))
93
+
94
+ plot_variant_motifs(res, variant, plots[[variant]]$devpars, outdir)
95
+ }