biopipen 0.31.4__py3-none-any.whl → 0.31.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

@@ -0,0 +1,133 @@
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+
3
+ library(rlang)
4
+ library(dplyr)
5
+ library(ggplot2)
6
+ library(ggprism)
7
+
8
+ theme_set(theme_prism())
9
+
10
+ infiles <- {{in.infiles | r}}
11
+ outdir <- {{out.outdir | r}}
12
+ joboutdir <- {{job.outdir | r}}
13
+ group <- {{envs.group | r}}
14
+
15
+ if (is.character(group)) {
16
+ group <- read.csv(group, header = FALSE, row.names = NULL)
17
+ colnames(group) <- c("Sample", "Group")
18
+ } else if (is.list(group)) {
19
+ group <- do_call(
20
+ rbind,
21
+ lapply(names(group), function(n) data.frame(Sample = group[[n]], Group = n))
22
+ )
23
+ } else if (!is.null(group)) {
24
+ stop(paste0("Invalid group: ", paste0(group, collapse = ", ")))
25
+ }
26
+
27
+ log_info("Reading and merging metrics for each sample ...")
28
+ metrics <- NULL
29
+
30
+ for (infile in infiles) {
31
+ sample <- sub("_prodigy$", "", basename(dirname(infile)))
32
+ log_debug("- Reading metrics from {sample}")
33
+ metric <- read.table(
34
+ infile,
35
+ header = TRUE,
36
+ sep = "\t",
37
+ stringsAsFactors = FALSE,
38
+ check.names = FALSE,
39
+ row.names = NULL)
40
+ metric$Sample <- sample
41
+ metric <- metric %>% select(Sample, everything())
42
+ if (is.null(metrics)) {
43
+ metrics <- metric
44
+ } else {
45
+ metrics <- rbind(metrics, metric)
46
+ }
47
+ }
48
+
49
+ # Save metrics
50
+ write.table(
51
+ metrics,
52
+ file.path(outdir, "metrics.txt"),
53
+ sep = "\t",
54
+ quote = FALSE,
55
+ row.names = FALSE
56
+ )
57
+
58
+ add_report(
59
+ list(kind = "descr", content = "Metrics for all samples"),
60
+ list(kind = "table", src = file.path(outdir, "metrics.txt")),
61
+ h1 = "Metrics of all samples"
62
+ )
63
+
64
+ METRIC_DESCR = list(
65
+ nIC = "No. of intermolecular contacts",
66
+ nCCC = "No. of charged-charged contacts",
67
+ nCPC = "No. of charged-polar contacts",
68
+ nCAPC = "No. of charged-apolar contacts",
69
+ nPPC = "No. of polar-polar contacts",
70
+ nAPPC = "No. of apolar-polar contacts",
71
+ nAPAPC = "No. of apolar-apolar contacts",
72
+ pANISR = "Percentage of apolar NIS residues",
73
+ pCNISR = "Percentage of charged NIS residues",
74
+ BindingAffinity = "Predicted binding affinity (kcal.mol^-1)",
75
+ DissociationConstant = "Predicted dissociation constant (M)"
76
+ )
77
+
78
+ if (!is.null(group)) {
79
+ log_info("Merging group information ...")
80
+ metrics <- group %>%
81
+ left_join(metrics, by = "Sample") %>%
82
+ mutate(Group = factor(Group, levels = unique(Group)))
83
+ }
84
+
85
+ log_info("Plotting Prodigy metrics ...")
86
+ for (metric in names(METRIC_DESCR)) {
87
+ log_info("- {metric}: {METRIC_DESCR[[metric]]}")
88
+
89
+ add_report(
90
+ list(
91
+ kind = "descr",
92
+ content = METRIC_DESCR[[metric]] %||% paste0("Metric: ", metric)
93
+ ),
94
+ h1 = metric
95
+ )
96
+
97
+ # barplot
98
+ p <- ggplot(metrics, aes(x = Sample, y = !!sym(metric))) +
99
+ geom_bar(stat = "identity", fill = "steelblue") +
100
+ labs(x = "Sample", y = metric) +
101
+ theme(axis.text.x = element_text(angle = 90, hjust = 1))
102
+
103
+ figfile <- file.path(outdir, paste0(slugify(metric), ".barplot.png"))
104
+ png(figfile, height = 600, res = 100, width = nrow(metrics) * 30 + 200)
105
+ print(p)
106
+ dev.off()
107
+
108
+ add_report(
109
+ list(src = figfile, name = "By Sample"),
110
+ ui = "table_of_images",
111
+ h1 = metric
112
+ )
113
+
114
+ if (is.null(group)) { next }
115
+ # group: Sample, Group
116
+ p <- ggplot(metrics, aes(x = Group, y = !!sym(metric))) +
117
+ geom_boxplot(fill = "steelblue") +
118
+ labs(x = "Group", y = metric) +
119
+ theme(axis.text.x = element_text(angle = 90, hjust = 1))
120
+
121
+ figfile <- file.path(outdir, paste0(slugify(metric), ".boxplot.png"))
122
+ png(figfile, height = 600, res = 100, width = length(unique(metrics$Group)) * 30 + 200)
123
+ print(p)
124
+ dev.off()
125
+
126
+ add_report(
127
+ list(src = figfile, name = "By Group"),
128
+ ui = "table_of_images",
129
+ h1 = metric
130
+ )
131
+ }
132
+
133
+ save_report(joboutdir)
@@ -1,9 +1,9 @@
1
1
  # Script for regulatory.MotifAffinityTest
2
2
  {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
3
+ {{ biopipen_dir | joinpaths: "scripts", "regulatory", "motifs-common.R" | source_r }}
3
4
 
4
5
  library(BiocParallel)
5
6
  library(BSgenome)
6
- library(universalmotif)
7
7
 
8
8
  motiffile <- {{in.motiffile | r}}
9
9
  varfile <- {{in.varfile | r}}
@@ -45,63 +45,9 @@ if (is.null(motif_col) && is.null(regulator_col)) {
45
45
  log_info("Reading input regulator/motif file ...")
46
46
  in_motifs <- read.table(motiffile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
47
47
 
48
- if (is.null(motif_col)) {
49
- log_info("Inferring motifs from regulators ...")
50
- if (is.null(regmotifs) || !file.exists(regmotifs)) {
51
- stop("Regulator motifs (envs.regmotifs) is required and must exist when no motif column (envs.motif_col) is provided")
52
- }
53
- regmotifs <- read.table(regmotifs, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
54
- rm_motif_col <- c('Motif', 'motif', 'MOTIF', 'Model', 'model', 'MODEL')
55
- rm_reg_col <- c('Regulator', 'regulator', 'REGULATOR', 'TF', 'tf', 'TF', 'Transcription factor', 'transcription factor', 'Transcription Factor')
56
- rm_motif_col <- intersect(rm_motif_col, colnames(regmotifs))
57
- rm_reg_col <- intersect(rm_reg_col, colnames(regmotifs))
58
- if (length(rm_motif_col) == 0) {
59
- stop("No motif column found in envs.regmotifs, provide one of: ", paste(rm_motif_col, collapse = ", "))
60
- }
61
- if (length(rm_reg_col) == 0) {
62
- stop("No regulator column found in envs.regmotifs, provide one of: ", paste(rm_reg_col, collapse = ", "))
63
- }
64
- rm_motif_col <- rm_motif_col[1]
65
- rm_reg_col <- rm_reg_col[1]
66
- # check regulators
67
- rm_regs <- regmotifs[, rm_reg_col, drop = TRUE]
68
- regulators <- in_motifs[, regulator_col, drop = TRUE]
69
- notfound_regs <- setdiff(regulators, rm_regs)
70
- if (length(notfound_regs) > 0 && notfound == "error") {
71
- first_notfound <- head(notfound_regs, 3)
72
- if (length(notfound_regs) > 3) {
73
- first_notfound <- c(first_notfound, "...")
74
- notfound_file <- file.path(outdir, "notfound_regulators.txt")
75
- writeLines(notfound_regs, notfound_file)
76
- msg1 <- paste0("The following regulators were not found in the envs.regmotifs file: ", paste(first_notfound, collapse = ", "))
77
- msg2 <- paste0("Check the full list in ", notfound_file)
78
- stop(msg1, "\n", msg2)
79
- } else {
80
- msg <- paste0("The following regulators were not found in the regmotifs file: ", paste(first_notfound, collapse = ", "))
81
- stop(msg)
82
- }
83
- }
84
- in_motifs <- in_motifs[in_motifs[, regulator_col] %in% rm_regs, , drop = FALSE]
85
- # add motif column
86
- in_motifs <- merge(in_motifs, regmotifs, by.x = regulator_col, by.y = rm_reg_col, all.x = TRUE, suffixes = c("", "_db"))
87
- motif_col <- rm_motif_col
88
- }
89
- if (is.null(regulator_col)) {
90
- # make motifs unique
91
- in_moitfs <- in_motifs[!duplicated(in_motifs[, motif_col]), , drop = FALSE]
92
- } else {
93
- in_motifs <- in_motifs[!duplicated(in_motifs[, c(regulator_col, motif_col)]), , drop = FALSE]
94
- }
95
-
96
-
97
- if (!grepl(".", genome, fixed = TRUE)) {
98
- genome_pkg = sprintf("BSgenome.Hsapiens.UCSC.%s", genome)
99
- } else {
100
- genome_pkg = genome
101
- }
102
- if (!requireNamespace(genome_pkg, quietly = TRUE)) {
103
- stop(sprintf("Genome package %s is not installed", genome_pkg))
104
- }
48
+ log_info("Ensuring motifs and regulators in the input data ...")
49
+ in_motifs <- ensure_regulator_motifs(in_motifs, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
50
+ genome_pkg <- get_genome_pkg(genome)
105
51
 
106
52
  log_info("Reading variant file ...")
107
53
  if (grepl("\\.vcf$", varfile) || grepl("\\.vcf\\.gz$", varfile)) {
@@ -124,91 +70,7 @@ snpinfo <- read.table(varfile, header=FALSE, stringsAsFactors=FALSE)
124
70
  colnames(snpinfo) <- c("chrom", "start", "end", "name", "score", "strand", "ref", "alt")
125
71
 
126
72
  log_info("Reading motif database ...")
127
- meme <- read_meme(motifdb)
128
-
129
- check_motifs <- function(motifdb_names) {
130
- motifs <- in_motifs[, motif_col, drop = TRUE]
131
- notfound_motifs <- setdiff(motifs, motifdb_names)
132
- if (length(notfound_motifs) > 0) {
133
- first_notfound <- head(notfound_motifs, 3)
134
- if (length(notfound_motifs) > 3) {
135
- first_notfound <- c(first_notfound, "...")
136
- notfound_file <- file.path(outdir, "notfound_motifs.txt")
137
- writeLines(notfound_motifs, notfound_file)
138
- msg1 <- paste0("The following motifs were not found in the motif database: ", paste(first_notfound, collapse = ", "))
139
- msg2 <- paste0("Check the full list in ", notfound_file)
140
-
141
- if (notfound == "error") {
142
- stop(msg1, "\n", msg2)
143
- } else if (notfound == "ignore") {
144
- log_warn(msg1)
145
- log_warn(msg2)
146
- }
147
- } else {
148
- msg <- paste0("The following motifs were not found in the motif database: ", paste(first_notfound, collapse = ", "))
149
- if (notfound == "error") {
150
- stop(msg)
151
- } else if (notfound == "ignore") {
152
- log_warn(msg)
153
- }
154
- }
155
-
156
- motifs <- setdiff(motifs, notfound_motifs)
157
- }
158
- return(motifs)
159
- }
160
-
161
- plot_variant <- function(motifbreakr_results) {
162
- log_info("Plotting variants ...")
163
- plotdir <- file.path(outdir, "plots")
164
- dir.create(plotdir, showWarnings = FALSE)
165
- results <- motifbreakr_results
166
- if (is.null(plots) || length(plots) == 0) {
167
- results <- results[order(-abs(results$alleleDiff)), , drop = FALSE]
168
- results <- results[1:min(plot_nvars, length(results)), , drop = FALSE]
169
- variants <- unique(results$SNP_id)
170
- } else {
171
- variants <- names(plots)
172
- }
173
- for (variant in variants) {
174
- log_info("- Variant: {variant}")
175
- if (is.null(plots[[variant]])) {
176
- plots[[variant]] <- list(devpars = devpars, which = "TRUE")
177
- }
178
- if (is.null(plots[[variant]]$which)) {
179
- plots[[variant]]$which <- "TRUE"
180
- }
181
- if (is.null(plots[[variant]]$devpars)) {
182
- plots[[variant]]$devpars <- devpars
183
- }
184
- if (is.null(plots[[variant]]$devpars$res)) {
185
- plots[[variant]]$devpars$res <- 100
186
- }
187
- res <- results[results$SNP_id == variant, , drop = FALSE]
188
- if (length(res) == 0) {
189
- stop(sprintf("Variant %s not found in results", variant))
190
- }
191
- res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))
192
- if (length(res) == 0) {
193
- stop(sprintf("No variants to plot for %s", variant))
194
- }
195
- plotfile <- file.path(plotdir, sprintf("%s.png", slugify(variant)))
196
- # fix motifBreakR 2.12 using names to filter in plotMB
197
- names(res) <- res$SNP_id
198
- dv <- plots[[variant]]$devpars
199
- if (is.null(dv$height)) {
200
- dv$height <- 2.4 * dv$res + length(res) * 1.2 * dv$res
201
- }
202
- if (is.null(dv$width)) {
203
- left <- min(sapply(res$motifPos, `[`, 1))
204
- right <- max(sapply(res$motifPos, `[`, 2))
205
- dv$width <- 1.5 * dv$res + (right - left) * 0.3 * dv$res
206
- }
207
- png(plotfile, width = dv$width, height = dv$height, res = dv$res)
208
- motifbreakR::plotMB(res, variant)
209
- dev.off()
210
- }
211
- }
73
+ mdb <- read_meme_to_motifdb(motifdb, in_motifs, motif_col, regulator_col, notfound, outdir)
212
74
 
213
75
  tool <- tolower(tool)
214
76
  tool <- match.arg(tool, c("motifbreakr", "atsnp"))
@@ -1,36 +1,6 @@
1
1
  library(atSNP)
2
2
  library(rtracklayer)
3
3
 
4
- log_info("Converting universalmotif object to motif_library ...")
5
-
6
- motifdb_names <- sapply(meme, function(m) m@name)
7
- motifs <- check_motifs(motifdb_names)
8
- meme <- filter_motifs(meme, name = motifs)
9
- # Get the right order of motif names
10
- motifs <- sapply(meme, function(m) m@name)
11
-
12
- # used for atSNP
13
- mdb <- lapply(meme, function(m) t(m@motif))
14
- names(mdb) <- motifs
15
-
16
- # compose one used for plotting using motifbreakR
17
- motifdb_matrices <- lapply(meme, function(m) m@motif)
18
- names(motifdb_matrices) <- motifs
19
- motifdb_meta <- do.call(rbind, lapply(meme, function(m) {
20
- ats <- attributes(m)
21
- ats$dataSource <- basename(motifdb)
22
- ats$class <- NULL
23
- ats$motif <- NULL
24
- ats$gapinfo <- NULL
25
- ats$sequenceCount <- ats$nsites
26
- ats$providerId <- ats$name
27
- ats$providerName <- ats$name
28
- ats$organism <- if (is.null(ats$organism) || length(ats$organism) == 0) "Unknown" else ats$organism
29
- unlist(ats)
30
- }))
31
- rownames(motifdb_meta) <- motifs
32
- pmotifs <- MotifDb:::MotifList(motifdb_matrices, tbl.metadata = motifdb_meta)
33
-
34
4
  log_info("Converting snpinfo to atSNP object ...")
35
5
 
36
6
  # c("chrom", "start", "end", "name", "score", "strand", "ref", "alt", "ref_seq", "alt_seq")
@@ -53,7 +23,9 @@ write.table(
53
23
  file = atsnp_bed,
54
24
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE
55
25
  )
56
- k <- max(sapply(mdb, nrow))
26
+
27
+ motif_lib <- motifdb_to_motiflib(mdb)
28
+ k <- max(sapply(motif_lib, nrow))
57
29
  snps <- LoadSNPData(
58
30
  atsnp_bed,
59
31
  genome.lib = genome_pkg,
@@ -62,13 +34,12 @@ snps <- LoadSNPData(
62
34
  half.window.size = k
63
35
  )
64
36
 
65
- # run motifbreakR
66
37
  log_info("Running atSNP ...")
67
- atsnp_scores <- ComputeMotifScore(mdb, snps, ncores = ncores)
38
+ atsnp_scores <- ComputeMotifScore(motif_lib, snps, ncores = ncores)
68
39
 
69
40
  log_info("Calculating p values ...")
70
41
  atsnp_result <- ComputePValues(
71
- motif.lib = mdb,
42
+ motif.lib = motif_lib,
72
43
  snp.info = snps,
73
44
  motif.scores = atsnp_scores$motif.scores,
74
45
  ncores = ncores,
@@ -101,7 +72,7 @@ atsnp_result$motifPos <- sapply(1:nrow(atsnp_result), function(i) {
101
72
  paste(c(atsnp_result$ref_start[i] - k, atsnp_result$ref_end[i] - k), collapse = ",")
102
73
  })
103
74
  if (!is.null(regulator_col)) {
104
- atsnp_result$Regulator <- in_motifs[
75
+ atsnp_result$geneSymbol <- atsnp_result$Regulator <- in_motifs[
105
76
  match(atsnp_result$providerId, in_motifs[[motif_col]]),
106
77
  regulator_col,
107
78
  drop = TRUE
@@ -120,7 +91,30 @@ atsnp_result$alleleDiff <- -atsnp_result[[cutoff_col]]
120
91
  atsnp_result$effect <- "strong"
121
92
  atsnp_result$motifPos <- lapply(atsnp_result$motifPos, function(x) as.integer(unlist(strsplit(x, ","))))
122
93
  atsnp_result <- makeGRangesFromDataFrame(atsnp_result, keep.extra.columns = TRUE, starts.in.df.are.0based = TRUE)
94
+ genome(atsnp_result) <- genome
123
95
  attributes(atsnp_result)$genome.package <- genome_pkg
124
- attributes(atsnp_result)$motifs <- pmotifs
96
+ attributes(atsnp_result)$motifs <- mdb
97
+
98
+ if (is.null(plots) || length(plots) == 0) {
99
+ atsnp_result <- atsnp_result[order(-abs(atsnp_result$alleleDiff)), , drop = FALSE]
100
+ atsnp_result <- atsnp_result[1:min(plot_nvars, length(atsnp_result)), , drop = FALSE]
101
+ variants <- unique(atsnp_result$SNP_id)
102
+ } else {
103
+ variants <- names(plots)
104
+ }
105
+ for (variant in variants) {
106
+ log_info("- Variant: {variant}")
107
+ if (is.null(plots[[variant]])) {
108
+ plots[[variant]] <- list(devpars = devpars, which = "TRUE")
109
+ }
110
+ if (is.null(plots[[variant]]$which)) {
111
+ plots[[variant]]$which <- "TRUE"
112
+ }
113
+ if (is.null(plots[[variant]]$devpars)) {
114
+ plots[[variant]]$devpars <- devpars
115
+ }
116
+ res <- atsnp_result[atsnp_result$SNP_id == variant, , drop = FALSE]
117
+ res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))
125
118
 
126
- plot_variant(atsnp_result)
119
+ plot_variant_motifs(res, variant, plots[[variant]]$devpars, outdir)
120
+ }
@@ -1,30 +1,6 @@
1
1
  library(motifbreakR)
2
- bsgenome <- getBSgenome(genome_pkg)
3
-
4
- log_info("Converting universalmotif object to MotifDb object ...")
5
-
6
- motifdb_names <- sapply(meme, function(m) m@name)
7
- motifs <- check_motifs(motifdb_names)
8
- meme <- filter_motifs(meme, name = motifs)
9
- # Get the right order of motif names
10
- motifs <- sapply(meme, function(m) m@name)
11
- motifdb_matrices <- lapply(meme, function(m) m@motif)
12
- names(motifdb_matrices) <- motifs
13
2
 
14
- motifdb_meta <- do.call(rbind, lapply(meme, function(m) {
15
- ats <- attributes(m)
16
- ats$dataSource <- basename(motifdb)
17
- ats$class <- NULL
18
- ats$motif <- NULL
19
- ats$gapinfo <- NULL
20
- ats$sequenceCount <- ats$nsites
21
- ats$providerId <- ats$name
22
- ats$providerName <- ats$name
23
- ats$organism <- if (is.null(ats$organism) || length(ats$organism) == 0) "Unknown" else ats$organism
24
- unlist(ats)
25
- }))
26
- rownames(motifdb_meta) <- motifs
27
- mdb <- MotifDb:::MotifList(motifdb_matrices, tbl.metadata = motifdb_meta)
3
+ bsgenome <- getBSgenome(genome_pkg)
28
4
 
29
5
  # `chrom`, `start`, `end`, `name`, `score`, `strand`, `ref`, `alt`.
30
6
  is_indel <- nchar(snpinfo$ref) != 1 | nchar(snpinfo$alt) != 1
@@ -93,4 +69,27 @@ write.table(
93
69
  )
94
70
  rm(results_to_save)
95
71
 
96
- plot_variant(results)
72
+ log_info("Plotting variants ...")
73
+ if (is.null(plots) || length(plots) == 0) {
74
+ results <- results[order(-abs(results$alleleDiff)), , drop = FALSE]
75
+ results <- results[1:min(plot_nvars, length(results)), , drop = FALSE]
76
+ variants <- unique(results$SNP_id)
77
+ } else {
78
+ variants <- names(plots)
79
+ }
80
+ for (variant in variants) {
81
+ log_info("- Variant: {variant}")
82
+ if (is.null(plots[[variant]])) {
83
+ plots[[variant]] <- list(devpars = devpars, which = "TRUE")
84
+ }
85
+ if (is.null(plots[[variant]]$which)) {
86
+ plots[[variant]]$which <- "TRUE"
87
+ }
88
+ if (is.null(plots[[variant]]$devpars)) {
89
+ plots[[variant]]$devpars <- devpars
90
+ }
91
+ res <- results[results$SNP_id == variant, , drop = FALSE]
92
+ res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))
93
+
94
+ plot_variant_motifs(res, variant, plots[[variant]]$devpars, outdir)
95
+ }
@@ -0,0 +1,76 @@
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+ {{ biopipen_dir | joinpaths: "scripts", "regulatory", "motifs-common.R" | source_r }}
3
+
4
+ library(BSgenome)
5
+ library(GenomicRanges)
6
+
7
+ infile <- {{in.infile | r}}
8
+ outdir <- {{out.outdir | r}}
9
+ genome <- {{envs.genome | r}}
10
+ motifdb <- {{envs.motifdb | r}}
11
+ motif_col <- {{envs.motif_col | r}}
12
+ regulator_col <- {{envs.regulator_col | r}}
13
+ regmotifs <- {{envs.regmotifs | r}}
14
+ notfound <- {{envs.notfound | r}}
15
+ devpars <- {{envs.devpars | r}}
16
+ plot_vars <- {{envs.plot_vars | r}}
17
+
18
+ if (is.null(motifdb) || !file.exists(motifdb)) {
19
+ stop("Motif database (envs.motifdb) is required and must exist")
20
+ }
21
+
22
+ if (is.null(genome)) {
23
+ stop("Reference genome (envs.ref) is required and must exist")
24
+ }
25
+
26
+ if (is.null(motif_col) && is.null(regulator_col)) {
27
+ stop("Either motif (envs.motif_col) or regulator (envs.regulator_col) column must be provided")
28
+ }
29
+
30
+ log_info("Reading input data ...")
31
+ indata <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
32
+
33
+ log_info("Ensuring regulators in the input data ...")
34
+ indata <- ensure_regulator_motifs(indata, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
35
+ genome_pkg <- get_genome_pkg(genome)
36
+
37
+ log_info("Reading motif database ...")
38
+ meme <- read_meme_to_motifdb(motifdb, indata, motif_col, regulator_col, notfound, outdir)
39
+
40
+ log_info("Composing motifbreakR results from input data ...")
41
+ indata$chr <- indata$chrom %||% indata$chr %||% indata$seqnames
42
+ indata$seqnames <- NULL
43
+ indata$strand <- indata$strand %||% "+"
44
+ indata$varType <- indata$varType %||% "SNV"
45
+ indata$geneSymbol <- indata$geneSymbol %||% indata$Regulator
46
+ indata$providerId <- indata$providerId %||% indata$motif
47
+ indata$providerName <- indata$providerName %||% indata$providerId
48
+ indata$dataSource <- indata$dataSource %||% strsplit(basename(motifdb), "\\.")[[1]][1]
49
+ indata$effect <- indata$effect %||% "strong"
50
+ indata$altPos <- indata$altPos %||% 1
51
+ indata$alleleDiff <- indata$alleleDiff %||% indata$score %||% 0
52
+
53
+ # check other required columns
54
+ for (col in c("start", "end", "SNP_id", "REF", "ALT", "motifPos")) {
55
+ if (!(col %in% colnames(indata))) {
56
+ stop("Column '", col, "' is required in the input data")
57
+ }
58
+ }
59
+ indata$motifPos <- lapply(indata$motifPos, function(x) as.integer(unlist(strsplit(x, ","))))
60
+ indata <- makeGRangesFromDataFrame(indata, keep.extra.columns = TRUE, starts.in.df.are.0based = TRUE)
61
+ genome(indata) <- genome
62
+ attributes(indata)$genome.package <- genome_pkg
63
+ attributes(indata)$motifs <- meme
64
+
65
+ log_info("Plotting variants ...")
66
+ if (is.null(plot_vars)) {
67
+ plot_vars <- unique(indata$SNP_id)
68
+ } else if (length(plot_vars) > 1) {
69
+ plot_vars <- unique(plot_vars)
70
+ } else {
71
+ plot_vars <- strsplit(plot_vars, ",")[[1]]
72
+ }
73
+ for (pvar in plot_vars) {
74
+ log_info("- Variant: {pvar}")
75
+ plot_variant_motifs(indata, pvar, devpars, outdir)
76
+ }