biopipen 0.31.5__py3-none-any.whl → 0.31.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

@@ -0,0 +1,322 @@
1
+ # make sure biopipen/utils/misc.R is loaded, log_warn is defined, and slugify is defined
2
+
3
+ library(rlang)
4
+ library(universalmotif)
5
+ library(MotifDb)
6
+
7
+ #' @title Common functions for regulatory analysis
8
+ #' @name regulatory-common
9
+ #' @author Panwen Wang
10
+
#' Read a regulator-motif mapping file
#'
#' Reads a tab-delimited file with a header row, locates exactly one motif
#' column and exactly one regulator column by name, and keeps only those two
#' columns (under their original names).
#'
#' @param rmfile Path to the regulator-motif mapping file
#' @param motif_cols_allowed Column names accepted as the motif column
#' @param reg_cols_allowed Column names accepted as the regulator column
#' @return Data frame with the motif column first and the regulator column
#'   second. Callers in this file read the motif column name from
#'   `colnames(x)[1]` and the regulator column name from `colnames(x)[2]`.
.read_regmotifs <- function(
    rmfile,
    motif_cols_allowed = c("Motif", "motif", "MOTIF", "Model", "model", "MODEL"),
    reg_cols_allowed = c("Regulator", "regulator", "REGULATOR", "TF", "tf")
) {
    if (!file.exists(rmfile)) {
        stop("Regulator-motif mapping file does not exist.")
    }
    # check.names = FALSE keeps the header exactly as written in the file,
    # so the name matching below sees the original column names
    regmotifs <- read.table(
        rmfile,
        header = TRUE,
        sep = "\t",
        stringsAsFactors = FALSE,
        check.names = FALSE
    )
    rm_motif_col <- intersect(motif_cols_allowed, colnames(regmotifs))
    rm_reg_col <- intersect(reg_cols_allowed, colnames(regmotifs))
    if (length(rm_motif_col) == 0) {
        stop(paste0("No motif column found in the regulator-motif mapping file, provide one of: ", paste(motif_cols_allowed, collapse = ", ")))
    }
    if (length(rm_reg_col) == 0) {
        stop(paste0("No regulator column found in the regulator-motif mapping file, provide one of: ", paste(reg_cols_allowed, collapse = ", ")))
    }
    if (length(rm_motif_col) > 1) {
        stop(paste0("Multiple motif columns found (", paste(rm_motif_col, collapse = ", "), ") in the regulator-motif mapping file, provide only one"))
    }
    if (length(rm_reg_col) > 1) {
        stop(paste0("Multiple regulator columns found (", paste(rm_reg_col, collapse = ", "), ") in the regulator-motif mapping file, provide only one"))
    }
    # Both vectors are guaranteed to have length one at this point.
    # Motif first, regulator second: downstream code depends on this order.
    regmotifs[, c(rm_motif_col, rm_reg_col), drop = FALSE]
}
46
+
#' Report items that could not be matched
#'
#' Does nothing when `notfound_items` is empty. Otherwise builds a message
#' listing up to the first three items. When there are more than three, the
#' complete list is written to `notfound_file` and a second message pointing
#' to that file is produced. The messages are raised as an error when
#' `notfound` is "error", passed to `log_warn` when it is "ignore", and
#' dropped for any other value.
#'
#' @param notfound_items Items that were not found
#' @param log_warn Function called with each warning message
#' @param msg Message to display before the list of items
#' @param notfound Action to take if items are not found ("error" or "ignore")
#' @param notfound_file File to save the full list of not found items
#' @param log_indent Indentation prepended to each message
.handle_notfound_items <- function (notfound_items, log_warn, msg, notfound, notfound_file, log_indent = "") {
    total <- length(notfound_items)
    if (total == 0) {
        return(invisible(NULL))
    }

    truncated <- total > 3
    preview <- head(notfound_items, 3)
    if (truncated) {
        preview <- c(preview, "...")
        # The full list only goes to disk when it is too long to show inline
        writeLines(notfound_items, notfound_file)
    }
    headline <- paste0(log_indent, msg, ": ", paste(preview, collapse = ", "))

    if (!truncated) {
        if (notfound == "error") {
            stop(headline)
        }
        if (notfound == "ignore") {
            return(log_warn(headline))
        }
        return(invisible(NULL))
    }

    pointer <- paste0(log_indent, "Check the full list in ", notfound_file)
    if (notfound == "error") {
        stop(headline, "\n", pointer)
    }
    if (notfound == "ignore") {
        log_warn(headline)
        return(log_warn(pointer))
    }
    invisible(NULL)
}
79
+
#' Read a MEME file into a MotifDb motif list
#'
#' Reads the motifs, keeps those named in `indata[[motif_col]]` (as vetted by
#' `check_motifs`), and builds a motif list with one metadata row per motif.
#'
#' @param motifdb Path to the MEME file
#' @param indata Input data frame
#' @param motif_col Column name for the motif
#' @param regulator_col Column name for the regulator, or NULL to skip the
#'   `geneSymbol` metadata field
#' @param notfound Action to take if motifs are not found
#' @param outdir Output directory, used to save un-matched motifs
#' @return MotifDb object
#' @export
read_meme_to_motifdb <- function(motifdb, indata, motif_col, regulator_col, notfound, outdir) {
    motif_name <- function(m) m@name

    meme <- read_meme(motifdb)
    wanted <- check_motifs(indata[[motif_col]], sapply(meme, motif_name), notfound, outdir)
    meme <- filter_motifs(meme, name = wanted)
    # Re-read the names so they follow the order of the filtered motifs
    motifs <- sapply(meme, motif_name)

    motifdb_matrices <- lapply(meme, function(m) m@motif)
    names(motifdb_matrices) <- motifs

    # File name up to its first dot, used as the data source label
    data_source <- strsplit(basename(motifdb), "\\.")[[1]][1]

    # Flatten one motif's attributes into a vector of metadata fields
    build_meta <- function(m) {
        ats <- attributes(m)
        ats$dataSource <- data_source
        ats$class <- NULL
        ats$motif <- NULL
        ats$gapinfo <- NULL
        ats$sequenceCount <- ats$nsites
        ats$providerId <- ats$name
        ats$providerName <- ats$name
        ats$organism <- if (is.null(ats$organism) || length(ats$organism) == 0) "Unknown" else ats$organism
        if (!is.null(regulator_col)) {
            is_this_motif <- indata[[motif_col]] == ats$name
            ats$geneSymbol <- indata[is_this_motif, regulator_col, drop = TRUE]
        }
        unlist(ats)
    }

    motifdb_meta <- do.call(rbind, lapply(meme, build_meta))
    rownames(motifdb_meta) <- motifs
    MotifDb:::MotifList(motifdb_matrices, tbl.metadata = motifdb_meta)
}
125
+
#' Convert a MotifDb object to a motif library
#'
#' Transposes every matrix in `motifdb`; element names are kept as the keys
#' of the returned list.
#'
#' @param motifdb MotifDb object
#' @return Motif library: a list of transposed matrices
#' @export
motifdb_to_motiflib <- function(motifdb) {
    lapply(motifdb, function(mat) t(mat))
}
135
+
#' Make sure the input data carries both regulators and motifs
#'
#' Three cases are handled:
#' - both columns given: duplicated regulator/motif pairs are dropped;
#' - no motif column: regulators are looked up in the mapping file, rows with
#'   unknown regulators are removed and the motif column is merged in;
#' - no regulator column: without a mapping file the motifs are made unique;
#'   with one, rows with unknown motifs are removed and the regulator column
#'   is merged in.
#'
#' Side effect: when a column is added from the mapping file, its name is
#' assigned with `<<-` to `motif_col` / `regulator_col` outside this function
#' (presumably the calling script's global variables; verify against caller).
#'
#' @param indata Input data frame
#' @param outdir Output directory, used to save un-matched regulators/motifs
#' @param motif_col Column name for the motif, or NULL
#' @param regulator_col Column name for the regulator, or NULL
#' @param regmotifs Regulator-motif mapping file
#' @param log_indent Indentation for log messages
#' @param notfound Action to take if items are not found in the mapping file
#' @return Data frame with regulators and motifs
#' @export
ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, regmotifs, log_indent = "", notfound = "error") {
    has_motif <- !is.null(motif_col)
    has_regulator <- !is.null(regulator_col)

    # Both columns supplied: only de-duplicate the pairs
    if (has_motif && has_regulator) {
        pair_cols <- indata[, c(regulator_col, motif_col), drop = FALSE]
        return(indata[!duplicated(pair_cols), , drop = FALSE])
    }

    # No motif column: derive motifs from the regulators
    if (!has_motif) {
        if (is.null(regmotifs)) {
            stop("Regulator-motif mapping file (envs.regmotifs) is required when no motif column (envs.motif_col) is provided")
        }
        mapping <- .read_regmotifs(regmotifs)
        rm_motif_col <- colnames(mapping)[1]
        rm_reg_col <- colnames(mapping)[2]
        known_regs <- mapping[[rm_reg_col]]
        .handle_notfound_items(
            setdiff(indata[[regulator_col]], known_regs),
            log_warn,
            "The following regulators were not found in the regulator-motif mapping file",
            notfound,
            file.path(outdir, "notfound_regulators.txt"),
            log_indent
        )
        kept <- indata[indata[[regulator_col]] %in% known_regs, , drop = FALSE]
        kept <- merge(kept, mapping, by.x = regulator_col, by.y = rm_reg_col, all.x = TRUE, suffixes = c("", "_db"))
        # Publish the name of the newly added motif column
        motif_col <<- rm_motif_col
        return(kept)
    }

    # Motif column only, no usable mapping file: keep one row per motif
    if (is.null(regmotifs) || (is.character(regmotifs) && nchar(regmotifs) == 0)) {
        return(indata[!duplicated(indata[[motif_col]]), , drop = FALSE])
    }
    if (!file.exists(regmotifs)) {
        stop("Regulator-motif mapping file (envs.regmotifs) does not exist.")
    }

    # Motif column only, with a mapping file: derive regulators from motifs
    mapping <- .read_regmotifs(regmotifs)
    rm_motif_col <- colnames(mapping)[1]
    rm_reg_col <- colnames(mapping)[2]
    known_motifs <- mapping[[rm_motif_col]]
    .handle_notfound_items(
        setdiff(indata[[motif_col]], known_motifs),
        log_warn,
        "The following motifs were not found in the regulator-motif mapping file",
        notfound,
        file.path(outdir, "notfound_motifs.txt"),
        log_indent
    )
    kept <- indata[indata[[motif_col]] %in% known_motifs, , drop = FALSE]
    kept <- merge(kept, mapping, by.x = motif_col, by.y = rm_motif_col, all.x = TRUE, suffixes = c("", "_db"))
    # Publish the name of the newly added regulator column
    regulator_col <<- rm_reg_col
    kept
}
206
+
#' Get the genome package name for a given genome
#'
#' A name containing a dot is taken as a package name as-is; any other name
#' is expanded to `BSgenome.Hsapiens.UCSC.<genome>`. The package is then
#' attached with `library()`.
#'
#' @param genome Genome name or genome package name
#' @return Genome package name
#' @export
get_genome_pkg <- function(genome) {
    looks_like_pkg <- grepl(".", genome, fixed = TRUE)
    genome_pkg <- if (looks_like_pkg) {
        genome
    } else {
        sprintf("BSgenome.Hsapiens.UCSC.%s", genome)
    }

    if (!requireNamespace(genome_pkg, quietly = TRUE)) {
        stop(sprintf("Genome package %s is not installed", genome_pkg))
    }

    library(package = genome_pkg, character.only = TRUE)
    genome_pkg
}
225
+
#' Check if motifs are in the motif database
#' and return the motifs that are found
#'
#' When some motifs are missing, a message naming up to the first three is
#' built. With more than three missing, the full list is also written to
#' `<outdir>/notfound_motifs.txt`. The message is raised as an error when
#' `notfound` is "error" and sent to `log_warn` when it is "ignore".
#'
#' @param motifs Motifs to check
#' @param all_motifs All motifs in the motif database
#' @param notfound Action to take if motifs are not found
#' @param outdir Output directory, used to save un-matched motifs
#' @return `motifs` unchanged when all are found, otherwise the motifs that
#'   are found
#' @export
check_motifs <- function(motifs, all_motifs, notfound, outdir) {
    missing_motifs <- setdiff(motifs, all_motifs)
    n_missing <- length(missing_motifs)
    if (n_missing == 0) {
        return(motifs)
    }

    prefix <- "The following motifs were not found in the motif database: "
    if (n_missing > 3) {
        shown <- c(head(missing_motifs, 3), "...")
        notfound_file <- file.path(outdir, "notfound_motifs.txt")
        writeLines(missing_motifs, notfound_file)
        msg1 <- paste0(prefix, paste(shown, collapse = ", "))
        msg2 <- paste0("Check the full list in ", notfound_file)

        if (notfound == "error") {
            stop(msg1, "\n", msg2)
        } else if (notfound == "ignore") {
            log_warn(msg1)
            log_warn(msg2)
        }
    } else {
        msg <- paste0(prefix, paste(missing_motifs, collapse = ", "))
        if (notfound == "error") {
            stop(msg)
        } else if (notfound == "ignore") {
            log_warn(msg)
        }
    }

    setdiff(motifs, missing_motifs)
}
265
+
#' Plot a genomic region surrounding a genomic variant, and
#' potentially disrupted motifs.
#'
#' The plot is written to "<outdir>/plots/<slugified variant>.png" using
#' `motifbreakR::plotMB()`.
#'
#' @param results The motifbreakR results.
#'  A GRanges object with the following columns:
#'  - seqnames: Chromosome
#'  - ranges: Start and end positions
#'  - strand: Strand
#'  -------------------
#'  - SNP_id: Variant ID
#'  - REF: Reference allele
#'  - ALT: Alternative allele
#'  - varType: Variant type. By default, "SNV"
#'  - motifPos: Motif positions
#'  - geneSymbol: Gene symbol, if not provided, try to get from the Regulator column
#'  - dataSource: Motif database source
#'  - providerName: Motif name
#'  - providerId: Motif ID
#'  - effect: Effect of the variant. By default, "strong"
#'  - altPos: Alternative allele position. By default, 1
#'  - alleleDiff: Allele difference, default 0, does not affect the plot for SNVs
#'
#'  Attributes:
#'  - genome.package: Genome package name
#'  - motifs: Motif database, in MotifDb::MotifList format
#' @param variant Variant ID to be plotted
#' @param devpars List of device parameters
#'  - res: Resolution, default 100
#'  - width: Width of the plot, default NULL, calculated based on sequence length
#'  - height: Height of the plot, default NULL, calculated based on the number of motifs
#'  A width or height supplied by the caller is used as given.
#' @param outdir Output directory. Plots will be saved in the sub-directory "<outdir>/plots/"
#' @return The value of `dev.off()` after the plot has been written
#' @export
plot_variant_motifs <- function(results, variant, devpars, outdir) {
    plotdir <- file.path(outdir, "plots")
    # recursive = TRUE so a not-yet-existing outdir does not make png() fail later
    dir.create(plotdir, showWarnings = FALSE, recursive = TRUE)

    res <- results[results$SNP_id == variant, , drop = FALSE]
    if (length(res) == 0) {
        stop(sprintf("Variant %s not found in results", variant))
    }

    devpars <- devpars %||% list(res = 100, width = NULL, height = NULL)
    devpars$res <- devpars$res %||% 100
    # %||% binds tighter than * and +, so the default must be parenthesised.
    # Without the parentheses a caller-supplied height would be multiplied by
    # res and have the per-motif term added to it.
    devpars$height <- devpars$height %||%
        (2.4 * devpars$res + length(res) * 1.2 * devpars$res)
    if (is.null(devpars$width)) {
        left <- min(sapply(res$motifPos, `[`, 1))
        right <- max(sapply(res$motifPos, `[`, 2))
        devpars$width <- 1.5 * devpars$res + (right - left) * 0.3 * devpars$res
        devpars$width <- max(devpars$width, 5 * devpars$res)
    }

    plotfile <- file.path(plotdir, sprintf("%s.png", slugify(variant)))
    # fix motifBreakR 2.12 using names to filter in plotMB
    names(res) <- res$SNP_id

    png(plotfile, width = devpars$width, height = devpars$height, res = devpars$res)
    # Close the device even when plotMB() fails, so no device is leaked
    device_closed <- FALSE
    on.exit(if (!device_closed) dev.off(), add = TRUE)
    motifbreakR::plotMB(res, variant)
    device_closed <- TRUE
    dev.off()
}
@@ -1,13 +1,15 @@
1
+ # shellcheck disable=SC1083
1
2
  compvcf={{in.compvcf | quote}}
2
3
  basevcf={{in.basevcf | quote}}
3
4
  outdir={{out.outdir | quote}}
4
5
  truvari={{envs.truvari | quote}}
5
6
  ref={{envs.ref | quote}}
6
7
  refdist={{envs.refdist | quote}}
7
- pctsim={{envs.pctsim | quote}}
8
+ pctseq={{envs.pctseq | quote}}
8
9
  pctsize={{envs.pctsize | quote}}
9
10
  pctovl={{envs.pctovl | quote}}
10
11
  sizemax={{envs.sizemax | default: 50000 | quote}}
12
+ # shellcheck disable=SC1054
11
13
  {% if envs.typeignore %}
12
14
  typeignore="--typeignore"
13
15
  {% else %}
@@ -15,20 +17,25 @@ typeignore=""
15
17
  {% endif %}
16
18
  {% if envs.multimatch %}
17
19
  multimatch="--multimatch"
20
+ # shellcheck disable=SC1009
18
21
  {% else %}
19
22
  multimatch=""
23
+ # shellcheck disable=SC1073
20
24
  {% endif %}
21
25
 
22
26
  rm -rf $outdir
23
- $truvari bench \
24
- -c "$compvcf" \
25
- -b "$basevcf" \
26
- -f "$ref" \
27
+ cmd="$truvari bench \
28
+ -c '$compvcf' \
29
+ -b '$basevcf' \
30
+ -f '$ref' \
27
31
  --refdist $refdist \
28
- --pctsim $pctsim \
32
+ --pctseq $pctseq \
29
33
  --pctsize $pctsize \
30
34
  --pctovl $pctovl \
31
35
  --sizemax $sizemax \
32
36
  $typeignore \
33
37
  $multimatch \
34
- -o $outdir
38
+ -o $outdir"
39
+
40
+ echo "$cmd"
41
+ eval "$cmd"
@@ -17,7 +17,7 @@ read_summary = function() {
17
17
 
18
18
  summaries = NULL
19
19
  for (indir in indirs) {
20
- summary = fromJSON(file=file.path(indir, "summary.txt"))
20
+ summary = fromJSON(file=file.path(indir, "summary.json"))
21
21
  summary$gt_matrix = NULL
22
22
  summary$Sample = sub(".truvari_bench", "", basename(indir), fixed=T)
23
23
  summaries = bind_rows(summaries, summary)
@@ -43,7 +43,6 @@ plot_summary = function(col) {
43
43
  summaries,
44
44
  "col",
45
45
  list(mapping = aes_string(x = "Sample", y = bQuote(col), fill = "Sample")),
46
-
47
46
  devpars = get_devpars(),
48
47
  outfile = outfile
49
48
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: biopipen
3
- Version: 0.31.5
3
+ Version: 0.31.6
4
4
  Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
5
5
  License: MIT
6
6
  Author: pwwang
@@ -1,4 +1,4 @@
1
- biopipen/__init__.py,sha256=VSx4_WLVLq_7UtX4GtNLbObe0lMQRa5JR9eh0ofSz4A,23
1
+ biopipen/__init__.py,sha256=KU7MsdICtcB5jVm5DAaNainBCUqYItaZLSuj12ONgkE,23
2
2
  biopipen/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  biopipen/core/config.py,sha256=edK5xnDhM8j27srDzsxubi934NMrglLoKrdcC8qsEPk,1069
4
4
  biopipen/core/config.toml,sha256=7IXvviRicZ2D1h6x3BVgbLJ96nsh-ikvZ0sVlQepqFE,1944
@@ -20,7 +20,7 @@ biopipen/ns/gsea.py,sha256=EsNRAPYsagaV2KYgr4Jv0KCnZGqayM209v4yOGGTIOI,7423
20
20
  biopipen/ns/misc.py,sha256=qXcm0RdR6W-xpYGgQn3v7JBeYRWwVm5gtgSj2tdVxx4,2935
21
21
  biopipen/ns/plot.py,sha256=XzLq0A8qCIQRbxhPEdWhEfbRZ8g3e4KriVz0RP8enNY,18078
22
22
  biopipen/ns/protein.py,sha256=33pzM-gvBTw0jH60mvfqnriM6uw2zj3katZ82nC9owI,3309
23
- biopipen/ns/regulatory.py,sha256=qvc9QrwgwCI_lg0DQ2QOZbAhC8BAD1qnQXSGtAGlVcY,11750
23
+ biopipen/ns/regulatory.py,sha256=gJjGVpJrdv-rg2t5UjK4AGuvtLNymaNYNvoD8PhlbvE,15929
24
24
  biopipen/ns/rnaseq.py,sha256=bKAa6friFWof4yDTWZQahm1MS-lrdetO1GqDKdfxXYc,7708
25
25
  biopipen/ns/scrna.py,sha256=fXP_h7gchcuk_Jwos0IgY_P8ON6Q995OgKHgdrxfvAY,112868
26
26
  biopipen/ns/scrna_metabolic_landscape.py,sha256=6AhaynGG3lNRi96N2tReVT46BJMuEwooSSd2irBoN80,28347
@@ -28,7 +28,7 @@ biopipen/ns/snp.py,sha256=-Jx5Hsv_7KV7TqLU0nHCaPkMEN0CFdi4tNVlyq0rUZ4,27259
28
28
  biopipen/ns/stats.py,sha256=DlPyK5Vsg6ZEkV9SDS3aAw21eXzvOHgqeZDkXPhg7go,20509
29
29
  biopipen/ns/tcgamaf.py,sha256=AFbUJIxiMSvsVY3RcHgjRFuMnNh2DG3Mr5slLNEyz6o,1455
30
30
  biopipen/ns/tcr.py,sha256=0PCF8iPZ629z6P3RHoAWEpMWmuDslomTGcMopjqvXmE,88304
31
- biopipen/ns/vcf.py,sha256=zidwskYZ3IIY1sAKYp6WXOiEOdrJjw438JQW1TC7l9s,22694
31
+ biopipen/ns/vcf.py,sha256=OYWuAWADba1xLwvHmIPwXYin_rUaAFQq7N38DQvoYzs,22746
32
32
  biopipen/ns/web.py,sha256=4itJzaju8VBARIyZjDeh5rsVKpafFq_whixnvL8sXes,5368
33
33
  biopipen/reports/bam/CNAClinic.svelte,sha256=D4IxQcgDCPQZMbXog-aZP5iJEQTK2N4i0C60e_iXyfs,213
34
34
  biopipen/reports/bam/CNVpytor.svelte,sha256=s03SlhbEPd8-_44Dy_cqE8FSErhUdqStLK39te5o7ZE,1364
@@ -126,12 +126,12 @@ biopipen/scripts/plot/Scatter.R,sha256=fg4H5rgdr6IePTMAIysiElnZme0vCh1T0wrwH2Q9x
126
126
  biopipen/scripts/plot/VennDiagram.R,sha256=Am9umSGr2QxZc2MIMGMBhpoEqta3qt_ItF-9_Y53SXE,704
127
127
  biopipen/scripts/protein/Prodigy.py,sha256=W56e51SkaWqthrkCSr2HUqhE9NfJQWZj4y0HXIqaYRA,4459
128
128
  biopipen/scripts/protein/ProdigySummary.R,sha256=1s3ofk6Kvs--GAAvzV8SdAyq5LrYozWtIlL32b6ZarE,3806
129
- biopipen/scripts/regulatory/MotifAffinityTest.R,sha256=1sR3sWRZbxDeFO290LcpzZglzOLc13SSvibDON96PCg,8852
130
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R,sha256=SAyTm2-6g5qVJFRrLxEY0QJrLWTkwDi_J_9J7HhtTN8,4438
131
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R,sha256=wCK4tLx1iWh_OwW7ZvLTCjTGWCIfVqw-lYC0-JqIPqg,3338
129
+ biopipen/scripts/regulatory/MotifAffinityTest.R,sha256=McAnbduE_6SMD_4RuftBemPdfJD9LeFYUYwqL3fzfjU,3047
130
+ biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R,sha256=lzP3EtmpMucWviLDgXLeP_JvG4VADykBOl49CkftiR8,4366
131
+ biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R,sha256=WpkPVNvWonmZqQ8khdDg2VHhda7ZHexvLFGRz4qgv88,3304
132
132
  biopipen/scripts/regulatory/MotifScan.py,sha256=WtSbs8z08oeTgzjr0LfIDmjbUdknAh1raa_QPQ_NCFg,5336
133
- biopipen/scripts/regulatory/atSNP.R,sha256=TXJARbE0rDIzSq6Spacz_HsM_DXdREJ95ZsBg26trgw,1288
134
- biopipen/scripts/regulatory/motifBreakR.R,sha256=trzvvTvwc5gSO427wkqx93FecuQxLGa9kNqodKa8S0U,70236
133
+ biopipen/scripts/regulatory/VariantMotifPlot.R,sha256=RNnBc0bboGfrJOPk5CsUbFRBMBvVX8zgGsrI5eybNyo,2874
134
+ biopipen/scripts/regulatory/motifs-common.R,sha256=7deFrEzZKYzNhmYTsBqqb91CbIj2vtF7lRiPX0yGkO8,13277
135
135
  biopipen/scripts/rnaseq/Simulation-ESCO.R,sha256=68cEHDdJclX8P8Q7ey9yBOfK09M_kxlL6zgYXsEL2Rs,6378
136
136
  biopipen/scripts/rnaseq/Simulation-RUVcorr.R,sha256=6C6Ke5RLF0fC2V9WQPoFEdqoDabCnhslZBIyB6zhIxc,1155
137
137
  biopipen/scripts/rnaseq/Simulation.R,sha256=PK9tZS88AcBPStcFalZlMU0KE0gSqFSQvhUoQ-8eg90,871
@@ -250,8 +250,8 @@ biopipen/scripts/vcf/BcftoolsAnnotate.py,sha256=iS-T6IhumqePW5kdyi_Tb6rubyIiCMjS
250
250
  biopipen/scripts/vcf/BcftoolsFilter.py,sha256=AdQMXFTNLCS5eqYWMNIMbkK8qXJ5j8GYm7HdPopVk0c,2573
251
251
  biopipen/scripts/vcf/BcftoolsSort.py,sha256=tU0pTrEIB-7x6iOSfU-KpYY1rEidi6Q4179NntY3cGc,3778
252
252
  biopipen/scripts/vcf/BcftoolsView.py,sha256=Sj3KkYPpwQFo5kmZC5MRxItrSE5KVZi0jNYrRFck3Ow,2465
253
- biopipen/scripts/vcf/TruvariBench.sh,sha256=80yLQ73OzSgsJ4ltzgpcWxYvvX1hFnCG8YSBhhhRQ9Y,765
254
- biopipen/scripts/vcf/TruvariBenchSummary.R,sha256=Q1VCYqOZ-VI8lgXW6Yqmw6NkLH168e7V6eYFbr0W4EE,1516
253
+ biopipen/scripts/vcf/TruvariBench.sh,sha256=5M7lZhO4laNJvCVCHudf8DYArKNXoiPWuSkXgRi2t_A,908
254
+ biopipen/scripts/vcf/TruvariBenchSummary.R,sha256=rdNNIPoiwqnK6oEOlQUUhnL1MF958W_nDjRCkA5ubz4,1516
255
255
  biopipen/scripts/vcf/TruvariConsistency.R,sha256=6h20v8qztbl1KZInJwoSK_t5XwqhKMTMzPWNPhoAjlM,2314
256
256
  biopipen/scripts/vcf/Vcf2Bed.py,sha256=LzyJ9qW1s5mbfF8maLc77_0rE98KMc2lq1R94_NFbSU,855
257
257
  biopipen/scripts/vcf/VcfAnno.py,sha256=FW626rAs_WSU7AHQMKjfkYoByUGh_gVyJM97neGfOMo,802
@@ -284,7 +284,7 @@ biopipen/utils/reference.py,sha256=oi5evicLwHxF0KAIPNZohBeHJLJQNWFJH0cr2y5pgcg,5
284
284
  biopipen/utils/rnaseq.R,sha256=Ro2B2dG-Z2oVaT5tkwp9RHBz4dp_RF-JcizlM5GYXFs,1298
285
285
  biopipen/utils/single_cell.R,sha256=pJjYP8bIZpNAtTQ32rOXhZxaM1Y-6D-xUcK3pql9tbk,4316
286
286
  biopipen/utils/vcf.py,sha256=ajXs0M_QghEctlvUlSRjWQIABVF02wPdYd-0LP4mIsU,9377
287
- biopipen-0.31.5.dist-info/METADATA,sha256=mRJi-cY3E8tLValjumEgu28oAiy5NNpFMQRsrNiRPVg,882
288
- biopipen-0.31.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
289
- biopipen-0.31.5.dist-info/entry_points.txt,sha256=BYqHGBQJxyFDNLYqgH64ycI5PYwnlqwYcCFsMvJgzAU,653
290
- biopipen-0.31.5.dist-info/RECORD,,
287
+ biopipen-0.31.6.dist-info/METADATA,sha256=2NGpF5QMNq7lG0y8MQIGpfFYyRE9lYz17RpF6dEtq0k,882
288
+ biopipen-0.31.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
289
+ biopipen-0.31.6.dist-info/entry_points.txt,sha256=BYqHGBQJxyFDNLYqgH64ycI5PYwnlqwYcCFsMvJgzAU,653
290
+ biopipen-0.31.6.dist-info/RECORD,,
@@ -1,33 +0,0 @@
#' Convert a SNP info table into the list structure built for atSNP
#'
#' Bases are encoded as integers (A = 1, C = 2, G = 3, T = 4). Only
#' single-base ref/alt alleles are accepted.
#'
#' @param snpinfo Data frame; the columns read here are `name`, `ref`, `alt`
#'   and `ref_seq`
#' @return List with `sequence_matrix`, `ref_base`, `snp_base`, `snpids`,
#'   `transition`, `prior` and four `rsid.*` entries set to NULL
snpinfo2atsnp <- function(snpinfo) {
    # c("chrom", "start", "end", "name", "score", "strand", "ref", "alt", "ref_seq", "alt_seq")
    if (any(nchar(snpinfo$ref) != 1) || any(nchar(snpinfo$alt) != 1)) {
        stop("Only SNVs are supported by atSNP. Consider using motifbreakR instead if you have indels.")
    }

    bases <- c("A", "C", "G", "T")
    base_encodings <- setNames(c(1, 2, 3, 4), bases)

    # Encode one sequence string as a vector of integer base codes
    encode_seq <- function(s) {
        as.integer(base_encodings[strsplit(s, "")[[1]]])
    }

    transition <- matrix(
        c(
            0.3225035, 0.1738422, 0.24915044, 0.2545039,
            0.3451410, 0.2642147, 0.05245011, 0.3381942,
            0.2813089, 0.2136604, 0.26749171, 0.2375390,
            0.2149776, 0.2071733, 0.25309238, 0.3247568
        ),
        nrow = 4,
        byrow = TRUE,
        dimnames = list(bases, bases)
    )

    sequence_matrix <- unname(sapply(snpinfo$ref_seq, encode_seq))

    list(
        sequence_matrix = sequence_matrix,
        ref_base = as.integer(base_encodings[snpinfo$ref]),
        snp_base = as.integer(base_encodings[snpinfo$alt]),
        snpids = snpinfo$name,
        transition = transition,
        prior = c(A = 0.287, C = 0.211, G = 0.213, T = 0.289),
        rsid.na = NULL,
        rsid.rm = NULL,
        rsid.duplicate = NULL,
        rsid.missing = NULL
    )
}