biopipen 0.31.4__py3-none-any.whl → 0.31.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

@@ -0,0 +1,322 @@
1
+ # make sure biopipen/utils/misc.R is loaded, log_warn is defined, and slugify is defined
2
+
3
+ library(rlang)
4
+ library(universalmotif)
5
+ library(MotifDb)
6
+
7
+ #' @title Common functions for regulatory analysis
8
+ #' @name regulatory-common
9
+ #' @author Panwen Wang
10
+
11
#' Read a regulator-motif mapping file
#'
#' The file is read as a tab-separated table with a header line. It must
#' contain exactly one column whose name is listed in `motif_cols_allowed`
#' and exactly one column whose name is listed in `reg_cols_allowed`.
#'
#' @param rmfile Path to the regulator-motif mapping file
#' @param motif_cols_allowed Column names accepted as the motif column
#' @param reg_cols_allowed Column names accepted as the regulator column
#' @return Data frame with two columns, keeping their original names:
#'   the motif column first and the regulator column second.
#'   `ensure_regulator_motifs()` depends on this order.
.read_regmotifs <- function(
    rmfile,
    motif_cols_allowed = c("Motif", "motif", "MOTIF", "Model", "model", "MODEL"),
    reg_cols_allowed = c("Regulator", "regulator", "REGULATOR", "TF", "tf", "TF")
) {
    if (!file.exists(rmfile)) {
        stop("Regulator-motif mapping file does not exist.")
    }
    regmotifs <- read.table(
        rmfile,
        header = TRUE,
        sep = "\t",
        stringsAsFactors = FALSE,
        check.names = FALSE
    )
    rm_motif_col <- intersect(motif_cols_allowed, colnames(regmotifs))
    rm_reg_col <- intersect(reg_cols_allowed, colnames(regmotifs))

    # unique() keeps the hint readable when the allowed names contain repeats
    if (length(rm_motif_col) == 0) {
        stop(paste0(
            "No motif column found in the regulator-motif mapping file, provide one of: ",
            paste(unique(motif_cols_allowed), collapse = ", ")
        ))
    }
    if (length(rm_reg_col) == 0) {
        stop(paste0(
            "No regulator column found in the regulator-motif mapping file, provide one of: ",
            paste(unique(reg_cols_allowed), collapse = ", ")
        ))
    }
    if (length(rm_motif_col) > 1) {
        stop(paste0(
            "Multiple motif columns found (",
            paste(rm_motif_col, collapse = ", "),
            ") in the regulator-motif mapping file, provide only one"
        ))
    }
    if (length(rm_reg_col) > 1) {
        stop(paste0(
            "Multiple regulator columns found (",
            paste(rm_reg_col, collapse = ", "),
            ") in the regulator-motif mapping file, provide only one"
        ))
    }

    # Both selections are exactly one name here; motif first, regulator second.
    regmotifs[, c(rm_motif_col, rm_reg_col), drop = FALSE]
}
46
+
47
#' Report items that could not be matched
#'
#' Does nothing when `notfound_items` is empty. Otherwise a message listing
#' up to the first three items is built. When there are more than three
#' items, the complete list is written to `notfound_file` and a second
#' message pointing to that file is added.
#'
#' @param notfound_items Items that were not found
#' @param log_warn Function called with each message when `notfound` is "ignore"
#' @param msg Message prefix; the item preview is appended after ": "
#' @param notfound "error" raises an error with the message(s), "ignore"
#'   passes them to `log_warn`; any other value reports nothing
#' @param notfound_file File that receives the full list (only written when
#'   more than three items are missing)
#' @param log_indent Prefix put in front of every message
.handle_notfound_items <- function (notfound_items, log_warn, msg, notfound, notfound_file, log_indent = "") {
    n_missing <- length(notfound_items)
    if (n_missing == 0) {
        return(invisible(NULL))
    }

    too_many <- n_missing > 3
    preview <- head(notfound_items, 3)
    if (too_many) {
        preview <- c(preview, "...")
        # The full list is saved before anything is raised or logged
        writeLines(notfound_items, notfound_file)
    }
    headline <- paste0(log_indent, msg, ": ", paste(preview, collapse = ", "))

    if (!too_many) {
        if (notfound == "error") {
            stop(headline)
        } else if (notfound == "ignore") {
            log_warn(headline)
        }
    } else {
        pointer <- paste0(log_indent, "Check the full list in ", notfound_file)
        if (notfound == "error") {
            stop(headline, "\n", pointer)
        } else if (notfound == "ignore") {
            log_warn(headline)
            log_warn(pointer)
        }
    }
}
79
+
80
#' Read a MEME file into a MotifDb motif list
#'
#' Reads the motifs with `read_meme()`, keeps only those named in
#' `indata[[motif_col]]` (unmatched names are handled by `check_motifs()`),
#' and builds a motif list together with a per-motif metadata table.
#'
#' @param motifdb Path to the MEME file
#' @param indata Input data frame
#' @param motif_col Column name for the motif
#' @param regulator_col Column name for the regulator, or NULL; when given,
#'   the matching values are stored as `geneSymbol` in the metadata
#' @param notfound Action to take if motifs are not found
#' @param outdir Output directory, used to save un-matched motifs
#' @return Motif list built by `MotifDb:::MotifList()`
#' @export
read_meme_to_motifdb <- function(motifdb, indata, motif_col, regulator_col, notfound, outdir) {
    # NOTE(review): universalmotif readers/filters appear to hand back a bare
    # motif object instead of a list when only one motif is present; wrap it
    # so the list-based code below works either way.
    as_motif_list <- function(x) if (is.list(x)) x else list(x)
    motif_name <- function(m) m@name

    meme <- as_motif_list(read_meme(motifdb))
    motifdb_names <- vapply(meme, motif_name, character(1))
    motifs <- check_motifs(indata[[motif_col]], motifdb_names, notfound, outdir)
    meme <- as_motif_list(filter_motifs(meme, name = motifs))
    if (length(meme) == 0) {
        stop("None of the motifs in the input data were found in the motif database: ", motifdb)
    }

    # Names in the order the motifs are kept after filtering
    motifs <- vapply(meme, motif_name, character(1))
    motifdb_matrices <- lapply(meme, function(m) m@motif)
    names(motifdb_matrices) <- motifs

    # File name up to its first dot; identical for every motif
    data_source <- strsplit(basename(motifdb), "\\.")[[1]][1]
    motifdb_meta <- do.call(rbind, lapply(meme, function(m) {
        ats <- attributes(m)
        ats$dataSource <- data_source
        # Drop entries that are not plain per-motif metadata
        ats$class <- NULL
        ats$motif <- NULL
        ats$gapinfo <- NULL
        ats$sequenceCount <- ats$nsites
        ats$providerId <- ats$name
        ats$providerName <- ats$name
        ats$organism <- if (is.null(ats$organism) || length(ats$organism) == 0) "Unknown" else ats$organism
        if (!is.null(regulator_col)) {
            # NOTE(review): more than one matching row in indata yields several
            # values here and a longer metadata row for this motif - confirm
            # callers de-duplicate motifs beforehand.
            ats$geneSymbol <- indata[
                indata[[motif_col]] == ats$name,
                regulator_col,
                drop = TRUE
            ]
        }
        unlist(ats)
    }))
    rownames(motifdb_meta) <- motifs
    MotifDb:::MotifList(motifdb_matrices, tbl.metadata = motifdb_meta)
}
125
+
126
#' Turn a MotifDb object into a motif library
#'
#' Every element of `motifdb` is transposed with `t()`; the result is
#' whatever `lapply()` returns for `motifdb`.
#'
#' @param motifdb MotifDb object
#' @return Motif library: the transposed matrices, one per motif
#' @export
motifdb_to_motiflib <- function(motifdb) {
    transpose_pwm <- function(pwm) t(pwm)
    lapply(motifdb, transpose_pwm)
}
135
+
136
#' Make sure the input data has both regulators and motifs, completing the
#' missing one from a regulator-motif mapping file when necessary
#'
#' Three situations are handled:
#' - both columns given: duplicated regulator/motif pairs are dropped;
#' - no motif column: regulators are looked up in the mapping file and the
#'   motif column is merged in;
#' - no regulator column: without a mapping file, duplicated motifs are
#'   dropped; with one, motifs are looked up and the regulator column is
#'   merged in.
#'
#' Side effect: when a column is merged in from the mapping file, its name is
#' assigned with `<<-` to `motif_col` / `regulator_col` outside this function,
#' so code running after the call sees the new column name.
#'
#' @param indata Input data frame
#' @param outdir Output directory, used to save un-matched regulators/motifs
#' @param motif_col Column name for the motif, or NULL
#' @param regulator_col Column name for the regulator, or NULL
#' @param regmotifs Regulator-motif mapping file
#' @param log_indent Indentation for log messages
#' @param notfound Action to take if items are not found in the mapping file
#' @return Data frame with regulators and motifs
#' @export
ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, regmotifs, log_indent = "", notfound = "error") {
    # Both columns supplied: only de-duplicate the regulator/motif pairs
    if (!is.null(motif_col) && !is.null(regulator_col)) {
        dup_pairs <- duplicated(indata[, c(regulator_col, motif_col), drop = FALSE])
        return(indata[!dup_pairs, , drop = FALSE])
    }

    if (is.null(motif_col)) {
        # Motifs have to come from the mapping file
        if (is.null(regmotifs)) {
            stop("Regulator-motif mapping file (envs.regmotifs) is required when no motif column (envs.motif_col) is provided")
        }
        mapping <- .read_regmotifs(regmotifs)
        map_motif_col <- colnames(mapping)[1]
        map_reg_col <- colnames(mapping)[2]
        known_regs <- mapping[[map_reg_col]]

        .handle_notfound_items(
            setdiff(indata[[regulator_col]], known_regs),
            log_warn,
            "The following regulators were not found in the regulator-motif mapping file",
            notfound,
            file.path(outdir, "notfound_regulators.txt"),
            log_indent
        )

        matched <- indata[indata[[regulator_col]] %in% known_regs, , drop = FALSE]
        matched <- merge(
            matched,
            mapping,
            by.x = regulator_col,
            by.y = map_reg_col,
            all.x = TRUE,
            suffixes = c("", "_db")
        )
        # Publish the new motif column name outside this function
        motif_col <<- map_motif_col
        return(matched)
    }

    # From here on: motif column given, regulator column missing
    no_mapping <- is.null(regmotifs) || (is.character(regmotifs) && nchar(regmotifs) == 0)
    if (no_mapping) {
        # Nothing to map; just keep each motif once
        return(indata[!duplicated(indata[[motif_col]]), , drop = FALSE])
    }
    if (!file.exists(regmotifs)) {
        stop("Regulator-motif mapping file (envs.regmotifs) does not exist.")
    }

    mapping <- .read_regmotifs(regmotifs)
    map_motif_col <- colnames(mapping)[1]
    map_reg_col <- colnames(mapping)[2]
    known_motifs <- mapping[[map_motif_col]]

    .handle_notfound_items(
        setdiff(indata[[motif_col]], known_motifs),
        log_warn,
        "The following motifs were not found in the regulator-motif mapping file",
        notfound,
        file.path(outdir, "notfound_motifs.txt"),
        log_indent
    )

    matched <- indata[indata[[motif_col]] %in% known_motifs, , drop = FALSE]
    matched <- merge(
        matched,
        mapping,
        by.x = motif_col,
        by.y = map_motif_col,
        all.x = TRUE,
        suffixes = c("", "_db")
    )
    # Publish the new regulator column name outside this function
    regulator_col <<- map_reg_col
    return(matched)
}
206
+
207
#' Resolve, check and attach the genome package for a genome
#'
#' A `genome` containing a dot is taken as a package name as-is; otherwise
#' it is expanded to "BSgenome.Hsapiens.UCSC.<genome>". The package is then
#' attached with `library()`.
#'
#' @param genome Genome name, or a full genome package name
#' @return Genome package name
#' @export
get_genome_pkg <- function(genome) {
    looks_like_pkg <- grepl(".", genome, fixed = TRUE)
    genome_pkg <- if (looks_like_pkg) {
        genome
    } else {
        sprintf("BSgenome.Hsapiens.UCSC.%s", genome)
    }

    installed <- requireNamespace(genome_pkg, quietly = TRUE)
    if (!installed) {
        stop(sprintf("Genome package %s is not installed", genome_pkg))
    }

    library(package = genome_pkg, character.only = TRUE)
    return(genome_pkg)
}
225
+
226
#' Keep only the motifs that exist in the motif database
#'
#' When every motif is known, `motifs` is returned unchanged. Otherwise the
#' unknown motifs are reported: up to three of them are listed in the
#' message, and when there are more than three the full list is written to
#' "<outdir>/notfound_motifs.txt".
#'
#' @param motifs Motifs to check
#' @param all_motifs All motifs in the motif database
#' @param notfound "error" raises an error, "ignore" logs through `log_warn`;
#'   any other value reports nothing
#' @param outdir Output directory, used to save un-matched motifs
#' @return Motifs that are found
#' @export
check_motifs <- function(motifs, all_motifs, notfound, outdir) {
    missing_motifs <- setdiff(motifs, all_motifs)
    if (length(missing_motifs) == 0) {
        return(motifs)
    }

    preview <- head(missing_motifs, 3)
    if (length(missing_motifs) <= 3) {
        report <- paste0(
            "The following motifs were not found in the motif database: ",
            paste(preview, collapse = ", ")
        )
        if (notfound == "error") {
            stop(report)
        } else if (notfound == "ignore") {
            log_warn(report)
        }
    } else {
        preview <- c(preview, "...")
        listing <- file.path(outdir, "notfound_motifs.txt")
        # The full list is saved before anything is raised or logged
        writeLines(missing_motifs, listing)
        report <- paste0(
            "The following motifs were not found in the motif database: ",
            paste(preview, collapse = ", ")
        )
        hint <- paste0("Check the full list in ", listing)

        if (notfound == "error") {
            stop(report, "\n", hint)
        } else if (notfound == "ignore") {
            log_warn(report)
            log_warn(hint)
        }
    }

    return(setdiff(motifs, missing_motifs))
}
265
+
266
#' Plot a genomic region surrounding a genomic variant, and
#' potentially disrupted motifs.
#'
#' @param results The motifbreakR results.
#' A GRanges object with the following columns:
#' - seqnames: Chromosome
#' - ranges: Start and end positions
#' - strand: Strand
#' -------------------
#' - SNP_id: Variant ID
#' - REF: Reference allele
#' - ALT: Alternative allele
#' - varType: Variant type. By default, "SNV"
#' - motifPos: Motif positions
#' - geneSymbol: Gene symbol, if not provided, try to get from the Regulator column
#' - dataSource: Motif database source
#' - providerName: Motif name
#' - providerId: Motif ID
#' - effect: Effect of the variant. By default, "strong"
#' - altPos: Alternative allele position. By default, 1
#' - alleleDiff: Allele difference, default 0, does not affect the plot for SNVs
#'
#' Attributes:
#' - genome.package: Genome package name
#' - motifs: Motif database, in MotifDb::MotifList format
#' @param variant Variant ID to be plotted
#' @param devpars List of device parameters, passed to `png()`
#' - res: Resolution, default 100
#' - width: Width of the plot, default NULL, calculated from the span of the motif positions
#' - height: Height of the plot, default NULL, calculated based on the number of motifs
#' A width or height that is given is used as-is.
#' @param outdir Output directory. Plots will be saved in the sub-directory "<outdir>/plots/"
#' @export
plot_variant_motifs <- function(results, variant, devpars, outdir) {
    plotdir <- file.path(outdir, "plots")
    # recursive: also works when outdir itself has not been created yet
    dir.create(plotdir, showWarnings = FALSE, recursive = TRUE)

    res <- results[results$SNP_id == variant, , drop = FALSE]
    if (length(res) == 0) {
        stop(sprintf("Variant %s not found in results", variant))
    }

    devpars <- devpars %||% list(res = 100, width = NULL, height = NULL)
    devpars$res <- devpars$res %||% 100
    # Do not write this as `height %||% 2.4 * res + ...`: %||% binds tighter
    # than `*` and `+`, so a supplied height would be multiplied by res and
    # have the per-motif term added on top.
    if (is.null(devpars$height)) {
        devpars$height <- 2.4 * devpars$res + length(res) * 1.2 * devpars$res
    }
    if (is.null(devpars$width)) {
        left <- min(sapply(res$motifPos, `[`, 1))
        right <- max(sapply(res$motifPos, `[`, 2))
        devpars$width <- 1.5 * devpars$res + (right - left) * 0.3 * devpars$res
        devpars$width <- max(devpars$width, 5 * devpars$res)
    }

    plotfile <- file.path(plotdir, sprintf("%s.png", slugify(variant)))
    # fix motifBreakR 2.12 using names to filter in plotMB
    names(res) <- res$SNP_id

    png(plotfile, width = devpars$width, height = devpars$height, res = devpars$res)
    # Close the device even when plotMB() fails, so it is not left open
    device_open <- TRUE
    on.exit(if (device_open) dev.off(), add = TRUE)
    motifbreakR::plotMB(res, variant)
    device_open <- FALSE
    dev.off()
}
@@ -1,13 +1,15 @@
1
+ # shellcheck disable=SC1083
1
2
  compvcf={{in.compvcf | quote}}
2
3
  basevcf={{in.basevcf | quote}}
3
4
  outdir={{out.outdir | quote}}
4
5
  truvari={{envs.truvari | quote}}
5
6
  ref={{envs.ref | quote}}
6
7
  refdist={{envs.refdist | quote}}
7
- pctsim={{envs.pctsim | quote}}
8
+ pctseq={{envs.pctseq | quote}}
8
9
  pctsize={{envs.pctsize | quote}}
9
10
  pctovl={{envs.pctovl | quote}}
10
11
  sizemax={{envs.sizemax | default: 50000 | quote}}
12
+ # shellcheck disable=SC1054
11
13
  {% if envs.typeignore %}
12
14
  typeignore="--typeignore"
13
15
  {% else %}
@@ -15,20 +17,25 @@ typeignore=""
15
17
  {% endif %}
16
18
  {% if envs.multimatch %}
17
19
  multimatch="--multimatch"
20
+ # shellcheck disable=SC1009
18
21
  {% else %}
19
22
  multimatch=""
23
+ # shellcheck disable=SC1073
20
24
  {% endif %}
21
25
 
22
26
  rm -rf $outdir
23
- $truvari bench \
24
- -c "$compvcf" \
25
- -b "$basevcf" \
26
- -f "$ref" \
27
+ cmd="$truvari bench \
28
+ -c '$compvcf' \
29
+ -b '$basevcf' \
30
+ -f '$ref' \
27
31
  --refdist $refdist \
28
- --pctsim $pctsim \
32
+ --pctseq $pctseq \
29
33
  --pctsize $pctsize \
30
34
  --pctovl $pctovl \
31
35
  --sizemax $sizemax \
32
36
  $typeignore \
33
37
  $multimatch \
34
- -o $outdir
38
+ -o $outdir"
39
+
40
+ echo "$cmd"
41
+ eval "$cmd"
@@ -17,7 +17,7 @@ read_summary = function() {
17
17
 
18
18
  summaries = NULL
19
19
  for (indir in indirs) {
20
- summary = fromJSON(file=file.path(indir, "summary.txt"))
20
+ summary = fromJSON(file=file.path(indir, "summary.json"))
21
21
  summary$gt_matrix = NULL
22
22
  summary$Sample = sub(".truvari_bench", "", basename(indir), fixed=T)
23
23
  summaries = bind_rows(summaries, summary)
@@ -43,7 +43,6 @@ plot_summary = function(col) {
43
43
  summaries,
44
44
  "col",
45
45
  list(mapping = aes_string(x = "Sample", y = bQuote(col), fill = "Sample")),
46
-
47
46
  devpars = get_devpars(),
48
47
  outfile = outfile
49
48
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: biopipen
3
- Version: 0.31.4
3
+ Version: 0.31.6
4
4
  Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
5
5
  License: MIT
6
6
  Author: pwwang
@@ -1,4 +1,4 @@
1
- biopipen/__init__.py,sha256=ZRJesu3_rpj9DcGHrsN5Zl7GDxuBTLAkcDxRMvMJE5c,23
1
+ biopipen/__init__.py,sha256=KU7MsdICtcB5jVm5DAaNainBCUqYItaZLSuj12ONgkE,23
2
2
  biopipen/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  biopipen/core/config.py,sha256=edK5xnDhM8j27srDzsxubi934NMrglLoKrdcC8qsEPk,1069
4
4
  biopipen/core/config.toml,sha256=7IXvviRicZ2D1h6x3BVgbLJ96nsh-ikvZ0sVlQepqFE,1944
@@ -7,7 +7,7 @@ biopipen/core/filters.py,sha256=5bZsbpdW7DCxqiteRdb2gelmXvfqWPmPsFxrpHdWsoE,1298
7
7
  biopipen/core/proc.py,sha256=60lUP3PcUAaKbDETo9N5PEIoeOYrLgcSmuytmrhcx8g,912
8
8
  biopipen/core/testing.py,sha256=lZ_R5ZbYPO2NPuLHdbzg6HbD_f4j8paVVbyeUqwg6FE,3411
9
9
  biopipen/ns/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- biopipen/ns/bam.py,sha256=jLhIztPiKsGfMpDgbcEpsybFskKkquIx9OKyTOc_L7Q,9172
10
+ biopipen/ns/bam.py,sha256=-xVagotsURyOtwKzv72L-2f9P7467OVzgvP96syfHZc,10628
11
11
  biopipen/ns/bed.py,sha256=HsTCJge7XNfCZyCBJ4iifNKQ5we4VZSpRx8XL8--y5A,6689
12
12
  biopipen/ns/cellranger.py,sha256=yPBoNzVSY74J7uyVucaob5lqZKKru5-hYSM4f4Nr2OY,5553
13
13
  biopipen/ns/cellranger_pipeline.py,sha256=EWkPJTujamNSMQoRnKfhUiIj6TkMfRmCSUbPfd8Tv8E,4011
@@ -19,7 +19,8 @@ biopipen/ns/gene.py,sha256=rty-Bjcf87v2vyb9X4kRvfrQ6XQYXgN4f2ftFO0nWA8,3888
19
19
  biopipen/ns/gsea.py,sha256=EsNRAPYsagaV2KYgr4Jv0KCnZGqayM209v4yOGGTIOI,7423
20
20
  biopipen/ns/misc.py,sha256=qXcm0RdR6W-xpYGgQn3v7JBeYRWwVm5gtgSj2tdVxx4,2935
21
21
  biopipen/ns/plot.py,sha256=XzLq0A8qCIQRbxhPEdWhEfbRZ8g3e4KriVz0RP8enNY,18078
22
- biopipen/ns/regulatory.py,sha256=qvc9QrwgwCI_lg0DQ2QOZbAhC8BAD1qnQXSGtAGlVcY,11750
22
+ biopipen/ns/protein.py,sha256=33pzM-gvBTw0jH60mvfqnriM6uw2zj3katZ82nC9owI,3309
23
+ biopipen/ns/regulatory.py,sha256=gJjGVpJrdv-rg2t5UjK4AGuvtLNymaNYNvoD8PhlbvE,15929
23
24
  biopipen/ns/rnaseq.py,sha256=bKAa6friFWof4yDTWZQahm1MS-lrdetO1GqDKdfxXYc,7708
24
25
  biopipen/ns/scrna.py,sha256=fXP_h7gchcuk_Jwos0IgY_P8ON6Q995OgKHgdrxfvAY,112868
25
26
  biopipen/ns/scrna_metabolic_landscape.py,sha256=6AhaynGG3lNRi96N2tReVT46BJMuEwooSSd2irBoN80,28347
@@ -27,7 +28,7 @@ biopipen/ns/snp.py,sha256=-Jx5Hsv_7KV7TqLU0nHCaPkMEN0CFdi4tNVlyq0rUZ4,27259
27
28
  biopipen/ns/stats.py,sha256=DlPyK5Vsg6ZEkV9SDS3aAw21eXzvOHgqeZDkXPhg7go,20509
28
29
  biopipen/ns/tcgamaf.py,sha256=AFbUJIxiMSvsVY3RcHgjRFuMnNh2DG3Mr5slLNEyz6o,1455
29
30
  biopipen/ns/tcr.py,sha256=0PCF8iPZ629z6P3RHoAWEpMWmuDslomTGcMopjqvXmE,88304
30
- biopipen/ns/vcf.py,sha256=0aKH_YSLy_-JzV-_VZb0EoScv7JKGrDU7BaeWHjDuRo,22699
31
+ biopipen/ns/vcf.py,sha256=OYWuAWADba1xLwvHmIPwXYin_rUaAFQq7N38DQvoYzs,22746
31
32
  biopipen/ns/web.py,sha256=4itJzaju8VBARIyZjDeh5rsVKpafFq_whixnvL8sXes,5368
32
33
  biopipen/reports/bam/CNAClinic.svelte,sha256=D4IxQcgDCPQZMbXog-aZP5iJEQTK2N4i0C60e_iXyfs,213
33
34
  biopipen/reports/bam/CNVpytor.svelte,sha256=s03SlhbEPd8-_44Dy_cqE8FSErhUdqStLK39te5o7ZE,1364
@@ -44,6 +45,7 @@ biopipen/reports/cnvkit/CNVkitScatter.svelte,sha256=8QLihBVH8RaHtjOUTU7xh4CCmFyx
44
45
  biopipen/reports/delim/SampleInfo.svelte,sha256=Vpjrdd3AXNDNhx2pzDaFA61xPMYAFKvvIoVCqUswiyo,365
45
46
  biopipen/reports/gsea/FGSEA.svelte,sha256=3gNbZovmRWK6QjtxfgZgmft6LUurVZfQyHBRqyGMosk,405
46
47
  biopipen/reports/gsea/GSEA.svelte,sha256=lYHf8h8RLx3i-jNCEGu_LM-dbYm9ZJDzyAEadsZXzmQ,417
48
+ biopipen/reports/protein/ProdigySummary.svelte,sha256=WEYPwmcmgtSqpFpTvNPV40yZR-7ERa5LgZni2RXxUZQ,348
47
49
  biopipen/reports/scrna/CellsDistribution.svelte,sha256=Mg6P0tazpzOxsOAtc-0LGEim5KprPt5KIgoW3TzrxxA,413
48
50
  biopipen/reports/scrna/DimPlots.svelte,sha256=ZLbtN0ioevRyEky4jb_DkDGAcpy_jAhaHfFym5ELEPM,479
49
51
  biopipen/reports/scrna/MarkersFinder.svelte,sha256=77rD1psj0VJykPDhfwS-B8mubvaasREAE6RYR2atTN4,444
@@ -74,7 +76,8 @@ biopipen/reports/utils/gsea.liq,sha256=5uxNPyIr0xd7nrZePJlIsRyqCPx1uVWso7ehE1F-C
74
76
  biopipen/reports/utils/misc.liq,sha256=HLK3mkWtIMQqBtM2IIRFUdKKTcY8cvBtyHJB9DbWBdw,1653
75
77
  biopipen/reports/vcf/TruvariBenchSummary.svelte,sha256=yew8HRN-YahBzX504Micah5BTnoL_PQzQfN_87TiuOA,577
76
78
  biopipen/reports/vcf/TruvariConsistency.svelte,sha256=BBvtxi1EPmGH7j5M5zMOcLEhKWZOlKoXp1lrQGAmz_0,647
77
- biopipen/scripts/bam/BamMerge.py,sha256=jyaJI0-TulAxaCzvrYjC8PujH_ECNInevnPbh2_XQtU,3477
79
+ biopipen/scripts/bam/BamMerge.py,sha256=Gd5P8V-CSsTAA8ZrUxetR-I49GjJ3VJNjrqu7-EZwXQ,3642
80
+ biopipen/scripts/bam/BamSampling.py,sha256=Pi6CXAbBFVRGh8-0WrkB-3v3oxinfahQk11H0IdBNmQ,2312
78
81
  biopipen/scripts/bam/BamSplitChroms.py,sha256=b7GS2I4X0dLOhlPg_r9-buoIHTWlq6zHI3Rox94LXR8,4893
79
82
  biopipen/scripts/bam/CNAClinic.R,sha256=mQXwtShL54HZXGCPqgPKPrU74_6K_8PqtOtG0mgA-F0,5062
80
83
  biopipen/scripts/bam/CNVpytor.py,sha256=hOUli9BDMOoth0or-tjUYC1AP3yNOuxUS6G3Rhcg99s,18000
@@ -121,12 +124,14 @@ biopipen/scripts/plot/QQPlot.R,sha256=Xil19FJ7jbsxo1yU7dBhhZo2_95Gz6gpTyuv9F0RDN
121
124
  biopipen/scripts/plot/ROC.R,sha256=cjmmYRQycYisqRmlkZE9nbmwfo-04wdJKVmlOsiEFAM,2451
122
125
  biopipen/scripts/plot/Scatter.R,sha256=fg4H5rgdr6IePTMAIysiElnZme0vCh1T0wrwH2Q9xkM,2840
123
126
  biopipen/scripts/plot/VennDiagram.R,sha256=Am9umSGr2QxZc2MIMGMBhpoEqta3qt_ItF-9_Y53SXE,704
124
- biopipen/scripts/regulatory/MotifAffinityTest.R,sha256=1sR3sWRZbxDeFO290LcpzZglzOLc13SSvibDON96PCg,8852
125
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R,sha256=SAyTm2-6g5qVJFRrLxEY0QJrLWTkwDi_J_9J7HhtTN8,4438
126
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R,sha256=wCK4tLx1iWh_OwW7ZvLTCjTGWCIfVqw-lYC0-JqIPqg,3338
127
+ biopipen/scripts/protein/Prodigy.py,sha256=W56e51SkaWqthrkCSr2HUqhE9NfJQWZj4y0HXIqaYRA,4459
128
+ biopipen/scripts/protein/ProdigySummary.R,sha256=1s3ofk6Kvs--GAAvzV8SdAyq5LrYozWtIlL32b6ZarE,3806
129
+ biopipen/scripts/regulatory/MotifAffinityTest.R,sha256=McAnbduE_6SMD_4RuftBemPdfJD9LeFYUYwqL3fzfjU,3047
130
+ biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R,sha256=lzP3EtmpMucWviLDgXLeP_JvG4VADykBOl49CkftiR8,4366
131
+ biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R,sha256=WpkPVNvWonmZqQ8khdDg2VHhda7ZHexvLFGRz4qgv88,3304
127
132
  biopipen/scripts/regulatory/MotifScan.py,sha256=WtSbs8z08oeTgzjr0LfIDmjbUdknAh1raa_QPQ_NCFg,5336
128
- biopipen/scripts/regulatory/atSNP.R,sha256=TXJARbE0rDIzSq6Spacz_HsM_DXdREJ95ZsBg26trgw,1288
129
- biopipen/scripts/regulatory/motifBreakR.R,sha256=trzvvTvwc5gSO427wkqx93FecuQxLGa9kNqodKa8S0U,70236
133
+ biopipen/scripts/regulatory/VariantMotifPlot.R,sha256=RNnBc0bboGfrJOPk5CsUbFRBMBvVX8zgGsrI5eybNyo,2874
134
+ biopipen/scripts/regulatory/motifs-common.R,sha256=7deFrEzZKYzNhmYTsBqqb91CbIj2vtF7lRiPX0yGkO8,13277
130
135
  biopipen/scripts/rnaseq/Simulation-ESCO.R,sha256=68cEHDdJclX8P8Q7ey9yBOfK09M_kxlL6zgYXsEL2Rs,6378
131
136
  biopipen/scripts/rnaseq/Simulation-RUVcorr.R,sha256=6C6Ke5RLF0fC2V9WQPoFEdqoDabCnhslZBIyB6zhIxc,1155
132
137
  biopipen/scripts/rnaseq/Simulation.R,sha256=PK9tZS88AcBPStcFalZlMU0KE0gSqFSQvhUoQ-8eg90,871
@@ -245,8 +250,8 @@ biopipen/scripts/vcf/BcftoolsAnnotate.py,sha256=iS-T6IhumqePW5kdyi_Tb6rubyIiCMjS
245
250
  biopipen/scripts/vcf/BcftoolsFilter.py,sha256=AdQMXFTNLCS5eqYWMNIMbkK8qXJ5j8GYm7HdPopVk0c,2573
246
251
  biopipen/scripts/vcf/BcftoolsSort.py,sha256=tU0pTrEIB-7x6iOSfU-KpYY1rEidi6Q4179NntY3cGc,3778
247
252
  biopipen/scripts/vcf/BcftoolsView.py,sha256=Sj3KkYPpwQFo5kmZC5MRxItrSE5KVZi0jNYrRFck3Ow,2465
248
- biopipen/scripts/vcf/TruvariBench.sh,sha256=80yLQ73OzSgsJ4ltzgpcWxYvvX1hFnCG8YSBhhhRQ9Y,765
249
- biopipen/scripts/vcf/TruvariBenchSummary.R,sha256=Q1VCYqOZ-VI8lgXW6Yqmw6NkLH168e7V6eYFbr0W4EE,1516
253
+ biopipen/scripts/vcf/TruvariBench.sh,sha256=5M7lZhO4laNJvCVCHudf8DYArKNXoiPWuSkXgRi2t_A,908
254
+ biopipen/scripts/vcf/TruvariBenchSummary.R,sha256=rdNNIPoiwqnK6oEOlQUUhnL1MF958W_nDjRCkA5ubz4,1516
250
255
  biopipen/scripts/vcf/TruvariConsistency.R,sha256=6h20v8qztbl1KZInJwoSK_t5XwqhKMTMzPWNPhoAjlM,2314
251
256
  biopipen/scripts/vcf/Vcf2Bed.py,sha256=LzyJ9qW1s5mbfF8maLc77_0rE98KMc2lq1R94_NFbSU,855
252
257
  biopipen/scripts/vcf/VcfAnno.py,sha256=FW626rAs_WSU7AHQMKjfkYoByUGh_gVyJM97neGfOMo,802
@@ -279,7 +284,7 @@ biopipen/utils/reference.py,sha256=oi5evicLwHxF0KAIPNZohBeHJLJQNWFJH0cr2y5pgcg,5
279
284
  biopipen/utils/rnaseq.R,sha256=Ro2B2dG-Z2oVaT5tkwp9RHBz4dp_RF-JcizlM5GYXFs,1298
280
285
  biopipen/utils/single_cell.R,sha256=pJjYP8bIZpNAtTQ32rOXhZxaM1Y-6D-xUcK3pql9tbk,4316
281
286
  biopipen/utils/vcf.py,sha256=ajXs0M_QghEctlvUlSRjWQIABVF02wPdYd-0LP4mIsU,9377
282
- biopipen-0.31.4.dist-info/METADATA,sha256=-ZA3ibCLubauXddI46VdKgbKvzBKiLR8sk0HWQArEZo,882
283
- biopipen-0.31.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
284
- biopipen-0.31.4.dist-info/entry_points.txt,sha256=69SbeMaF47Z2DS40yo-qDyoBKmMmumrNnsjEZMOioCE,625
285
- biopipen-0.31.4.dist-info/RECORD,,
287
+ biopipen-0.31.6.dist-info/METADATA,sha256=2NGpF5QMNq7lG0y8MQIGpfFYyRE9lYz17RpF6dEtq0k,882
288
+ biopipen-0.31.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
289
+ biopipen-0.31.6.dist-info/entry_points.txt,sha256=BYqHGBQJxyFDNLYqgH64ycI5PYwnlqwYcCFsMvJgzAU,653
290
+ biopipen-0.31.6.dist-info/RECORD,,
@@ -11,6 +11,7 @@ gene=biopipen.ns.gene
11
11
  gsea=biopipen.ns.gsea
12
12
  misc=biopipen.ns.misc
13
13
  plot=biopipen.ns.plot
14
+ protein=biopipen.ns.protein
14
15
  regulatory=biopipen.ns.regulatory
15
16
  rnaseq=biopipen.ns.rnaseq
16
17
  scrna=biopipen.ns.scrna
@@ -1,33 +0,0 @@
1
#' Convert a SNP info data frame into the list structure used for atSNP
#'
#' @param snpinfo Data frame with the columns
#'   chrom, start, end, name, score, strand, ref, alt, ref_seq, alt_seq.
#'   Only name, ref, alt and ref_seq are read here.
#' @return List with the integer-encoded reference sequences
#'   (`sequence_matrix`), the encoded reference and alternative bases
#'   (`ref_base`, `snp_base`), the SNP ids, a fixed transition matrix and
#'   prior, and NULL `rsid.*` entries
snpinfo2atsnp <- function(snpinfo) {
    if (any(nchar(snpinfo$ref) != 1) || any(nchar(snpinfo$alt) != 1)) {
        stop("Only SNVs are supported by atSNP. Consider using motifbreakR instead if you have indels.")
    }

    # Integer code of each base
    base_encodings <- c(A = 1, C = 2, G = 3, T = 4)
    bases <- names(base_encodings)
    encode <- function(letters_) as.integer(base_encodings[letters_])

    # Fixed 4x4 matrix, one row per base in A, C, G, T order
    transition <- matrix(
        c(
            0.3225035, 0.1738422, 0.24915044, 0.2545039,
            0.3451410, 0.2642147, 0.05245011, 0.3381942,
            0.2813089, 0.2136604, 0.26749171, 0.2375390,
            0.2149776, 0.2071733, 0.25309238, 0.3247568
        ),
        nrow = 4,
        byrow = TRUE,
        dimnames = list(bases, bases)
    )

    # One encoded sequence per SNP, simplified by sapply()
    encoded_seqs <- sapply(
        snpinfo$ref_seq,
        function(s) encode(strsplit(s, "")[[1]])
    )

    list(
        sequence_matrix = unname(encoded_seqs),
        ref_base = encode(snpinfo$ref),
        snp_base = encode(snpinfo$alt),
        snpids = snpinfo$name,
        transition = transition,
        prior = c(A = 0.287, C = 0.211, G = 0.213, T = 0.289),
        rsid.na = NULL,
        rsid.rm = NULL,
        rsid.duplicate = NULL,
        rsid.missing = NULL
    )
}