PyPI - biopipen - Versions diffs - 0.28.1__py3-none-any.whl → 0.29.1__py3-none-any.whl - Mend

biopipen 0.28.1__py3-none-any.whl → 0.29.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (85) hide show

biopipen/__init__.py +1 -1
biopipen/core/config.toml +8 -0
biopipen/ns/bam.py +0 -2
biopipen/ns/bed.py +35 -0
biopipen/ns/cellranger_pipeline.py +5 -5
biopipen/ns/cnv.py +18 -2
biopipen/ns/cnvkit_pipeline.py +16 -11
biopipen/ns/gene.py +68 -23
biopipen/ns/misc.py +2 -15
biopipen/ns/plot.py +204 -0
biopipen/ns/regulatory.py +214 -0
biopipen/ns/scrna.py +31 -5
biopipen/ns/snp.py +516 -8
biopipen/ns/stats.py +167 -3
biopipen/ns/vcf.py +196 -0
biopipen/reports/snp/PlinkCallRate.svelte +24 -0
biopipen/reports/snp/PlinkFreq.svelte +18 -0
biopipen/reports/snp/PlinkHWE.svelte +18 -0
biopipen/reports/snp/PlinkHet.svelte +18 -0
biopipen/reports/snp/PlinkIBD.svelte +18 -0
biopipen/scripts/bam/CNVpytor.py +144 -46
biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
biopipen/scripts/bed/BedtoolsMerge.py +1 -1
biopipen/scripts/cnv/AneuploidyScore.R +30 -7
biopipen/scripts/cnv/AneuploidyScoreSummary.R +5 -2
biopipen/scripts/cnv/TMADScore.R +21 -5
biopipen/scripts/cnv/TMADScoreSummary.R +6 -2
biopipen/scripts/cnvkit/CNVkitAccess.py +2 -1
biopipen/scripts/cnvkit/CNVkitAutobin.py +3 -2
biopipen/scripts/cnvkit/CNVkitBatch.py +1 -1
biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -1
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +1 -1
biopipen/scripts/cnvkit/CNVkitHeatmap.py +1 -1
biopipen/scripts/cnvkit/CNVkitReference.py +2 -1
biopipen/scripts/delim/SampleInfo.R +10 -5
biopipen/scripts/gene/GeneNameConversion.R +65 -0
biopipen/scripts/gene/GenePromoters.R +61 -0
biopipen/scripts/misc/Shell.sh +15 -0
biopipen/scripts/plot/Manhattan.R +146 -0
biopipen/scripts/plot/QQPlot.R +146 -0
biopipen/scripts/regulatory/MotifAffinityTest.R +226 -0
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +126 -0
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +96 -0
biopipen/scripts/regulatory/MotifScan.py +159 -0
biopipen/scripts/regulatory/atSNP.R +33 -0
biopipen/scripts/regulatory/motifBreakR.R +1594 -0
biopipen/scripts/scrna/MarkersFinder.R +69 -67
biopipen/scripts/scrna/SeuratClustering.R +71 -29
biopipen/scripts/scrna/SeuratMap2Ref.R +20 -0
biopipen/scripts/scrna/SeuratPreparing.R +252 -122
biopipen/scripts/scrna/SeuratSubClustering.R +76 -27
biopipen/scripts/snp/MatrixEQTL.R +85 -44
biopipen/scripts/snp/Plink2GTMat.py +133 -0
biopipen/scripts/snp/PlinkCallRate.R +190 -0
biopipen/scripts/snp/PlinkFilter.py +100 -0
biopipen/scripts/snp/PlinkFreq.R +298 -0
biopipen/scripts/snp/PlinkFromVcf.py +78 -0
biopipen/scripts/snp/PlinkHWE.R +80 -0
biopipen/scripts/snp/PlinkHet.R +92 -0
biopipen/scripts/snp/PlinkIBD.R +200 -0
biopipen/scripts/snp/PlinkUpdateName.py +124 -0
biopipen/scripts/stats/Mediation.R +94 -0
biopipen/scripts/stats/MetaPvalue.R +2 -1
biopipen/scripts/stats/MetaPvalue1.R +70 -0
biopipen/scripts/tcr/TCRClusterStats.R +12 -7
biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
biopipen/scripts/vcf/BcftoolsSort.py +113 -0
biopipen/scripts/vcf/BcftoolsView.py +73 -0
biopipen/scripts/vcf/VcfFix_utils.py +1 -1
biopipen/scripts/vcf/bcftools_utils.py +52 -0
biopipen/utils/gene.R +83 -37
biopipen/utils/gene.py +108 -60
biopipen/utils/misc.R +56 -0
biopipen/utils/misc.py +5 -2
biopipen/utils/reference.py +54 -10
{biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/METADATA +2 -2
{biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/RECORD +80 -51
{biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/entry_points.txt +1 -1
biopipen/ns/bcftools.py +0 -111
biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
biopipen/scripts/gene/GeneNameConversion.py +0 -66
{biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/WHEEL +0 -0

biopipen/scripts/gene/GenePromoters.R ADDED Viewed

@@ -0,0 +1,61 @@
+library(rlang)
+library(rtracklayer)
+infile <- {{in.infile | r}}
+outfile <- {{out.outfile | r}}
+up <- {{envs.up | r}}
+down <- {{envs.down | r}}
+notfound <- {{envs.notfound | r}}
+refgene <- {{envs.refgene | r}}
+header <- {{envs.header | r}}
+genecol <- {{envs.genecol | r}}
+match_id <- {{envs.match_id | r}}
+sort_ <- {{envs.sort | r}}
+chrsize <- {{envs.chrsize | r}}
+down <- down %||% up  # envs.down falls back to envs.up when it is NULL
+refgenes <- readGFF(refgene)
+refcol <- ifelse(match_id, "gene_id", "gene_name")  # annotation attribute the input genes are matched against
+if (infile == "/dev/null") {  # no input table: use every gene found in the annotation
+    genes <- unique(refgenes[[refcol]])
+} else {
+    data <- read.table(infile, header=header, sep="\t", stringsAsFactors=FALSE, check.names=FALSE)
+    genes <- data[[genecol]]
+    rm(data)
+}
+notfound_genes <- setdiff(genes, refgenes[[refcol]])  # requested genes absent from the annotation
+if (notfound == "error" && length(notfound_genes) > 0) {
+    stop(paste(
+        "The following genes were not found in the reference annotation:",
+        paste(notfound_genes, collapse=", ")
+    ))
+} else if (notfound == 'skip') {
+    genes <- genes[!genes %in% notfound_genes]
+}
+# Select the genes that are in the reference annotation and keep the order
+# of the records in genes
+refgenes <- refgenes[match(genes, refgenes[[refcol]]), , drop = FALSE]  # match() picks the first annotation record per gene
+refgenes <- unique(makeGRangesFromDataFrame(refgenes, keep.extra.columns=TRUE))
+proms <- promoters(refgenes, up=up, down=down)  # up/down partially match the upstream/downstream arguments
+# Scores must be non-NA numeric values
+elementMetadata(proms)$name <- elementMetadata(proms)[[refcol]]  # BED name column = matched gene identifier
+score(proms) <- 0
+start(proms) <- pmax(1, start(proms))  # clamp promoter starts that would fall before position 1
+if (sort_) {
+    chrom_sizes <- read.table(chrsize, header=FALSE, stringsAsFactors=FALSE, sep="\t")
+    common_chroms <- intersect(chrom_sizes$V1, seqlevels(proms))  # keeps the order of the chromosome sizes file
+    if (length(common_chroms) == 0) {
+        stop("No common chromosomes found between the promoters and the chromosome sizes. Do you use the correct chromosome sizes file?")
+    }
+    proms <- keepSeqlevels(proms, common_chroms, pruning.mode="coarse")  # presumably drops ranges on other chromosomes; confirm in GenomeInfoDb docs
+    seqlevels(proms) <- common_chroms  # reorder seqlevels so sort() follows the sizes-file order
+    proms <- sort(proms, ignore.strand = TRUE)
+}
+export.bed(proms, outfile)

biopipen/scripts/misc/Shell.sh ADDED Viewed

@@ -0,0 +1,15 @@
+# shellcheck disable=all
+export infile={{in.infile | quote}}  # exported, so the command run below inherits $infile
+export outfile={{out.outfile | quote}}  # exported, so the command run below inherits $outfile
+is_outdir={{envs.outdir | int}}  # 1 when the output is expected to be a directory
+cmd_given={{envs.cmd | bool | int}}  # 0 when envs.cmd is empty
+{% set _ = out.outfile | dirname | joinpath: "cmd.sh" | as_path | attr: 'write_text' | call: envs.cmd %}
+cmd="{{proc.lang}} {{out.outfile | dirname | joinpath: 'cmd.sh'}}"  # NOTE(review): the template tag above appears to write envs.cmd into cmd.sh at render time; the path is not quoted here, so whitespace in it would break eval
+if [[ "$cmd_given" -eq 0 ]]; then
+    echo "No command given." 1>&2
+    exit 1
+fi
+if [[ $is_outdir -eq 1 ]]; then
+    mkdir -p "$outfile"  # create the output directory before the command runs
+fi
+eval "$cmd"  # run cmd.sh with the process interpreter

biopipen/scripts/plot/Manhattan.R ADDED Viewed

@@ -0,0 +1,146 @@
+source("{{biopipen_dir}}/utils/misc.R")
+library(rlang)
+library(ggmanh)
+infile <- {{in.infile | r}}
+outfile <- {{out.outfile | r}}
+chrom_col <- {{envs.chrom_col | r}}
+pos_col <- {{envs.pos_col | r}}
+pval_col <- {{envs.pval_col | r}}
+label_col <- {{envs.label_col | r}}
+devpars <- {{envs.devpars | r}}
+title <- {{envs.title | r}}
+ylabel <- {{envs.ylabel | r}}
+rescale <- {{envs.rescale | r}}
+rescale_ratio_threshold <- {{envs.rescale_ratio_threshold | r}}
+signif <- {{envs.signif | r}}
+hicolors <- {{envs.hicolors | r}}
+thin_n <- {{envs.thin_n | r}}
+thin_bins <- {{envs.thin_bins | r}}
+zoom <- {{envs.zoom | r}}
+zoom_devpars <- {{envs.zoom_devpars | r}}
+chroms <- {{envs.chroms | r}}
+args <- {{envs.args | r: todot="-"}}
+data <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
+# normalize columns
+cnames <- colnames(data)
+if (is.numeric(chrom_col)) { chrom_col <- cnames[chrom_col] }
+if (is.numeric(pos_col)) { pos_col <- cnames[pos_col] }
+if (is.numeric(pval_col)) { pval_col <- cnames[pval_col] }
+if (is.numeric(label_col)) { label_col <- cnames[label_col] }
+# normalize chroms
+# Normalize a chromosome specification into a character vector of names.
+#
+# Accepts a vector of names, a single comma-separated string ("chr1,chr2")
+# and ranges such as "chr1-22" or "chr1-chr22", which are expanded to every
+# chromosome in between (in ascending order). Duplicates are removed.
+#
+# @param chrs Chromosome names and/or ranges (coerced to character)
+# @return Character vector of chromosome names; empty for empty input
+# Stops with "Invalid chroms: <spec>" when a range is malformed.
+norm_chroms <- function(chrs) {
+    chrs <- as.character(chrs)
+    # Empty input would otherwise fail on a zero-length `if` condition below
+    if (length(chrs) == 0) { return(character(0)) }
+    if (length(chrs) == 1 && grepl(",", chrs)) {
+        chrs <- trimws(unlist(strsplit(chrs, ",")))
+    }
+    if (length(chrs) > 1) {
+        return(unique(unlist(lapply(chrs, norm_chroms), use.names = FALSE)))
+    }
+    if (!grepl("-", chrs)) { return(chrs) }
+    # expand chr1-22 -> chr1, chr2, ..., chr22
+    # Keep the original spec for error messages; the split parts are a vector
+    # and would make stop() emit one garbled, repeated message.
+    spec <- chrs
+    # chr1-22 -> 'chr1', '22'
+    parts <- unlist(strsplit(spec, "-"))
+    if (length(parts) != 2) {
+        stop(paste0("Invalid chroms: ", spec))
+    }
+    # detect prefix (everything that is not a digit)
+    prefix1 <- gsub("[0-9]", "", parts[1])
+    prefix2 <- gsub("[0-9]", "", parts[2])
+    # The second bound may omit the prefix ("chr1-22"), but must not disagree
+    if (nchar(prefix2) > 0 && prefix1 != prefix2) {
+        stop(paste0("Invalid chroms: ", spec, " (prefix mismatch)"))
+    }
+    chr_a <- suppressWarnings(as.integer(substring(parts[1], nchar(prefix1) + 1)))
+    chr_b <- suppressWarnings(as.integer(substring(parts[2], nchar(prefix2) + 1)))
+    # Ranges only make sense between numeric chromosomes (not e.g. "chrX-chrY")
+    if (is.na(chr_a) || is.na(chr_b)) {
+        stop(paste0("Invalid chroms: ", spec, " (range bounds must be numeric)"))
+    }
+    chr_min <- min(chr_a, chr_b)
+    chr_max <- max(chr_a, chr_b)
+    return(paste0(prefix1, chr_min:chr_max))
+}
+log_info("Preparing data for plotting ...")
+if (length(chroms) == 1 && chroms == "auto") {  # "auto": use the chromosomes in order of first appearance in the data
+    chroms <- unique(data[[chrom_col]])
+} else {
+    chroms <- norm_chroms(chroms)
+}
+# prepare data
+mp_prep_args = list()
+if (length(signif) == 1 && is.character(signif)) {  # thresholds may arrive as one comma-separated string
+    signif <- as.numeric(trimws(unlist(strsplit(signif, ","))))
+}
+siglevel <- min(signif)  # the strictest threshold decides labelling and highlighting
+if (!is.null(label_col)) {
+    data$.label <- ifelse(data[[pval_col]] < siglevel, data[[label_col]], "")  # label only points below siglevel
+}
+if (!is.null(hicolors)) {
+    sig_str <- "Significant"
+    nsig_str <- "Not significant"
+    data$.highlight <- ifelse(data[[pval_col]] < siglevel, sig_str, nsig_str)
+    if (length(hicolors) == 1) { hicolors <- c(hicolors, "grey") }  # single color given: non-significant points become grey
+    names(hicolors) <- c(sig_str, nsig_str)
+    mp_prep_args$highlight.colname <- ".highlight"
+    mp_prep_args$highlight.col <- hicolors
+}
+mp_prep_args$x <- data
+mp_prep_args$chr.colname <- chrom_col
+mp_prep_args$pos.colname <- pos_col
+mp_prep_args$pval.colname <- pval_col
+mp_prep_args$chr.order <- chroms
+if (!is.null(thin_n) && thin_n > 0) {  # thinning options are passed only when thin_n is positive
+    mp_prep_args$thin.n <- thin_n
+    mp_prep_args$thin.bins <- thin_bins
+}
+mpdata <- do_call(manhattan_data_preprocess, mp_prep_args)
+# plot
+log_info("Plotting Manhattan plot ...")
+args$x <- mpdata  # values set below override the same keys coming from envs.args
+args$signif <- signif
+args$plot.title <- title
+args$rescale <- rescale
+args$rescale.ratio.threshold <- rescale_ratio_threshold
+args$y.label <- ylabel
+if (!is.null(hicolors)) { args$color.by.highlight <- TRUE }
+if (!is.null(label_col)) { args$label.colname <- ".label" }
+g <- do_call(manhattan_plot, args)
+png(outfile, width=devpars$width, height=devpars$height, res=devpars$res)
+print(g)
+dev.off()
+# zoom into chromosomes
+all_chroms <- as.character(unique(mpdata$data[[mpdata$chr.colname]]))  # chromosomes present in the preprocessed data
+if (!is.null(zoom)) {
+    log_info("Zooming into chromosomes ...")
+    zoom <- norm_chroms(zoom)  # expand comma-separated lists and ranges such as chr1-3
+    for (z in zoom) {
+        if (!z %in% all_chroms) {  # requested chromosome has no data: warn and move on
+            log_warn("- {z}: not found in data")
+            next
+        }
+        log_info("- {z}")
+        args_z <- args  # start from the arguments of the whole-genome plot
+        args_z$chromosome <- z
+        args_z$plot.title <- paste0(title, " (", z, ")")
+        args_z$x.label <- "Position"
+        g_z <- do_call(manhattan_plot, args_z)
+        outfile_z <- gsub("\\.png$", paste0("-", z, ".png"), outfile)  # name.png becomes name-CHROM.png; NOTE(review): unchanged if outfile lacks a .png suffix
+        zm_devpars <- zoom_devpars
+        zm_devpars$res <- zm_devpars$res %||% devpars$res  # res falls back to the main plot's value
+        zm_devpars$height <- zm_devpars$height %||% devpars$height  # height falls back to the main plot's value
+        png(
+            outfile_z,
+            width=zm_devpars$width,  # NOTE(review): width has no fallback to devpars$width
+            height=zm_devpars$height,
+            res=zm_devpars$res
+        )
+        print(g_z)
+        dev.off()
+    }
+}

biopipen/scripts/plot/QQPlot.R ADDED Viewed

@@ -0,0 +1,146 @@
+source("{{biopipen_dir}}/utils/misc.R")
+library(rlang)
+library(stats)
+library(ggplot2)
+library(ggprism)
+library(qqplotr)
+theme_set(theme_prism())
+infile <- {{in.infile | r}}
+theorfile <- {{in.theorfile | r}}
+outfile <- {{out.outfile | r}}
+val_col <- {{envs.val_col | r}}
+theor_col <- {{envs.theor_col | r}}
+theor_trans <- {{envs.theor_trans | r}}
+theor_funs <- {{envs.theor_funs | r}}
+devpars <- {{envs.devpars | r}}
+title <- {{envs.title | r}}
+xlabel <- {{envs.xlabel | r}}
+ylabel <- {{envs.ylabel | r}}
+kind <- {{envs.kind | r}}
+trans <- {{envs.trans | r}}
+args <- {{envs.args | r}}
+band_args <- {{envs.band | r}}
+line_args <- {{envs.line | r}}
+point_args <- {{envs.point | r}}
+ggs <- {{envs.ggs | r}}
+# Turn a function specification into a function object.
+#
+# Non-character input is returned untouched. A string is evaluated as R
+# code; a string of the form "-name" (minus sign followed by an identifier)
+# yields a wrapper returning the negated result of `name`.
+# NOTE(review): strings are evaluated with eval(parse()), so they must come
+# from a trusted configuration.
+#
+# @param fun A function, or R code given as a string
+# @return The resolved function (or the input itself if not a string)
+.eval_fun <- function(fun) {
+    if (!is.character(fun)) {
+        return(fun)
+    }
+    expr_text <- trimws(fun)
+    is_negated <- grepl("^-\\s*[a-zA-Z\\.][0-9a-zA-Z\\._]*$", expr_text)
+    if (!is_negated) {
+        return(eval(parse(text = expr_text)))
+    }
+    # Drop the leading minus, resolve the named function, then negate it
+    base_fun <- eval(parse(text = trimws(substring(expr_text, 2))))
+    function(x) -base_fun(x)
+}
+# Load the observed values (and optionally the theoretical values).
+indata <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
+# A numeric val_col is a 1-based column index into the input table
+if (is.numeric(val_col)) {
+    val_col <- colnames(indata)[val_col]
+}
+if (!is.null(trans)) {
+    trans <- .eval_fun(trans)
+    indata[[val_col]] <- trans(indata[[val_col]])
+}
+if (!is.null(theor_col)) {
+    # Pick the table holding the theoretical values BEFORE resolving a
+    # numeric theor_col: the index refers to that table's columns (the
+    # theoretical file if given, the input table otherwise).
+    if (!is.null(theorfile)) {
+        theor <- read.table(theorfile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
+    } else {
+        theor <- indata
+    }
+    if (is.numeric(theor_col)) {
+        theor_col <- colnames(theor)[theor_col]
+    }
+    theor_vals <- theor[[theor_col]]
+    if (!is.null(theor_trans)) {
+        theor_trans <- .eval_fun(theor_trans)
+        theor_vals <- theor_trans(theor_vals)
+    }
+    # Missing values are dropped and the rest sorted ascending
+    theor_vals <- sort(na.omit(theor_vals))
+}
+# Choose the qqplotr stat layers for the requested plot kind: "pp" selects
+# the P-P variants, anything else the Q-Q variants. Plain if/else is used
+# because ifelse() is meant for vectors of values, not for functions.
+if (kind == "pp") {
+    band_fun <- stat_pp_band
+    line_fun <- stat_pp_line
+    point_fun <- stat_pp_point
+} else {
+    band_fun <- stat_qq_band
+    line_fun <- stat_qq_line
+    point_fun <- stat_qq_point
+}
+# Define every user-supplied function (envs.theor_funs) as a variable named
+# after its key. NOTE(review): the definitions go through .eval_fun(), which
+# evaluates text as R code, so they must come from a trusted configuration.
+for (fun in names(theor_funs)) {
+    assign(fun, .eval_fun(theor_funs[[fun]]))
+}
+# Build the final argument list for one layer (band, line or point).
+#
+# Returns NULL (layer not drawn) when the layer is NULL, FALSE or has
+# `disabled = TRUE`. Otherwise removes the `disabled` flag, combines the
+# layer arguments with the shared `args` via list_update(), and for the
+# "custom" distribution passes the theoretical values through
+# `dparams$values`.
+#
+# @param layer_args List of layer arguments, or NULL/FALSE to disable
+# @return List of arguments for the stat function, or NULL
+.prep_layer_args <- function(layer_args) {
+    # isFALSE() is checked before `$`, which is invalid on atomic vectors
+    if (is.null(layer_args) || isFALSE(layer_args) || isTRUE(layer_args$disabled)) {
+        return(NULL)
+    }
+    layer_args$disabled <- NULL
+    layer_args <- list_update(layer_args, args)
+    # isTRUE() tolerates a missing `distribution` (NULL == "custom" is empty)
+    if (isTRUE(layer_args$distribution == "custom")) {
+        layer_args$dparams <- layer_args$dparams %||% list()
+        layer_args$dparams$values <- theor_vals
+    }
+    layer_args
+}
+band_args <- .prep_layer_args(band_args)
+line_args <- .prep_layer_args(line_args)
+point_args <- .prep_layer_args(point_args)
+title <- title %||% waiver()  # unset labels fall back to waiver()
+xlabel <- xlabel %||% waiver()
+ylabel <- ylabel %||% waiver()
+indata <- indata[complete.cases(indata), , drop = FALSE]  # NOTE(review): drops rows with an NA in ANY column, not only val_col
+indata <- indata[order(indata[[val_col]]), , drop = FALSE]  # sort rows by the plotted values
+p <- ggplot(data = indata, mapping = aes(sample = !!sym(val_col))) +
+    labs(title = title, x = xlabel, y = ylabel)
+if (!is.null(band_args)) {  # each layer is added only when its arguments are not NULL
+    p <- p + do_call(band_fun, band_args)
+}
+if (!is.null(line_args)) {
+    p <- p + do_call(line_fun, line_args)
+}
+if (!is.null(point_args)) {
+    p <- p + do_call(point_fun, point_args)
+}
+if (!is.null(ggs)) {
+    for (gg in ggs) {
+        p <- p + eval(parse(text = gg))  # envs.ggs entries are R code given as text; NOTE(review): must be trusted input
+    }
+}
+png(outfile, width=devpars$width, height=devpars$height, res=devpars$res)
+print(p)
+dev.off()

biopipen/scripts/regulatory/MotifAffinityTest.R ADDED Viewed

@@ -0,0 +1,226 @@
+# Script for regulatory.MotifAffinityTest
+source("{{biopipen_dir}}/utils/misc.R")
+library(BiocParallel)
+library(BSgenome)
+library(universalmotif)
+motiffile <- {{in.motiffile | r}}
+varfile <- {{in.varfile | r}}
+outdir <- {{out.outdir | r}}
+ncores <- {{envs.ncores | r}}
+tool <- {{envs.tool | r}}
+bcftools <- {{envs.bcftools | r}}
+genome <- {{envs.genome | r}}
+motif_col <- {{envs.motif_col | r}}
+regulator_col <- {{envs.regulator_col | r}}
+notfound <- {{envs.notfound | r}}
+motifdb <- {{envs.motifdb | r}}
+regmotifs <- {{envs.regmotifs | r}}
+devpars <- {{envs.devpars | r}}
+plot_nvars <- {{envs.plot_nvars | r}}
+plots <- {{envs.plots | r}}
+cutoff <- {{envs.cutoff | r}}
+# Validate the required inputs and options up front so that a
+# misconfiguration fails with a message naming the offending option.
+if (is.null(motifdb) || !file.exists(motifdb)) {
+    stop("Motif database (envs.motifdb) is required and must exist")
+}
+# The genome is a name (or BSgenome package name), not a file, so only its
+# presence is checked here; the option is envs.genome.
+if (is.null(genome)) {
+    stop("Reference genome (envs.genome) is required")
+}
+if (is.null(motiffile) || !file.exists(motiffile)) {
+    stop("Motif file (in.motiffile) is required and must exist")
+}
+if (is.null(varfile) || !file.exists(varfile)) {
+    stop("Variant file (in.varfile) is required and must exist")
+}
+if (is.null(motif_col) && is.null(regulator_col)) {
+    stop("Either motif (envs.motif_col) or regulator (envs.regulator_col) column must be provided")
+}
+# Read the regulator/motif table and make sure it ends up with a motif
+# column and without duplicated entries.
+log_info("Reading input regulator/motif file ...")
+in_motifs <- read.table(motiffile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
+if (is.null(motif_col)) {
+    # No motif column given: map regulators to motifs through envs.regmotifs
+    log_info("Inferring motifs from regulators ...")
+    if (is.null(regmotifs) || !file.exists(regmotifs)) {
+        stop("Regulator motifs (envs.regmotifs) is required and must exist when no motif column (envs.motif_col) is provided")
+    }
+    regmotifs <- read.table(regmotifs, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
+    # Accepted column names, in order of preference. They are kept apart
+    # from the detected columns so the error messages can list them.
+    motif_col_candidates <- c('Motif', 'motif', 'MOTIF', 'Model', 'model', 'MODEL')
+    reg_col_candidates <- c('Regulator', 'regulator', 'REGULATOR', 'TF', 'tf', 'Transcription factor', 'transcription factor', 'Transcription Factor')
+    rm_motif_col <- intersect(motif_col_candidates, colnames(regmotifs))
+    rm_reg_col <- intersect(reg_col_candidates, colnames(regmotifs))
+    if (length(rm_motif_col) == 0) {
+        stop("No motif column found in envs.regmotifs, provide one of: ", paste(motif_col_candidates, collapse = ", "))
+    }
+    if (length(rm_reg_col) == 0) {
+        stop("No regulator column found in envs.regmotifs, provide one of: ", paste(reg_col_candidates, collapse = ", "))
+    }
+    # Several candidates may be present; the most preferred one wins
+    rm_motif_col <- rm_motif_col[1]
+    rm_reg_col <- rm_reg_col[1]
+    # check regulators
+    rm_regs <- regmotifs[, rm_reg_col, drop = TRUE]
+    regulators <- in_motifs[, regulator_col, drop = TRUE]
+    notfound_regs <- setdiff(regulators, rm_regs)
+    if (length(notfound_regs) > 0 && notfound == "error") {
+        first_notfound <- head(notfound_regs, 3)
+        if (length(notfound_regs) > 3) {
+            # Long list: show the first three and write the rest to a file
+            first_notfound <- c(first_notfound, "...")
+            notfound_file <- file.path(outdir, "notfound_regulators.txt")
+            writeLines(notfound_regs, notfound_file)
+            msg1 <- paste0("The following regulators were not found in the envs.regmotifs file: ", paste(first_notfound, collapse = ", "))
+            msg2 <- paste0("Check the full list in ", notfound_file)
+            stop(msg1, "\n", msg2)
+        } else {
+            msg <- paste0("The following regulators were not found in the regmotifs file: ", paste(first_notfound, collapse = ", "))
+            stop(msg)
+        }
+    }
+    # Regulators without a known motif are dropped
+    in_motifs <- in_motifs[in_motifs[, regulator_col] %in% rm_regs, , drop = FALSE]
+    # add motif column
+    in_motifs <- merge(in_motifs, regmotifs, by.x = regulator_col, by.y = rm_reg_col, all.x = TRUE, suffixes = c("", "_db"))
+    motif_col <- rm_motif_col
+}
+if (is.null(regulator_col)) {
+    # make motifs unique (the result must be assigned back to in_motifs)
+    in_motifs <- in_motifs[!duplicated(in_motifs[, motif_col]), , drop = FALSE]
+} else {
+    # make regulator/motif pairs unique
+    in_motifs <- in_motifs[!duplicated(in_motifs[, c(regulator_col, motif_col)]), , drop = FALSE]
+}
+# Resolve the BSgenome package to use: a value containing a dot is taken as
+# a full package name, anything else is inserted into the
+# BSgenome.Hsapiens.UCSC.<genome> pattern. Stop early if the package is not
+# installed.
+genome_pkg <- if (grepl(".", genome, fixed = TRUE)) {
+    genome
+} else {
+    sprintf("BSgenome.Hsapiens.UCSC.%s", genome)
+}
+if (!requireNamespace(genome_pkg, quietly = TRUE)) stop(sprintf("Genome package %s is not installed", genome_pkg))
+log_info("Reading variant file ...")
+if (grepl("\\.vcf$", varfile) || grepl("\\.vcf\\.gz$", varfile)) {  # VCF input is converted to the 8-column BED layout read below
+    log_info("Converting VCF file to BED file ...")
+    varfile_bed <- file.path(outdir, gsub("\\.vcf(\\.gz)?$", ".bed", basename(varfile)))
+    cmd <- c(
+        bcftools, "query",
+        "-f", "%CHROM\\t%POS0\\t%END\\t%ID\\t0\\t+\\t%REF\\t%ALT{0}\\n",  # score is fixed to 0 and strand to +; only the first ALT allele is kept
+        "-i", 'FILTER="PASS" || FILTER="." || FILTER=""',  # keep records whose FILTER is PASS, missing or empty
+        "-o", varfile_bed,
+        varfile
+    )
+    run_command(cmd, fg = TRUE)
+    varfile <- varfile_bed  # continue with the converted file
+}
+# `chrom`, `start`, `end`, `name`, `score`, `strand`, `ref`, `alt`.
+snpinfo <- read.table(varfile, header=FALSE, stringsAsFactors=FALSE)  # no sep given: columns are split on any whitespace
+colnames(snpinfo) <- c("chrom", "start", "end", "name", "score", "strand", "ref", "alt")
+log_info("Reading motif database ...")
+meme <- read_meme(motifdb)
+# Check the requested motifs against the names found in the motif database.
+#
+# Reads `in_motifs`, `motif_col`, `notfound` and `outdir` from the script
+# scope. Missing motifs are reported according to `notfound`: "error" stops,
+# "ignore" logs warnings; any other value reports nothing. When more than
+# three motifs are missing, the full list is written to
+# <outdir>/notfound_motifs.txt and only the first three are shown.
+#
+# @param motifdb_names Character vector of motif names in the database
+# @return The requested motifs as given when none is missing; otherwise the
+#   unique requested motifs that exist in the database
+check_motifs <- function(motifdb_names) {
+    wanted <- in_motifs[, motif_col, drop = TRUE]
+    missing_motifs <- setdiff(wanted, motifdb_names)
+    if (length(missing_motifs) == 0) {
+        return(wanted)
+    }
+    shown <- head(missing_motifs, 3)
+    if (length(missing_motifs) > 3) {
+        shown <- c(shown, "...")
+        notfound_file <- file.path(outdir, "notfound_motifs.txt")
+        writeLines(missing_motifs, notfound_file)
+        report <- c(
+            paste0("The following motifs were not found in the motif database: ", paste(shown, collapse = ", ")),
+            paste0("Check the full list in ", notfound_file)
+        )
+    } else {
+        report <- paste0("The following motifs were not found in the motif database: ", paste(shown, collapse = ", "))
+    }
+    if (notfound == "error") {
+        stop(paste(report, collapse = "\n"))
+    } else if (notfound == "ignore") {
+        for (line in report) {
+            log_warn(line)
+        }
+    }
+    return(setdiff(wanted, missing_motifs))
+}
+plot_variant <- function(motifbreakr_results) {  # writes one PNG per variant into the plots directory under outdir
+    log_info("Plotting variants ...")
+    plotdir <- file.path(outdir, "plots")
+    dir.create(plotdir, showWarnings = FALSE)
+    results <- motifbreakr_results
+    if (is.null(plots) || length(plots) == 0) {  # no explicit selection in envs.plots
+        results <- results[order(-abs(results$alleleDiff)), , drop = FALSE]  # largest absolute alleleDiff first
+        results <- results[1:min(plot_nvars, length(results)), , drop = FALSE]  # keep the top plot_nvars rows; NOTE(review): 1:0 if results is empty
+        variants <- unique(results$SNP_id)
+    } else {
+        variants <- names(plots)
+    }
+    for (variant in variants) {
+        log_info("- Variant: {variant}")
+        if (is.null(plots[[variant]])) {  # fill in per-variant defaults; only the function-local copy of plots changes
+            plots[[variant]] <- list(devpars = devpars, which = "TRUE")
+        }
+        if (is.null(plots[[variant]]$which)) {
+            plots[[variant]]$which <- "TRUE"
+        }
+        if (is.null(plots[[variant]]$devpars)) {
+            plots[[variant]]$devpars <- devpars
+        }
+        if (is.null(plots[[variant]]$devpars$res)) {
+            plots[[variant]]$devpars$res <- 100
+        }
+        res <- results[results$SNP_id == variant, , drop = FALSE]
+        if (length(res) == 0) {
+            stop(sprintf("Variant %s not found in results", variant))
+        }
+        res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))  # which is R code given as text that selects the rows to plot
+        if (length(res) == 0) {
+            stop(sprintf("No variants to plot for %s", variant))
+        }
+        plotfile <- file.path(plotdir, sprintf("%s.png", slugify(variant)))
+        # fix motifBreakR 2.12 using names to filter in plotMB
+        names(res) <- res$SNP_id
+        dv <- plots[[variant]]$devpars
+        if (is.null(dv$height)) {  # default height grows with the number of rows plotted
+            dv$height <- 2.4 * dv$res + length(res) * 1.2 * dv$res
+        }
+        if (is.null(dv$width)) {  # default width grows with the span covered by the motif positions
+            left <- min(sapply(res$motifPos, `[`, 1))
+            right <- max(sapply(res$motifPos, `[`, 2))
+            dv$width <- 1.5 * dv$res + (right - left) * 0.3 * dv$res
+        }
+        png(plotfile, width = dv$width, height = dv$height, res = dv$res)
+        motifbreakR::plotMB(res, variant)
+        dev.off()
+    }
+}
+tool <- tolower(tool)  # tool names are matched case-insensitively
+tool <- match.arg(tool, c("motifbreakr", "atsnp"))  # stops on any other value
+if (tool == "motifbreakr") {
+    motifbreakr_args <- {{envs.motifbreakr_args | r}}
+    {% set sourcefile = biopipen_dir | joinpaths: "scripts", "regulatory", "MotifAffinityTest_MotifBreakR.R" %}
+    # {{ sourcefile | getmtime }}
+    source("{{sourcefile}}")  # the tool-specific script runs in this script's scope
+} else {  # atsnp
+    atsnp_args <- {{envs.atsnp_args | r}}
+    {% set sourcefile = biopipen_dir | joinpaths: "scripts", "regulatory", "MotifAffinityTest_AtSNP.R" %}
+    # {{ sourcefile | getmtime }}
+    source("{{sourcefile}}")  # the tool-specific script runs in this script's scope
+}

biopipen 0.28.1__py3-none-any.whl → 0.29.1__py3-none-any.whl

Potentially problematic release.

biopipen 0.28.1__py3-none-any.whl → 0.29.1__py3-none-any.whl