PyPI - biopipen - Versions diffs - 0.28.0__py3-none-any.whl → 0.29.0__py3-none-any.whl - Mend

biopipen 0.28.0__py3-none-any.whl → 0.29.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (83)

biopipen/__init__.py +1 -1
biopipen/core/config.toml +8 -0
biopipen/ns/bam.py +0 -2
biopipen/ns/bed.py +35 -0
biopipen/ns/cellranger_pipeline.py +5 -5
biopipen/ns/cnv.py +18 -2
biopipen/ns/cnvkit_pipeline.py +16 -11
biopipen/ns/gene.py +68 -23
biopipen/ns/misc.py +2 -15
biopipen/ns/plot.py +146 -0
biopipen/ns/regulation.py +214 -0
biopipen/ns/scrna.py +15 -3
biopipen/ns/snp.py +516 -8
biopipen/ns/stats.py +74 -2
biopipen/ns/vcf.py +196 -0
biopipen/reports/snp/PlinkCallRate.svelte +24 -0
biopipen/reports/snp/PlinkFreq.svelte +18 -0
biopipen/reports/snp/PlinkHWE.svelte +18 -0
biopipen/reports/snp/PlinkHet.svelte +18 -0
biopipen/reports/snp/PlinkIBD.svelte +18 -0
biopipen/scripts/bam/CNVpytor.py +144 -46
biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
biopipen/scripts/bed/BedtoolsMerge.py +1 -1
biopipen/scripts/cnv/AneuploidyScore.R +30 -7
biopipen/scripts/cnv/AneuploidyScoreSummary.R +5 -2
biopipen/scripts/cnv/TMADScore.R +21 -5
biopipen/scripts/cnv/TMADScoreSummary.R +6 -2
biopipen/scripts/cnvkit/CNVkitAccess.py +2 -1
biopipen/scripts/cnvkit/CNVkitAutobin.py +3 -2
biopipen/scripts/cnvkit/CNVkitBatch.py +1 -1
biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -1
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +1 -1
biopipen/scripts/cnvkit/CNVkitHeatmap.py +1 -1
biopipen/scripts/cnvkit/CNVkitReference.py +2 -1
biopipen/scripts/gene/GeneNameConversion.R +65 -0
biopipen/scripts/gene/GenePromoters.R +61 -0
biopipen/scripts/misc/Shell.sh +15 -0
biopipen/scripts/plot/Manhattan.R +140 -0
biopipen/scripts/plot/QQPlot.R +62 -0
biopipen/scripts/regulation/MotifAffinityTest.R +226 -0
biopipen/scripts/regulation/MotifAffinityTest_AtSNP.R +126 -0
biopipen/scripts/regulation/MotifAffinityTest_MotifBreakR.R +96 -0
biopipen/scripts/regulation/MotifScan.py +159 -0
biopipen/scripts/regulation/atSNP.R +33 -0
biopipen/scripts/regulation/motifBreakR.R +1594 -0
biopipen/scripts/scrna/CellsDistribution.R +2 -0
biopipen/scripts/scrna/MarkersFinder.R +59 -67
biopipen/scripts/scrna/SeuratClustering.R +63 -29
biopipen/scripts/scrna/SeuratMap2Ref.R +20 -0
biopipen/scripts/scrna/SeuratSubClustering.R +76 -27
biopipen/scripts/snp/MatrixEQTL.R +84 -43
biopipen/scripts/snp/Plink2GTMat.py +133 -0
biopipen/scripts/snp/PlinkCallRate.R +190 -0
biopipen/scripts/snp/PlinkFilter.py +100 -0
biopipen/scripts/snp/PlinkFreq.R +298 -0
biopipen/scripts/snp/PlinkFromVcf.py +78 -0
biopipen/scripts/snp/PlinkHWE.R +80 -0
biopipen/scripts/snp/PlinkHet.R +92 -0
biopipen/scripts/snp/PlinkIBD.R +197 -0
biopipen/scripts/snp/PlinkUpdateName.py +124 -0
biopipen/scripts/stats/MetaPvalue.R +2 -1
biopipen/scripts/stats/MetaPvalue1.R +70 -0
biopipen/scripts/tcr/TCRClusterStats.R +12 -7
biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
biopipen/scripts/vcf/BcftoolsSort.py +113 -0
biopipen/scripts/vcf/BcftoolsView.py +73 -0
biopipen/scripts/vcf/VcfFix_utils.py +1 -1
biopipen/scripts/vcf/bcftools_utils.py +52 -0
biopipen/utils/gene.R +83 -37
biopipen/utils/gene.py +108 -60
biopipen/utils/misc.R +56 -0
biopipen/utils/misc.py +5 -2
biopipen/utils/reference.py +54 -10
{biopipen-0.28.0.dist-info → biopipen-0.29.0.dist-info}/METADATA +2 -2
{biopipen-0.28.0.dist-info → biopipen-0.29.0.dist-info}/RECORD +78 -50
{biopipen-0.28.0.dist-info → biopipen-0.29.0.dist-info}/entry_points.txt +1 -1
biopipen/ns/bcftools.py +0 -111
biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
biopipen/scripts/gene/GeneNameConversion.py +0 -66
{biopipen-0.28.0.dist-info → biopipen-0.29.0.dist-info}/WHEEL +0 -0

biopipen/scripts/cnv/AneuploidyScore.R CHANGED Viewed

@@ -127,13 +127,32 @@ getCAA <- function(segf, cytoarm, tcn_col,
   return(as(seg_cyto_chr, "GRangesList"))
 }
-segments = read.table(segfile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
-seg = data.frame(
-    seqnames = segments[, chrom_col],
-    start = segments[, start_col],
-    end = segments[, end_col],
-    seg.mean = segments[, seg_col]
-)
+if (endsWith(segfile, ".vcf") || endsWith(segfile, ".vcf.gz")) {
+  library(VariantAnnotation)
+  vcf = readVcf(segfile)
+  seg = data.frame(
+      seqnames = as.character(seqnames(vcf)),
+      start = start(vcf),
+      end = vcf@info[[end_col]],
+      seg.mean = vcf@info[[seg_col]]
+  )
+} else if (endsWith(segfile, ".bed")) {
+  segments = read.table(segfile, header=F, row.names=NULL, sep="\t", stringsAsFactors=F)
+  seg = data.frame(
+      seqnames = segments[, 1],
+      start = segments[, 2],
+      end = segments[, 3],
+      seg.mean = segments[, 5]
+  )
+} else {
+  segments = read.table(segfile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
+  seg = data.frame(
+      seqnames = segments[, chrom_col],
+      start = segments[, start_col],
+      end = segments[, end_col],
+      seg.mean = segments[, seg_col]
+  )
+}
 {% if envs.segmean_transform %}
 segmean_transform = {{envs.segmean_transform}}
@@ -168,6 +187,10 @@ if (is.character(cn_transform)) {
 }
 {% endif %}
+seg <- seg[
+  !is.na(seg$seg.mean) & !is.na(seg$TCN) & !is.infinite(seg$seg.mean) & !is.infinite(seg$TCN),,
+  drop=FALSE]
 write.table(seg, file.path(outdir, "seg.txt"), sep="\t", quote=F, row.names=F, col.names=T)
 wgd_ploidy = checkIfWGD(

biopipen/scripts/cnv/AneuploidyScoreSummary.R CHANGED Viewed

@@ -52,8 +52,11 @@ if (!is.null(group_cols)) {
 if (!is.null(metafile)) {
     metadf = read.table(metafile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
-    sample_col = colnames(metadf)[1]
-    colnames(metadf)[1] = "Sample"
+    if (!is.null(metadf$Sample)) {
+        metadf$Sample = as.character(metadf$Sample)
+    } else {
+        colnames(metadf)[1] = "Sample"
+    }
     metadf = metadf[metadf$Sample %in% sams, c("Sample", meta_cols), drop=FALSE]
     if (nrow(metadf) != length(sams)) {
         stop(paste("Not all samples in metafile:", paste(setdiff(sams, metadf$Sample), collapse=", ")))

biopipen/scripts/cnv/TMADScore.R CHANGED Viewed

@@ -11,11 +11,27 @@ if (is.character(segmean_transform)) {
     segmean_transform = eval(parse(text=segmean_transform))
 } # otherwise NULL
-segments = read.table(segfile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
-seg = data.frame(
-    chrom = segments[, chrom_col],
-    log2 = segments[, seg_col]
-)
+if (endsWith(segfile, ".vcf") || endsWith(segfile, ".vcf.gz")) {
+  library(VariantAnnotation)
+  segments = readVcf(segfile)
+  seg = data.frame(
+      chrom = as.character(seqnames(segments)),
+      log2 = segments@info[[seg_col]]
+  )
+} else if (endsWith(segfile, ".bed")) {
+  segments = read.table(segfile, header=F, row.names=NULL, sep="\t", stringsAsFactors=F)
+  seg = data.frame(
+      chrom = segments[, 1],
+      log2 = segments[, 5]
+  )
+} else {
+  segments = read.table(segfile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
+  seg = data.frame(
+      chrom = segments[, chrom_col],
+      log2 = segments[, seg_col]
+  )
+}
 rm(segments)
 if (!is.null(excl_chroms) && length(excl_chroms) > 0) {

biopipen/scripts/cnv/TMADScoreSummary.R CHANGED Viewed

@@ -49,8 +49,12 @@ if (!is.null(group_cols)) {
 data = data.frame(Sample = sams, tMAD = tmads)
 if (file.exists(metafile) && length(meta_cols) > 0) {
     metadf = read.table(metafile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
-    sample_col = colnames(metadf)[1]
-    meta = metadf[, c(sample_col, meta_cols), drop=FALSE]
+    if (!is.null(metadf$Sample)) {
+        metadf$Sample = as.character(metadf$Sample)
+    } else {
+        colnames(metadf)[1] = "Sample"
+    }
+    meta = metadf[, c("Sample", meta_cols), drop=FALSE]
     colnames(meta) = c("Sample", meta_cols)
     data = data %>% left_join(meta, by="Sample")
 }

biopipen/scripts/cnvkit/CNVkitAccess.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from pathlib import Path
 from biopipen.utils.misc import run_command, dict_to_cli_args
 excfiles = {{in.excfiles | repr}}  # pyright: ignore
@@ -12,7 +13,7 @@ def main():
         "": [cnvkit, "access"],
         "s": min_gap_size,
         "o": outfile,
-        "_": reffile,
+        "_": Path(reffile).expanduser(),
     }
     if excfiles:
         other_args["exclude"] = excfiles

biopipen/scripts/cnvkit/CNVkitAutobin.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from pathlib import Path
 from biopipen.utils.misc import run_command, dict_to_cli_args
 bamfiles = {{in.bamfiles | repr}}  # pyright: ignore
@@ -20,7 +21,7 @@ short_names = {{envs.short_names | repr}}  # pyright: ignore
 def main():
     args = dict(
-        f=reffile,
+        f=Path(reffile).expanduser(),
         m=method,
         g=accfile,
         t=baitfile,
@@ -29,7 +30,7 @@ def main():
         target_min_size=target_min_size,
         antitarget_max_size=antitarget_max_size,
         antitarget_min_size=antitarget_min_size,
-        annotate=annotate,
+        annotate=Path(annotate).expanduser(),
         short_names=short_names,
         target_output_bed=target_file,
         antitarget_output_bed=antitarget_file,

biopipen/scripts/cnvkit/CNVkitBatch.py CHANGED Viewed

@@ -42,7 +42,7 @@ def gen_access():
         exclude=access_excludes or False,
         s=access_min_gap_size or False,
         o=accessfile,
-        _=ref,
+        _=Path(ref).expanduser(),
     )
     args[""] = [cnvkit, "access"]
     run_command(dict_to_cli_args(args, dashify=True), fg=True)

biopipen/scripts/cnvkit/CNVkitCoverage.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from pathlib import Path
 from biopipen.utils.misc import run_command, dict_to_cli_args
 bamfile = {{in.bamfile | quote}}  # pyright: ignore
@@ -13,7 +14,7 @@ ncores = {{envs.ncores | repr}}  # pyright: ignore
 def main():
     args = dict(
-        f=reffile,
+        f=Path(reffile).expanduser(),
         c=count,
         q=min_mapq,
         p=ncores,

biopipen/scripts/cnvkit/CNVkitGuessBaits.py CHANGED Viewed

@@ -60,7 +60,7 @@ params.update({
     "o": targetfile,
     "c": covfile,
     "p": ncores,
-    "f": ref,
+    "f": Path(ref).expanduser(),
     "s": samtools,
     "_": bamfiles,
 })

biopipen/scripts/cnvkit/CNVkitHeatmap.py CHANGED Viewed

@@ -4,7 +4,7 @@ from diot import Diot
 from biopipen.utils.misc import run_command, dict_to_cli_args
-segfiles = {{in.segfiles | repr}}  # pyright: ignore
+segfiles = {{in.segfiles | repr}}  # pyright: ignore # noqa
 sample_sex = {{in.sample_sex | repr}}  # pyright: ignore
 outdir = {{out.outdir | repr}}  # pyright: ignore
 cnvkit = {{envs.cnvkit | quote}}  # pyright: ignore

biopipen/scripts/cnvkit/CNVkitReference.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from pathlib import Path
 from biopipen.utils.misc import run_command, dict_to_cli_args
 covfiles = {{in.covfiles | repr}}  # pyright: ignore
@@ -18,7 +19,7 @@ no_rmask = {{envs.no_rmask | repr}}  # pyright: ignore
 def main():
     args = dict(
-        f=reffile,
+        f=Path(reffile).expanduser(),
         o=outfile,
         c=cluster,
         min_cluster_size=min_cluster_size,

biopipen/scripts/gene/GeneNameConversion.R ADDED Viewed

@@ -0,0 +1,65 @@
+source("{{biopipen_dir}}/utils/misc.R")
+source("{{biopipen_dir}}/utils/gene.R")
+infile <- {{in.infile | quote}}
+outfile <- {{out.outfile | quote}}
+notfound <- {{envs.notfound | r}}
+genecol <- {{envs.genecol | r}}
+output <- {{envs.output | r}}
+dup <- {{envs.dup | r}}
+infmt <- {{envs.infmt | r}}
+outfmt <- {{envs.outfmt | r}}
+species <- {{envs.species | r}}
+if (is.na(notfound)) {
+    notfound = "na"
+}
+df <- read.table(infile, header=TRUE, sep="\t", check.names=FALSE)
+if (genecol == 0) {
+    log_warn("envs.genecol should be 1-based, but 0 was given. Using 1 instead.")
+    genecol <- 1
+}
+if (is.numeric(genecol)) { genecol <- colnames(df)[genecol] }
+if (dup == "combine") { dup <- ";" }
+genes <- df[[genecol]]
+converted <- gene_name_conversion(
+    genes=genes,
+    species=species,
+    infmt=infmt,
+    outfmt=outfmt,
+    notfound=notfound,
+    dup=dup
+)
+#    <genecol> <outfmt>
+# 1  1255_g_at   GUCA1A
+# 2    1316_at     THRA
+# 3    1320_at   PTPN21
+# 4    1294_at  MIR5193
+# order the converted dataframe by the original gene column
+converted <- converted[order(match(converted$query, genes)), , drop=FALSE]
+outcol <- outfmt
+if (notfound == "skip" || notfound == "ignore") {
+    df <- df[df[[genecol]] %in% converted$query, , drop=FALSE]
+}
+if (output == "append") {
+    if (outfmt %in% colnames(df)) {
+        log_warn("The output column name already exists in the input dataframe. Appending with a suffix `_1`.")
+        outcol <- paste(outfmt, "_1", sep="")
+    }
+    df[[outcol]] <- converted[[outfmt]]
+} else if (output == "replace") {
+    df[[genecol]] <- converted[[outfmt]]
+} else if (output == "with-query") {
+    df <- converted
+} else {
+    df <- converted[, outfmt, drop=FALSE]
+}
+write.table(df, file=outfile, sep="\t", quote=FALSE, row.names=FALSE)

biopipen/scripts/gene/GenePromoters.R ADDED Viewed

@@ -0,0 +1,61 @@
+library(rlang)
+library(rtracklayer)
+infile <- {{in.infile | r}}
+outfile <- {{out.outfile | r}}
+up <- {{envs.up | r}}
+down <- {{envs.down | r}}
+notfound <- {{envs.notfound | r}}
+refgene <- {{envs.refgene | r}}
+header <- {{envs.header | r}}
+genecol <- {{envs.genecol | r}}
+match_id <- {{envs.match_id | r}}
+sort_ <- {{envs.sort | r}}
+chrsize <- {{envs.chrsize | r}}
+down <- down %||% up
+refgenes <- readGFF(refgene)
+refcol <- ifelse(match_id, "gene_id", "gene_name")
+if (infile == "/dev/null") {
+    genes <- unique(refgenes[[refcol]])
+} else {
+    data <- read.table(infile, header=header, sep="\t", stringsAsFactors=FALSE, check.names=FALSE)
+    genes <- data[[genecol]]
+    rm(data)
+}
+notfound_genes <- setdiff(genes, refgenes[[refcol]])
+if (notfound == "error" && length(notfound_genes) > 0) {
+    stop(paste(
+        "The following genes were not found in the reference annotation:",
+        paste(notfound_genes, collapse=", ")
+    ))
+} else if (notfound == 'skip') {
+    genes <- genes[!genes %in% notfound_genes]
+}
+# Select the genes that are in the reference annotation and keep the order
+# of the records in genes
+refgenes <- refgenes[match(genes, refgenes[[refcol]]), , drop = FALSE]
+refgenes <- unique(makeGRangesFromDataFrame(refgenes, keep.extra.columns=TRUE))
+proms <- promoters(refgenes, up=up, down=down)
+# Scores must be non-NA numeric values
+elementMetadata(proms)$name <- elementMetadata(proms)[[refcol]]
+score(proms) <- 0
+start(proms) <- pmax(1, start(proms))
+if (sort_) {
+    chrom_sizes <- read.table(chrsize, header=FALSE, stringsAsFactors=FALSE, sep="\t")
+    common_chroms <- intersect(chrom_sizes$V1, seqlevels(proms))
+    if (length(common_chroms) == 0) {
+        stop("No common chromosomes found between the promoters and the chromosome sizes. Do you use the correct chromosome sizes file?")
+    }
+    proms <- keepSeqlevels(proms, common_chroms, pruning.mode="coarse")
+    seqlevels(proms) <- common_chroms
+    proms <- sort(proms, ignore.strand = TRUE)
+}
+export.bed(proms, outfile)

biopipen/scripts/misc/Shell.sh ADDED Viewed

@@ -0,0 +1,15 @@
+# shellcheck disable=all
+export infile={{in.infile | quote}}
+export outfile={{out.outfile | quote}}
+is_outdir={{envs.outdir | int}}
+cmd_given={{envs.cmd | bool | int}}
+{% set _ = out.outfile | dirname | joinpath: "cmd.sh" | as_path | attr: 'write_text' | call: envs.cmd %}
+cmd="{{proc.lang}} {{out.outfile | dirname | joinpath: 'cmd.sh'}}"
+if [[ "$cmd_given" -eq 0 ]]; then
+    echo "No command given." 1>&2
+    exit 1
+fi
+if [[ $is_outdir -eq 1 ]]; then
+    mkdir -p "$outfile"
+fi
+eval "$cmd"

biopipen/scripts/plot/Manhattan.R ADDED Viewed

@@ -0,0 +1,140 @@
+source("{{biopipen_dir}}/utils/misc.R")
+library(rlang)
+library(ggmanh)
+infile <- {{in.infile | r}}
+outfile <- {{out.outfile | r}}
+chrom_col <- {{envs.chrom_col | r}}
+pos_col <- {{envs.pos_col | r}}
+pval_col <- {{envs.pval_col | r}}
+label_col <- {{envs.label_col | r}}
+devpars <- {{envs.devpars | r}}
+title <- {{envs.title | r}}
+ylabel <- {{envs.ylabel | r}}
+rescale <- {{envs.rescale | r}}
+rescale_ratio_threshold <- {{envs.rescale_ratio_threshold | r}}
+signif <- {{envs.signif | r}}
+hicolors <- {{envs.hicolors | r}}
+thin_n <- {{envs.thin_n | r}}
+thin_bins <- {{envs.thin_bins | r}}
+zoom <- {{envs.zoom | r}}
+zoom_devpars <- {{envs.zoom_devpars | r}}
+chroms <- {{envs.chroms | r}}
+args <- {{envs.args | r: todot="-"}}
+data <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
+# normalize columns
+cnames <- colnames(data)
+if (is.numeric(chrom_col)) { chrom_col <- cnames[chrom_col] }
+if (is.numeric(pos_col)) { pos_col <- cnames[pos_col] }
+if (is.numeric(pval_col)) { pval_col <- cnames[pval_col] }
+if (is.numeric(label_col)) { label_col <- cnames[label_col] }
+# normalize chroms
+norm_chroms <- function(chrs) {
+    chrs <- as.character(chrs)
+    if (length(chrs) == 1 && grepl(",", chrs)) {
+        chrs <- trimws(unlist(strsplit(chrs, ",")))
+    }
+    if (length(chrs) > 1) {
+        return(unique(unlist(sapply(chrs, function(chr) norm_chroms(chr)))))
+    }
+    if (!grepl("-", chrs)) { return(chrs) }
+    # expand chr1-22 -> chr1, chr2, ..., chr22
+    # chr1-22 -> 'chr1', '22'
+    chrs <- unlist(strsplit(chrs, "-"))
+    if (length(chrs) != 2) {
+        stop(paste0("Invalid chroms: ", chrs))
+    }
+    # detect prefix
+    prefix1 <- gsub("[0-9]", "", chrs[1])
+    prefix2 <- gsub("[0-9]", "", chrs[2])
+    if (nchar(prefix2) > 0 && prefix1 != prefix2) {
+        stop(paste0("Invalid chroms: ", chrs, " (prefix mismatch)"))
+    }
+    chr_a <- as.integer(substring(chrs[1], nchar(prefix1) + 1))
+    chr_b <- as.integer(substring(chrs[2], nchar(prefix2) + 1))
+    chr_min <- min(chr_a, chr_b)
+    chr_max <- max(chr_a, chr_b)
+    return(paste0(prefix1, chr_min:chr_max))
+}
+log_info("Preparing data for plotting ...")
+if (length(chroms) == 1 && chroms == "auto") {
+    chroms <- unique(data[[chrom_col]])
+} else {
+    chroms <- norm_chroms(chroms)
+}
+# prepare data
+mp_prep_args = list()
+if (length(signif) == 1 && is.character(signif)) {
+    signif <- as.numeric(trimws(unlist(strsplit(signif, ","))))
+}
+siglevel <- min(signif)
+if (!is.null(label_col)) {
+    data$.label <- ifelse(data[[pval_col]] < siglevel, data[[label_col]], "")
+}
+if (!is.null(hicolors)) {
+    sig_str <- "Significant"
+    nsig_str <- "Not significant"
+    data$.highlight <- ifelse(data[[pval_col]] < siglevel, sig_str, nsig_str)
+    if (length(hicolors) == 1) { hicolors <- c(hicolors, "grey") }
+    names(hicolors) <- c(sig_str, nsig_str)
+    mp_prep_args$highlight.colname <- ".highlight"
+    mp_prep_args$highlight.col <- hicolors
+}
+mp_prep_args$x <- data
+mp_prep_args$chr.colname <- chrom_col
+mp_prep_args$pos.colname <- pos_col
+mp_prep_args$pval.colname <- pval_col
+mp_prep_args$chr.order <- chroms
+if (!is.null(thin_n) && thin_n > 0) {
+    mp_prep_args$thin.n <- thin_n
+    mp_prep_args$thin.bins <- thin_bins
+}
+mpdata <- do_call(manhattan_data_preprocess, mp_prep_args)
+# plot
+log_info("Plotting Manhattan plot ...")
+args$x <- mpdata
+args$signif <- signif
+args$plot.title <- title
+args$rescale <- rescale
+args$rescale.ratio.threshold <- rescale_ratio_threshold
+if (!is.null(hicolors)) { args$color.by.highlight <- TRUE }
+if (!is.null(label_col)) { args$label.colname <- ".label" }
+g <- do_call(manhattan_plot, args)
+png(outfile, width=devpars$width, height=devpars$height, res=devpars$res)
+print(g)
+dev.off()
+# zoom into chromosomes
+if (!is.null(zoom)) {
+    log_info("Zooming into chromosomes ...")
+    zoom <- norm_chroms(zoom)
+    for (z in zoom) {
+        log_info("- {z}")
+        args_z <- args
+        args_z$chromosome <- z
+        args_z$plot.title <- paste0(title, " (", z, ")")
+        args_z$x.label <- "Position"
+        g_z <- do_call(manhattan_plot, args_z)
+        outfile_z <- gsub("\\.png$", paste0("-", z, ".png"), outfile)
+        zm_devpars <- zoom_devpars
+        zm_devpars$res <- zm_devpars$res %||% devpars$res
+        zm_devpars$height <- zm_devpars$height %||% devpars$height
+        png(
+            outfile_z,
+            width=zm_devpars$width,
+            height=zm_devpars$height,
+            res=zm_devpars$res
+        )
+        print(g_z)
+        dev.off()
+    }
+}

biopipen/scripts/plot/QQPlot.R ADDED Viewed

@@ -0,0 +1,62 @@
+source("{{biopipen_dir}}/utils/misc.R")
+library(ggplot2)
+library(ggprism)
+library(qqplotr)
+theme_set(theme_prism())
+infile <- {{in.infile | r}}
+outfile <- {{out.outfile | r}}
+val_col <- {{envs.val_col | r}}
+devpars <- {{envs.devpars | r}}
+title <- {{envs.title | r}}
+xlabel <- {{envs.xlabel | r}}
+ylabel <- {{envs.ylabel | r}}
+kind <- {{envs.kind | r}}
+trans <- {{envs.trans | r}}
+band_args <- {{envs.band | r}}
+line_args <- {{envs.line | r}}
+point_args <- {{envs.point | r}}
+ggs <- {{envs.ggs | r}}
+indata <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
+if (is.numeric(val_col)) { val_col <- colnames(indata)[val_col] }
+band_fun <- ifelse(kind == "pp", stat_pp_band, stat_qq_band)
+line_fun <- ifelse(kind == "pp", stat_pp_line, stat_qq_line)
+point_fun <- ifelse(kind == "pp", stat_pp_point, stat_qq_point)
+title <- title %||% waiver()
+xlabel <- xlabel %||% waiver()
+ylabel <- ylabel %||% waiver()
+if (!is.null(trans)) {
+    trans <- trimws(trans)
+    if (trans == "-log10") {
+        trans <- function(x) -log10(x)
+    } else {
+        trans <- eval(parse(text = trans))
+    }
+    indata$.trans_val <- trans(indata[[val_col]])
+    val_col <- ".trans_val"
+}
+indata <- indata[!is.na(indata[[val_col]]), , drop=FALSE]
+p <- ggplot(data = indata, mapping = aes(sample = !!sym(val_col))) +
+    do_call(band_fun, band_args) +
+    do_call(line_fun, line_args) +
+    do_call(point_fun, point_args) +
+    labs(title = title, x = xlabel, y = ylabel)
+if (!is.null(ggs)) {
+    for (gg in ggs) {
+        p <- p + eval(parse(text = gg))
+    }
+}
+png(outfile, width=devpars$width, height=devpars$height, res=devpars$res)
+print(p)
+dev.off()

biopipen 0.28.0__py3-none-any.whl → 0.29.0__py3-none-any.whl

Potentially problematic release.

biopipen 0.28.0__py3-none-any.whl → 0.29.0__py3-none-any.whl