PyPI - biopipen - Versions diffs - 0.29.0__py3-none-any.whl → 0.29.1__py3-none-any.whl - Mend

biopipen 0.29.0__py3-none-any.whl → 0.29.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (24) hide show

biopipen/__init__.py +1 -1
biopipen/ns/plot.py +66 -8
biopipen/ns/{regulation.py → regulatory.py} +3 -3
biopipen/ns/scrna.py +16 -2
biopipen/ns/stats.py +93 -1
biopipen/scripts/delim/SampleInfo.R +10 -5
biopipen/scripts/plot/Manhattan.R +6 -0
biopipen/scripts/plot/QQPlot.R +100 -16
biopipen/scripts/{regulation → regulatory}/MotifAffinityTest.R +3 -3
biopipen/scripts/{regulation → regulatory}/MotifScan.py +1 -1
biopipen/scripts/scrna/MarkersFinder.R +28 -18
biopipen/scripts/scrna/SeuratClustering.R +8 -0
biopipen/scripts/scrna/SeuratPreparing.R +252 -122
biopipen/scripts/snp/MatrixEQTL.R +2 -2
biopipen/scripts/snp/PlinkIBD.R +3 -0
biopipen/scripts/stats/Mediation.R +94 -0
{biopipen-0.29.0.dist-info → biopipen-0.29.1.dist-info}/METADATA +1 -1
{biopipen-0.29.0.dist-info → biopipen-0.29.1.dist-info}/RECORD +24 -23
{biopipen-0.29.0.dist-info → biopipen-0.29.1.dist-info}/entry_points.txt +1 -1
/biopipen/scripts/{regulation → regulatory}/MotifAffinityTest_AtSNP.R +0 -0
/biopipen/scripts/{regulation → regulatory}/MotifAffinityTest_MotifBreakR.R +0 -0
/biopipen/scripts/{regulation → regulatory}/atSNP.R +0 -0
/biopipen/scripts/{regulation → regulatory}/motifBreakR.R +0 -0
{biopipen-0.29.0.dist-info → biopipen-0.29.1.dist-info}/WHEEL +0 -0

biopipen/scripts/scrna/SeuratPreparing.R CHANGED Viewed

@@ -1,19 +1,27 @@
 source("{{biopipen_dir}}/utils/misc.R")
+source("{{biopipen_dir}}/utils/caching.R")
 library(Seurat)
 library(future)
 library(bracer)
 library(ggplot2)
 library(dplyr)
-library(tidyseurat)
+# library(tidyseurat)
-metafile = {{in.metafile | quote}}
-rdsfile = {{out.rdsfile | quote}}
-joboutdir = {{job.outdir | quote}}
-envs = {{envs | r: todot = "-", skip = 1}}
+metafile <- {{in.metafile | quote}}
+rdsfile <- {{out.rdsfile | quote}}
+joboutdir <- {{job.outdir | quote}}
+envs <- {{envs | r: todot = "-", skip = 1}}
+if (isTRUE(envs$cache)) { envs$cache <- joboutdir }
+if (length(envs$cache) > 1) {
+    log_warn("Multiple cache directories (envs.cache) detected, using the first one.")
+    envs$cache <- envs$cache[1]
+}
 set.seed(8525)
-options(future.globals.maxSize = 80000 * 1024^2)
+# 8TB
+options(future.globals.maxSize = 8 * 1024 ^ 4)
 options(future.rng.onMisuse="ignore")
 options(Seurat.object.assay.version = "v5")
 plan(strategy = "multicore", workers = envs$ncores)
@@ -34,7 +42,7 @@ add_report(
     h1 = "Filters and QC"
 )
-metadata = read.table(
+metadata <- read.table(
     metafile,
     header = TRUE,
     row.names = NULL,
@@ -42,6 +50,16 @@ metadata = read.table(
     check.names = FALSE
 )
+cache_sig <- capture.output(str(metadata))
+dig_sig <- digest::digest(cache_sig, algo = "md5")
+dig_sig <- substr(dig_sig, 1, 8)
+cache_dir <- NULL
+if (is.character(envs$cache)) {
+    cache_dir <- file.path(envs$cache, paste0(dig_sig, ".seuratpreparing_cache"))
+    dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)
+    writeLines(cache_sig, file.path(cache_dir, "signature.txt"))
+}
 meta_cols = colnames(metadata)
 if (!"Sample" %in% meta_cols) {
     stop("Error: Column `Sample` is not found in metafile.")
@@ -90,21 +108,21 @@ rename_files = function(e, sample, path) {
 perform_cell_qc <- function(sobj, per_sample = FALSE) {
-    log_prefix = ifelse(per_sample, "  ", "- ")
+    log_prefix <- ifelse(per_sample, "  ", "- ")
     log_info("{log_prefix}Adding metadata for QC ...")
-    sobj$percent.mt = PercentageFeatureSet(sobj, pattern = "^MT-")
-    sobj$percent.ribo = PercentageFeatureSet(sobj, pattern = "^RP[SL]")
-    sobj$percent.hb = PercentageFeatureSet(sobj, pattern = "^HB[^(P)]")
-    sobj$percent.plat = PercentageFeatureSet(sobj, pattern = "PECAM1|PF4")
+    sobj$percent.mt <- PercentageFeatureSet(sobj, pattern = "^MT-")
+    sobj$percent.ribo <- PercentageFeatureSet(sobj, pattern = "^RP[SL]")
+    sobj$percent.hb <- PercentageFeatureSet(sobj, pattern = "^HB[^(P)]")
+    sobj$percent.plat <- PercentageFeatureSet(sobj, pattern = "PECAM1|PF4")
     if (is.null(envs$cell_qc) || length(envs$cell_qc) == 0) {
         log_warn("{log_prefix}No cell QC criteria is provided. All cells will be kept.")
-        cell_qc = "TRUE"
+        cell_qc <- "TRUE"
     } else {
-        cell_qc = envs$cell_qc
+        cell_qc <- envs$cell_qc
     }
-    sobj = sobj %>% mutate(.QC = !!rlang::parse_expr(cell_qc))
+    sobj@meta.data <- sobj@meta.data %>% mutate(.QC = !!rlang::parse_expr(cell_qc))
     if (is.null(cell_qc_df)) {
         cell_qc_df <<- sobj@meta.data[, c("Sample", ".QC", feats), drop = FALSE]
@@ -114,8 +132,8 @@ perform_cell_qc <- function(sobj, per_sample = FALSE) {
     # Do the filtering
     log_info("{log_prefix}Filtering cells using QC criteria ...")
-    sobj = sobj %>% filter(.QC)
-    sobj$.QC = NULL
+    sobj <- subset(sobj, subset = .QC)
+    sobj$.QC <- NULL
     return(sobj)
 }
@@ -281,42 +299,83 @@ load_sample = function(sample) {
     obj
 }
-# Load data
-log_info("Reading samples individually ...")
-obj_list = lapply(samples, load_sample)
-log_info("Merging samples ...")
-sobj = Reduce(merge, obj_list)
+cached <- get_cached(
+    list(cell_qc = envs$cell_qc, cell_qc_per_sample = envs$cell_qc_per_sample, use_sct = envs$use_sct),
+    "CellQC",
+    cache_dir
+)
+if (!is.null(cached$data)) {
+    log_info("Loading cell-QC'ed object from cache ...")
+    sobj <- cached$data$sobj
+    cell_qc_df <- cached$data$cell_qc_df
+    cached$data$sobj <- NULL
+    cached$data$cell_qc_df <- NULL
+    cached$data <- NULL
+    rm(cached)
+    gc()
+} else {
+    # Load data
+    log_info("Reading samples individually ...")
+    obj_list = lapply(samples, load_sample)
+    log_info("Merging samples ...")
+    sobj = Reduce(merge, obj_list)
+    rm(obj_list)
+    gc()
+    if (!envs$cell_qc_per_sample) {
+        log_info("Performing cell QC ...")
+        sobj = perform_cell_qc(sobj)
+    }
-if (!envs$cell_qc_per_sample) {
-    log_info("Performing cell QC ...")
-    sobj = perform_cell_qc(sobj)
+    cached$data = list(sobj = sobj, cell_qc_df = cell_qc_df)
+    save_to_cache(cached, "CellQC", cache_dir)
 }
 # plot and report the QC
 log_info("Plotting and reporting QC ...")
 dim_df = report_cell_qc(nrow(sobj))
-log_info("Filtering genes ...")
 if (is.list(envs$gene_qc)) {
-    genes <- rownames(sobj)
-    filtered <- FALSE
-    if (!is.null(envs$gene_qc$min_cells) && envs$gene_qc$min_cells > 0) {
-        genes = genes[Matrix::rowSums(sobj) >= envs$gene_qc$min_cells]
-        filtered <- TRUE
-    }
-    excludes <- envs$gene_qc$excludes
-    if (!is.null(excludes)) {
-        if (length(excludes) == 1) {
-            excludes <- trimws(unlist(strsplit(excludes, ",")))
+    cached <- get_cached(
+        list(
+            cell_qc = envs$cell_qc,
+            gene_qc = envs$gene_qc,
+            cell_qc_per_sample = envs$cell_qc_per_sample,
+            use_sct = envs$use_sct
+        ),
+        "GeneQC",
+        cache_dir
+    )
+    if (!is.null(cached$data)) {
+        log_info("Loading gene-QC'ed object from cache ...")
+        sobj <- cached$data
+        cached$data <- NULL
+        rm(cached)
+        gc()
+    } else {
+        log_info("Filtering genes ...")
+        genes <- rownames(sobj)
+        filtered <- FALSE
+        if (!is.null(envs$gene_qc$min_cells) && envs$gene_qc$min_cells > 0) {
+            genes = genes[Matrix::rowSums(sobj) >= envs$gene_qc$min_cells]
+            filtered <- TRUE
         }
-        for (ex in excludes) {
-            genes <- genes[!grepl(ex, genes)]
+        excludes <- envs$gene_qc$excludes
+        if (!is.null(excludes)) {
+            if (length(excludes) == 1) {
+                excludes <- trimws(unlist(strsplit(excludes, ",")))
+            }
+            for (ex in excludes) {
+                genes <- genes[!grepl(ex, genes)]
+            }
+            filtered <- TRUE
         }
-        filtered <- TRUE
-    }
-    if (filtered) {
-        sobj = subset(sobj, features = genes)
+        if (filtered) {
+            sobj = subset(sobj, features = genes)
+        }
+        cached$data <- sobj
+        save_to_cache(cached, "GeneQC", cache_dir)
     }
 }
 dim_df = rbind(
@@ -350,96 +409,167 @@ add_report(
     paste(capture.output(str(args)), collapse = ", ")
 }
-log_info("Performing transformation/scaling ...")
-# Not joined yet
-# sobj[["RNA"]] <- split(sobj[["RNA"]], f = sobj$Sample)
-if (envs$use_sct) {
-    log_info("- Running SCTransform ...")
-    SCTransformArgs <- envs$SCTransform
-    # log to stdout but don't populate it to running log
-    print(paste0("  SCTransform: ", .formatArgs(SCTransformArgs)))
-    log_debug("  SCTransform: {.formatArgs(SCTransformArgs)}")
-    SCTransformArgs$object <- sobj
-    sobj <- do_call(SCTransform, SCTransformArgs)
-    # Default is to use the SCT assay
+envs_cache <- envs
+envs_cache$ncores <- NULL
+envs_cache$DoubletFinder <- NULL
+envs_cache$IntegrateLayers <- NULL
+cached <- get_cached(envs_cache, "Transformed", cache_dir)
+if (!is.null(cached$data)) {
+    log_info("Loading transformed object from cache ...")
+    sobj <- cached$data
+    cached$data <- NULL
+    rm(cached)
+    gc()
 } else {
-    log_info("- Running NormalizeData ...")
-    NormalizeDataArgs <- envs$NormalizeData
-    print(paste0("  NormalizeData: ", .formatArgs(NormalizeDataArgs)))
-    log_debug("  NormalizeData: {.formatArgs(NormalizeDataArgs)}")
-    NormalizeDataArgs$object <- sobj
-    sobj <- do_call(NormalizeData, NormalizeDataArgs)
-    log_info("- Running FindVariableFeatures ...")
-    FindVariableFeaturesArgs <- envs$FindVariableFeatures
-    print(paste0("  FindVariableFeatures: ", .formatArgs(FindVariableFeaturesArgs)))
-    log_debug("  FindVariableFeatures: {.formatArgs(FindVariableFeaturesArgs)}")
-    FindVariableFeaturesArgs$object <- sobj
-    sobj <- do_call(FindVariableFeatures, FindVariableFeaturesArgs)
-    log_info("- Running ScaleData ...")
-    ScaleDataArgs <- envs$ScaleData
-    print(paste0("  ScaleData: ", .formatArgs(ScaleDataArgs)))
-    log_debug("  ScaleData: {.formatArgs(ScaleDataArgs)}")
-    ScaleDataArgs$object <- sobj
-    sobj <- do_call(ScaleData, ScaleDataArgs)
+    log_info("Performing transformation/scaling ...")
+    # Not joined yet
+    # sobj[["RNA"]] <- split(sobj[["RNA"]], f = sobj$Sample)
+    if (envs$use_sct) {
+        log_info("- Running SCTransform ...")
+        SCTransformArgs <- envs$SCTransform
+        # log to stdout but don't populate it to running log
+        print(paste0("  SCTransform: ", .formatArgs(SCTransformArgs)))
+        log_debug("  SCTransform: {.formatArgs(SCTransformArgs)}")
+        SCTransformArgs$object <- sobj
+        sobj <- do_call(SCTransform, SCTransformArgs)
+        # Default is to use the SCT assay
+        # Cleanup memory
+        SCTransformArgs$object <- NULL
+        rm(SCTransformArgs)
+        gc()
+    } else {
+        log_info("- Running NormalizeData ...")
+        NormalizeDataArgs <- envs$NormalizeData
+        print(paste0("  NormalizeData: ", .formatArgs(NormalizeDataArgs)))
+        log_debug("  NormalizeData: {.formatArgs(NormalizeDataArgs)}")
+        NormalizeDataArgs$object <- sobj
+        sobj <- do_call(NormalizeData, NormalizeDataArgs)
+        # Cleanup memory
+        NormalizeDataArgs$object <- NULL
+        rm(NormalizeDataArgs)
+        gc()
+        log_info("- Running FindVariableFeatures ...")
+        FindVariableFeaturesArgs <- envs$FindVariableFeatures
+        print(paste0("  FindVariableFeatures: ", .formatArgs(FindVariableFeaturesArgs)))
+        log_debug("  FindVariableFeatures: {.formatArgs(FindVariableFeaturesArgs)}")
+        FindVariableFeaturesArgs$object <- sobj
+        sobj <- do_call(FindVariableFeatures, FindVariableFeaturesArgs)
+        # Cleanup memory
+        FindVariableFeaturesArgs$object <- NULL
+        rm(FindVariableFeaturesArgs)
+        gc()
+        log_info("- Running ScaleData ...")
+        ScaleDataArgs <- envs$ScaleData
+        print(paste0("  ScaleData: ", .formatArgs(ScaleDataArgs)))
+        log_debug("  ScaleData: {.formatArgs(ScaleDataArgs)}")
+        ScaleDataArgs$object <- sobj
+        sobj <- do_call(ScaleData, ScaleDataArgs)
+        # Cleanup memory
+        ScaleDataArgs$object <- NULL
+        rm(ScaleDataArgs)
+        gc()
+    }
+    log_info("- Running RunPCA ...")
+    RunPCAArgs <- envs$RunPCA
+    RunPCAArgs$npcs <- if (is.null(RunPCAArgs$npcs)) { 50 } else { min(RunPCAArgs$npcs, ncol(sobj) - 1) }
+    print(paste0("  RunPCA: ", .formatArgs(RunPCAArgs)))
+    log_debug("  RunPCA: {.formatArgs(RunPCAArgs)}")
+    RunPCAArgs$object <- sobj
+    sobj <- do_call(RunPCA, RunPCAArgs)
+    # Cleanup memory
+    RunPCAArgs$object <- NULL
+    rm(RunPCAArgs)
+    gc()
+    cached$data <- sobj
+    save_to_cache(cached, "Transformed", cache_dir)
 }
-log_info("- Running RunPCA ...")
-RunPCAArgs <- envs$RunPCA
-RunPCAArgs$npcs <- if (is.null(RunPCAArgs$npcs)) { 50 } else { min(RunPCAArgs$npcs, ncol(sobj) - 1) }
-print(paste0("  RunPCA: ", .formatArgs(RunPCAArgs)))
-log_debug("  RunPCA: {.formatArgs(RunPCAArgs)}")
-RunPCAArgs$object <- sobj
-sobj <- do_call(RunPCA, RunPCAArgs)
-if (!envs$no_integration) {
-    log_info("- Running IntegrateLayers (method = {envs$IntegrateLayers$method}) ...")
-    IntegrateLayersArgs <- envs$IntegrateLayers
-    method <- IntegrateLayersArgs$method
-    if (!is.null(IntegrateLayersArgs$reference) && is.character(IntegrateLayersArgs$reference)) {
-        log_info("  Using reference samples: {paste(IntegrateLayersArgs$reference, collapse = ', ')}")
-        IntegrateLayersArgs$reference <- match(IntegrateLayersArgs$reference, samples)
-        log_info("  Transferred to indices: {paste(IntegrateLayersArgs$reference, collapse = ', ')}")
-    }
-    if (method %in% c("CCA", "cca")) { method <- "CCAIntegration" } else
-    if (method %in% c("RPCA", "rpca")) { method <- "RPCAIntegration" } else
-    if (method %in% c("Harmony", "harmony")) { method <- "HarmonyIntegration" } else
-    if (method %in% c("FastMNN", "fastmnn")) { method <- "FastMNNIntegration" } else
-    if (method %in% c("scVI", "scvi")) { method <- "scVIIntegration" } else
-    { stop(paste0("Unknown integration method: ", method)) }
-    if (envs$use_sct && is.null(IntegrateLayersArgs$normalization.method)) {
-        IntegrateLayersArgs$normalization.method <- "SCT"
+envs_cache <- envs
+envs_cache$ncores <- NULL
+envs_cache$DoubletFinder <- NULL
+cached <- get_cached(envs_cache, "Integrated", cache_dir)
+if (!is.null(cached$data)) {
+    log_info("Loading integrated/layer-joined object from cache ...")
+    sobj <- cached$data
+    cached$data <- NULL
+    rm(cached)
+    gc()
+} else {
+    if (!envs$no_integration) {
+        log_info("- Running IntegrateLayers (method = {envs$IntegrateLayers$method}) ...")
+        IntegrateLayersArgs <- envs$IntegrateLayers
+        method <- IntegrateLayersArgs$method
+        if (!is.null(IntegrateLayersArgs$reference) && is.character(IntegrateLayersArgs$reference)) {
+            log_info("  Using reference samples: {paste(IntegrateLayersArgs$reference, collapse = ', ')}")
+            IntegrateLayersArgs$reference <- match(IntegrateLayersArgs$reference, samples)
+            log_info("  Transferred to indices: {paste(IntegrateLayersArgs$reference, collapse = ', ')}")
+        }
+        if (method %in% c("CCA", "cca")) { method <- "CCAIntegration" } else
+        if (method %in% c("RPCA", "rpca")) { method <- "RPCAIntegration" } else
+        if (method %in% c("Harmony", "harmony")) { method <- "HarmonyIntegration" } else
+        if (method %in% c("FastMNN", "fastmnn")) { method <- "FastMNNIntegration" } else
+        if (method %in% c("scVI", "scvi")) { method <- "scVIIntegration" } else
+        { stop(paste0("Unknown integration method: ", method)) }
+        if (envs$use_sct && is.null(IntegrateLayersArgs$normalization.method)) {
+            IntegrateLayersArgs$normalization.method <- "SCT"
+        }
+        IntegrateLayersArgs$method <- eval(parse(text = method))
+        new_reductions <- list(
+            "CCAIntegration" = "integrated.cca",
+            "RPCAIntegration" = "integrated.rpca",
+            "HarmonyIntegration" = "harmony",
+            "FastMNNIntegration" = "integration.mnn",
+            "scVIIntegration" = "integrated.scvi"
+        )
+        if (is.null(IntegrateLayersArgs$new.reduction)) {
+            IntegrateLayersArgs$new.reduction <- new_reductions[[method]]
+        }
+        print(paste0("  IntegrateLayers: ", .formatArgs(IntegrateLayersArgs)))
+        log_debug("  IntegrateLayers: {.formatArgs(IntegrateLayersArgs)}")
+        IntegrateLayersArgs$object <- sobj
+        sobj <- do_call(IntegrateLayers, IntegrateLayersArgs)
+        # Save it for dimension reduction plots
+        sobj@misc$integrated_new_reduction <- IntegrateLayersArgs$new.reduction
+        # Cleanup memory
+        IntegrateLayersArgs$object <- NULL
+        rm(IntegrateLayersArgs)
+        gc()
     }
-    IntegrateLayersArgs$method <- eval(parse(text = method))
-    new_reductions <- list(
-        "CCAIntegration" = "integrated.cca",
-        "RPCAIntegration" = "integrated.rpca",
-        "HarmonyIntegration" = "harmony",
-        "FastMNNIntegration" = "integration.mnn",
-        "scVIIntegration" = "integrated.scvi"
-    )
-    if (is.null(IntegrateLayersArgs$new.reduction)) {
-        IntegrateLayersArgs$new.reduction <- new_reductions[[method]]
+    if (!envs$use_sct) {
+        log_info("- Joining layers ...")
+        sobj <- JoinLayers(sobj)
     }
-    print(paste0("  IntegrateLayers: ", .formatArgs(IntegrateLayersArgs)))
-    log_debug("  IntegrateLayers: {.formatArgs(IntegrateLayersArgs)}")
-    IntegrateLayersArgs$object <- sobj
-    sobj <- do_call(IntegrateLayers, IntegrateLayersArgs)
-    # Save it for dimension reduction plots
-    sobj@misc$integrated_new_reduction <- IntegrateLayersArgs$new.reduction
-}
-if (!envs$use_sct) {
-    log_info("- Joining layers ...")
-    sobj <- JoinLayers(sobj)
+    cached$data <- sobj
+    save_to_cache(cached, "Integrated", cache_dir)
 }
+# This is the last step, doesn't need to be cached
 if (!is.null(envs$DoubletFinder) && is.list(envs$DoubletFinder) && envs$DoubletFinder$PCs > 0) {
     library(DoubletFinder)
     log_info("Running DoubletFinder ...")
     log_info("- Preparing Seurat object ...")
+    if (is.null(envs$DoubletFinder$ncores)) {
+        envs$DoubletFinder$ncores <- envs$ncores
+    }
     # More controls from envs?
     sobj <- FindNeighbors(sobj, dims = 1:envs$DoubletFinder$PCs)
     sobj <- FindClusters(sobj)
@@ -449,7 +579,7 @@ if (!is.null(envs$DoubletFinder) && is.list(envs$DoubletFinder) && envs$DoubletF
         sobj,
         PCs = 1:envs$DoubletFinder$PCs,
         sct = envs$use_sct,
-        num.cores = envs$ncores
+        num.cores = envs$DoubletFinder$ncores
     )
     sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
     bcmvn <- find.pK(sweep.stats)
@@ -546,7 +676,7 @@ if (!is.null(envs$DoubletFinder) && is.list(envs$DoubletFinder) && envs$DoubletF
     )
 }
-log_info("Saving filtered seurat object ...")
+log_info("Saving QC'ed seurat object ...")
 saveRDS(sobj, rdsfile)
 save_report(joboutdir)

biopipen/scripts/snp/MatrixEQTL.R CHANGED Viewed

@@ -107,7 +107,7 @@ engine_params$snps = snps
 engine_params$gene = gene
 engine_params$cvrt = cvrt
 engine_params$output_file_name = if(trans_enabled) alleqtl else NULL
-engine_params$pvOutputThreshold = if(trans_enabled) transp else 0
+engine_params$pvOutputThreshold = if(trans_enabled) min(transp, 1) else 0
 engine_params$useModel = model
 engine_params$errorCovariance = numeric()
 engine_params$verbose = TRUE
@@ -180,7 +180,7 @@ if (cis_enabled) {
     log_info("Running MatrixEQTL with cis-eQTLs enabled ...")
     engine_params$output_file_name.cis = outfile
-    engine_params$pvOutputThreshold.cis = pval
+    engine_params$pvOutputThreshold.cis = min(pval, 1)
     engine_params$cisDist = dist
     engine_params$snpspos = snppos_data
     engine_params$genepos = genepos_data

biopipen/scripts/snp/PlinkIBD.R CHANGED Viewed

@@ -34,6 +34,7 @@ cmd <- c(
     "--threads", ncores,
     "--bfile", input,
     "--indep-pairwise", indep,
+	"--keep-allele-order",
 	# One should be mindful of running this with < 50 samples
 	# "--bad-ld",
     "--out", output
@@ -49,6 +50,7 @@ cmd <- c(
     "--threads", ncores,
     "--bfile", input,
     "--extract", prunein,
+	"--keep-allele-order",
     "--genome",
     "--out", output
 )
@@ -122,6 +124,7 @@ cmd <- c(
     "--threads", ncores,
     "--bfile", input,
     "--remove", ibd_fail_file,
+	"--keep-allele-order",
 	"--make-bed",
     "--out", output
 )

biopipen/scripts/stats/Mediation.R ADDED Viewed

@@ -0,0 +1,94 @@
+source("{{biopipen_dir}}/utils/misc.R")
+library(rlang)
+library(parallel)
+library(mediation)
+infile <- {{in.infile | r}}
+fmlfile <- {{in.fmlfile | r}}
+outfile <- {{out.outfile | r}}
+ncores <- {{envs.ncores | r}}
+sims <- {{envs.sims | r}}
+args <- {{envs.args | r}}
+padj <- {{envs.padj | r}}
+cases <- {{envs.cases | r}}
+transpose_input <- {{envs.transpose_input | r}}
+set.seed(123)
+log_info("Reading input file ...")
+indata <- read.table(infile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE)
+if (transpose_input) { indata <- t(indata) }
+log_info("Reading formula file/cases ...")
+if (!is.null(fmlfile)) {
+    if (!is.null(cases) && length(cases) > 0) {
+        log_warn("envs.cases ignored as in.fmlfile is provided")
+    }
+    fmldata <- read.table(fmlfile, header = TRUE, sep = "\t", row.names = NULL)
+    # Case   M   Y   X   Cov     Model_M    Model_Y
+    cases <- split(fmldata, fmldata$Case)
+} else if (is.null(cases) || length(cases) == 0) {
+    stop("Either envs.cases or in.fmlfile must be provided")
+}
+args <- args %||% list()
+# Run one causal mediation analysis and summarise it as a single row.
+#
+# Reads the script-level objects `cases`, `args`, `sims` and `indata`.
+#
+# @param casename Name of an element of `cases`. The element must provide
+#   `M` (mediator), `Y` (outcome), `X` (treatment), `Model_M`/`Model_Y`
+#   (names of the model-fitting functions) and optionally `Cov`
+#   (covariates, either a vector or one comma-separated string).
+# @return A one-row data.frame with the ACME, its confidence bounds, the
+#   total effect, ADE, proportion mediated and the ACME p-value; `NULL`
+#   when the p-value or the proportion mediated is NA.
+medanalysis <- function(casename) {
+    case <- cases[[casename]]
+    log_info("- Case: {casename}")
+    M <- case$M
+    Y <- case$Y
+    X <- case$X
+    modelm <- match.fun(case$Model_M)
+    modely <- match.fun(case$Model_Y)
+    # Normalise covariates: a single string is split on commas; NA or empty
+    # entries (e.g. a blank `Cov` cell in the formula file) mean "none".
+    covs <- case$Cov
+    if (length(covs) == 1 && !is.na(covs)) {
+        covs <- trimws(strsplit(as.character(covs), ",")[[1]])
+    }
+    covs <- covs[!is.na(covs) & nzchar(covs)]
+    if (length(covs) == 0) {
+        covs <- NULL
+    }
+    # bQuote() comes from utils/misc.R; presumably it backtick-quotes names so
+    # that non-syntactic column names survive as.formula() - TODO confirm.
+    fmlm <- as.formula(sprintf("%s ~ %s", bQuote(M), bQuote(X)))
+    fmly <- as.formula(sprintf("%s ~ %s + %s", bQuote(Y), bQuote(M), bQuote(X)))
+    if (!is.null(covs)) {
+        cov_fml <- as.formula(sprintf("~ . + %s", paste(bQuote(covs), collapse = " + ")))
+        fmlm <- update.formula(fmlm, cov_fml)
+        fmly <- update.formula(fmly, cov_fml)
+    }
+    # Work on a per-case copy so the shared `args` list is never touched.
+    margs <- args
+    margs$sims <- sims
+    margs$model.m <- modelm(fmlm, data = indata)
+    margs$model.y <- modely(fmly, data = indata)
+    margs$treat <- X
+    margs$mediator <- M
+    margs$outcome <- Y
+    if (!is.null(covs)) {
+        # NOTE(review): this passes the full covariate columns to
+        # mediate(covariates = ...); confirm that is the intended conditioning.
+        margs$covariates <- indata[, covs, drop = FALSE]
+    }
+    med <- do_call(mediate, margs)
+    if (is.na(med$d1.p) || is.na(med$n1)) {
+        return(NULL)
+    }
+    data.frame(
+        Case         = casename,
+        M            = M,
+        X            = X,
+        Y            = Y,
+        ACME         = med$d1,
+        ACME95CI1    = med$d1.ci[1],
+        ACME95CI2    = med$d1.ci[2],
+        TotalEffect  = med$tau.coef,
+        ADE          = med$z1,
+        PropMediated = med$n1,
+        Pval         = med$d1.p
+    )
+}
+# Run every case in parallel. mclapply() does not stop on a worker error; it
+# returns "try-error" objects instead, so check for them before binding rows.
+results <- mclapply(names(cases), medanalysis, mc.cores = ncores)
+failed <- vapply(results, inherits, logical(1), what = "try-error")
+if (any(failed)) {
+    stop(
+        "Mediation analysis failed for case(s): ",
+        paste(names(cases)[failed], collapse = ", "),
+        "\n",
+        paste(unique(unlist(results[failed])), collapse = ""),
+        call. = FALSE
+    )
+}
+# Cases that returned NULL contribute no row.
+out <- do_call(rbind, results)
+if (is.null(out)) {
+    # Keep the output schema stable even when no case gave a usable estimate.
+    log_warn("No case produced a valid mediation result; writing an empty table.")
+    out <- data.frame(
+        Case         = character(0),
+        M            = character(0),
+        X            = character(0),
+        Y            = character(0),
+        ACME         = numeric(0),
+        ACME95CI1    = numeric(0),
+        ACME95CI2    = numeric(0),
+        TotalEffect  = numeric(0),
+        ADE          = numeric(0),
+        PropMediated = numeric(0),
+        Pval         = numeric(0)
+    )
+}
+if (padj != "none") {
+    out$Padj <- p.adjust(out$Pval, method = padj)
+}
+write.table(out, file = outfile, sep = "\t", quote = FALSE, row.names = FALSE)

{biopipen-0.29.0.dist-info → biopipen-0.29.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: biopipen
-Version: 0.29.0
+Version: 0.29.1
 Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
 License: MIT
 Author: pwwang

biopipen 0.29.0__py3-none-any.whl → 0.29.1__py3-none-any.whl

Potentially problematic release.

biopipen 0.29.0__py3-none-any.whl → 0.29.1__py3-none-any.whl