PyPI - biopipen - Versions diffs - 0.29.0__py3-none-any.whl → 0.29.1__py3-none-any.whl - Mend

biopipen 0.29.0__py3-none-any.whl → 0.29.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (24) hide show

biopipen/__init__.py +1 -1
biopipen/ns/plot.py +66 -8
biopipen/ns/{regulation.py → regulatory.py} +3 -3
biopipen/ns/scrna.py +16 -2
biopipen/ns/stats.py +93 -1
biopipen/scripts/delim/SampleInfo.R +10 -5
biopipen/scripts/plot/Manhattan.R +6 -0
biopipen/scripts/plot/QQPlot.R +100 -16
biopipen/scripts/{regulation → regulatory}/MotifAffinityTest.R +3 -3
biopipen/scripts/{regulation → regulatory}/MotifScan.py +1 -1
biopipen/scripts/scrna/MarkersFinder.R +28 -18
biopipen/scripts/scrna/SeuratClustering.R +8 -0
biopipen/scripts/scrna/SeuratPreparing.R +252 -122
biopipen/scripts/snp/MatrixEQTL.R +2 -2
biopipen/scripts/snp/PlinkIBD.R +3 -0
biopipen/scripts/stats/Mediation.R +94 -0
{biopipen-0.29.0.dist-info → biopipen-0.29.1.dist-info}/METADATA +1 -1
{biopipen-0.29.0.dist-info → biopipen-0.29.1.dist-info}/RECORD +24 -23
{biopipen-0.29.0.dist-info → biopipen-0.29.1.dist-info}/entry_points.txt +1 -1
/biopipen/scripts/{regulation → regulatory}/MotifAffinityTest_AtSNP.R +0 -0
/biopipen/scripts/{regulation → regulatory}/MotifAffinityTest_MotifBreakR.R +0 -0
/biopipen/scripts/{regulation → regulatory}/atSNP.R +0 -0
/biopipen/scripts/{regulation → regulatory}/motifBreakR.R +0 -0
{biopipen-0.29.0.dist-info → biopipen-0.29.1.dist-info}/WHEEL +0 -0

biopipen/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.29.0"
1	+ __version__ = "0.29.1"

biopipen/ns/plot.py CHANGED Viewed

@@ -221,7 +221,7 @@ class Manhattan(Proc):
         "label_col": None,
         "devpars": {"res": 100, "width": 1000, "height": 500},
         "zoom_devpars": {"width": 500, "height": None, "res": None},
-        "title": "Manhattan Plot",
+        "title": None,
         "ylabel": "-log10(p-value)",
         "rescale": True,
         "rescale_ratio_threshold": 5,
@@ -245,6 +245,11 @@ class QQPlot(Proc):
         infile: The input file for data
             It should contain at least one column of p-values or the values to be
             plotted. Header is required.
+        theorfile: The file for theoretical values (optional)
+            This file should contain at least one column of theoretical values.
+            The values will be passed to `envs.theor_qfunc` to calculate the theoretical
+            quantiles.
+            Header is required.
     Output:
         outfile: The output figure file
@@ -266,33 +271,86 @@ class QQPlot(Proc):
         kind (choice): The kind of the plot, `qq` or `pp`
             - qq: QQ-plot
             - pp: PP-plot
-        band (ns): The arguments for `stat_qq_band()` or `stat_pp_band()`
+        theor_col: The column for theoretical values in `in.theorfile` if provided,
+            otherwise in `in.infile`.
+            An integer (1-based) or a string indicating the column name.
+            If `distribution` of `band`, `line`, or `point` is `custom`, this column
+            must be provided.
+        theor_trans: The transformation of the theoretical values.
+            The `theor_funs` have default functions to take the theoretical values.
+            This transformation will be applied to the theoretical values before
+            passing to the `theor_funs`.
+        theor_funs (ns): The R functions to generate density, quantile and deviates
+            of the theoretical distribution base on the theoretical values
+            if `distribution` of `band`, `line`, or `point` is `custom`.
+            - dcustom: The density function, used by band
+            - qcustom: The quantile function, used by point
+            - rcustom: The deviates function, used by line
+        args (ns): The common arguments for `envs.band`, `envs.line` and `envs.point`.
+            - distribution: The distribution of the theoretical quantiles
+                When `custom` is used, the `envs.theor_col` should be provided and
+                `values` will be added to `dparams` automatically.
+            - dparams (type=json): The parameters for the distribution
+            - <more>: Other shared arguments between `stat_*_band`, `stat_*_line`
+                and `stat_*_point`.
+        band (ns): The arguments for `stat_qq_band()` or `stat_pp_band()`.
             See <https://rdrr.io/cran/qqplotr/man/stat_qq_band.html> and
             <https://rdrr.io/cran/qqplotr/man/stat_pp_band.html>.
+            Set to `None` or `band.disabled` to True to disable the band.
+            - disabled (flag): Disable the band
+            - distribution: The distribution of the theoretical quantiles
+                When `custom` is used, the `envs.theor_col` should be provided and
+                `values` will be added to `dparams` automatically.
+            - dparams (type=json): The parameters for the distribution
             - <more>: Additional arguments for `stat_qq_band()` or `stat_pp_band()`
-        line (ns): The arguments for `stat_qq_line()` or `stat_pp_line()`
+        line (ns): The arguments for `stat_qq_line()` or `stat_pp_line()`.
             See <https://rdrr.io/cran/qqplot/man/stat_qq_line.html> and
             <https://rdrr.io/cran/qqplot/man/stat_pp_line.html>.
+            Set to `None` or `line.disabled` to True to disable the line.
+            - disabled (flag): Disable the line
+            - distribution: The distribution of the theoretical quantiles
+                When `custom` is used, the `envs.theor_col` should be provided and
+                `values` will be added to `dparams` automatically.
+            - dparams (type=json): The parameters for the distribution
             - <more>: Additional arguments for `stat_qq_line()` or `stat_pp_line()`
-        point (ns): The arguments for `geom_qq_point()` or `geom_pp_point()`
+        point (ns): The arguments for `geom_qq_point()` or `geom_pp_point()`.
             See <https://rdrr.io/cran/qqplot/man/stat_qq_point.html> and
             <https://rdrr.io/cran/qqplot/man/stat_pp_point.html>.
+            Set to `None` or `point.disabled` to True to disable the point.
+            - disabled (flag): Disable the point
+            - distribution: The distribution of the theoretical quantiles
+                When `custom` is used, the `envs.theor_col` should be provided and
+                `values` will be added to `dparams` automatically.
+            - dparams (type=json): The parameters for the distribution
+            - <more>: Additional arguments for `geom_qq_point()` or `geom_pp_point()`
         ggs (list): Additional ggplot expression to adjust the plot.
     """
-    input = "infile:file"
+    input = "infile:file, theorfile:file"
     output = "outfile:file:{{in.infile | stem}}.{{envs.kind}}.png"
     lang = config.lang.rscript
     envs = {
         "val_col": 1,
+        "theor_col": None,
+        "theor_trans": None,
+        "theor_funs": {
+            "dcustom": """
+              function(x, values, ...) {
+                density(values, from = min(values), to = max(values), n = length(x))$y
+              }
+            """,
+            "qcustom": "function(p, values, ...) {quantile(values, probs = p)}",
+            "rcustom": "function(n, values, ...) { sample(values, n, replace = TRUE) }",
+        },
+        "args": {"distribution": "norm", "dparams": {}},
         "devpars": {"res": 100, "width": 1000, "height": 1000},
         "xlabel": "Theoretical Quantiles",
         "ylabel": "Observed Quantiles",
         "title": "QQ-plot",
         "trans": None,
         "kind": "qq",
-        "band": {},
-        "line": {},
-        "point": {},
+        "band": {"disabled": False, "distribution": None, "dparams": None},
+        "line": {"disabled": False, "distribution": None, "dparams": None},
+        "point": {"disabled": False, "distribution": None, "dparams": None},
         "ggs": None,
     }
     script = "file://../scripts/plot/QQPlot.R"

biopipen/ns/{regulation.py → regulatory.py} RENAMED Viewed

@@ -1,4 +1,4 @@
-"""Provides processes for the regulation related"""
+"""Provides processes for the regulatory related"""
 from ..core.proc import Proc
 from ..core.config import config
@@ -86,7 +86,7 @@ class MotifScan(Proc):
         "q_cutoff": False,
         "args": {},
     }
-    script = "file://../scripts/regulation/MotifScan.py"
+    script = "file://../scripts/regulatory/MotifScan.py"
 class MotifAffinityTest(Proc):
@@ -211,4 +211,4 @@ class MotifAffinityTest(Proc):
         "motifbreakr_args": {"method": "default"},
         "atsnp_args": {"padj_cutoff": True, "padj": "BH", "p": "pval_diff"},
     }
-    script = "file://../scripts/regulation/MotifAffinityTest.R"
+    script = "file://../scripts/regulatory/MotifAffinityTest.R"

biopipen/ns/scrna.py CHANGED Viewed

@@ -53,7 +53,7 @@ class SeuratPreparing(Proc):
     See also
     - <https://satijalab.org/seurat/articles/pbmc3k_tutorial.html#standard-pre-processing-workflow-1)>
-    - <https://nbisweden.github.io/workshop-scRNAseq/labs/compiled/seurat/seurat_01_qc.html#Create_one_merged_object>
+    - <https://satijalab.org/seurat/articles/integration_introduction>
     This process will read the scRNA-seq data, based on the information provided by
     `SampleInfo`, specifically, the paths specified by the `RNAData` column.
@@ -210,6 +210,19 @@ class SeuratPreparing(Proc):
             - PCs (type=int): Number of PCs to use for 'doubletFinder' function.
             - doublets (type=float): Number of expected doublets as a proportion of the pool size.
             - pN (type=float): Number of doublets to simulate as a proportion of the pool size.
+            - ncores (type=int): Number of cores to use for `DoubletFinder::paramSweep`.
+                Set to `None` to use `envs.ncores`.
+                Since parallelization of the function usually exhausts memory, if big `envs.ncores` does not work
+                for `DoubletFinder`, set this to a smaller number.
+        cache (type=auto): Whether to cache the information at different steps.
+            If `True`, the seurat object will be cached in the job output directory, which will be not cleaned up when job is rerunning.
+            The cached seurat object will be saved as `<signature>.<kind>.RDS` file, where `<signature>` is the signature determined by
+            the input and envs of the process.
+            See <https://github.com/satijalab/seurat/issues/7849>, <https://github.com/satijalab/seurat/issues/5358> and
+            <https://github.com/satijalab/seurat/issues/6748> for more details also about reproducibility issues.
+            To not use the cached seurat object, you can either set `cache` to `False` or delete the cached file at
+            `<signature>.RDS` in the cache directory.
     Requires:
         r-seurat:
@@ -238,7 +251,8 @@ class SeuratPreparing(Proc):
             "min_cells": 5,
         },
         "IntegrateLayers": {"method": "harmony"},
-        "DoubletFinder": {"PCs": 0, "pN": 0.25, "doublets": 0.075},
+        "DoubletFinder": {"PCs": 0, "pN": 0.25, "doublets": 0.075, "ncores": 1},
+        "cache": config.path.tmpdir,
     }
     script = "file://../scripts/scrna/SeuratPreparing.R"
     plugin_opts = {

biopipen/ns/stats.py CHANGED Viewed

@@ -73,11 +73,103 @@ class ChowTest(Proc):
     script = "file://../scripts/stats/ChowTest.R"
+class Mediation(Proc):
+    """Mediation analysis.
+    The flowchart of mediation analysis:
+    ![Mediation Analysis](https://library.virginia.edu/sites/default/files/inline-images/mediation_flowchart-1.png)
+    Reference:
+        - <https://library.virginia.edu/data/articles/introduction-to-mediation-analysis>
+        - <https://en.wikipedia.org/wiki/Mediation_(statistics)>
+        - <https://tilburgsciencehub.com/topics/analyze/regression/linear-regression/mediation-analysis/>
+        - <https://ademos.people.uic.edu/Chapter14.html>
+    Input:
+        infile: The input data file. The rows are samples and the columns are
+            features. It must be tab-delimited.
+            ```
+            Sample   F1   F2   F3   ...   Fn
+            S1       1.2  3.4  5.6        7.8
+            S2       2.3  4.5  6.7        8.9
+            ...
+            Sm       5.6  7.8  9.0        1.2
+            ```
+        fmlfile: The formula file.
+            ```
+            Case   M   Y   X   Cov     Model_M    Model_Y
+            Case1  F1  F2  F3  F4,F5   glm        lm
+            ...
+            ```
+            Where Y is the outcome variable, X is the predictor variable, M is the
+            mediator variable, and Case is the case name. Model_M and Model_Y are the
+            models for M and Y, respectively.
+            `envs.cases` will be ignored if this is provided.
+    Output:
+        outfile: The output file.
+            Columns to help understand the results:
+            Total Effect: a total effect of X on Y (without M) (`Y ~ X`).
+            ADE: A Direct Effect of X on Y after taking into account a mediation effect of M (`Y ~ X + M`).
+            ACME: The Mediation Effect, the total effect minus the direct effect,
+            which equals to a product of a coefficient of X in the second step and a coefficient of M in the last step.
+            The goal of mediation analysis is to obtain this indirect effect and see if it's statistically significant.
+    Envs:
+        ncores (type=int): Number of cores to use for parallelization for cases.
+        sims (type=int): Number of Monte Carlo draws for nonparametric bootstrap or quasi-Bayesian approximation.
+            Will be passed to `mediation::mediate` function.
+        args (ns): Other arguments passed to `mediation::mediate` function.
+            - <more>: More arguments passed to `mediation::mediate` function.
+                See: <https://rdrr.io/cran/mediation/man/mediate.html>
+        padj (choice): The method for (ACME) p-value adjustment.
+            - none: No p-value adjustment (no Padj column in outfile).
+            - holm: Holm-Bonferroni method.
+            - hochberg: Hochberg method.
+            - hommel: Hommel method.
+            - bonferroni: Bonferroni method.
+            - BH: Benjamini-Hochberg method.
+            - BY: Benjamini-Yekutieli method.
+            - fdr: FDR correction method.
+        cases (type=json): The cases for mediation analysis.
+            Ignored if `in.fmlfile` is provided.
+            A json/dict with case names as keys and values as a dict of M, Y, X, Cov, Model_M, Model_Y.
+            For example:
+            ```json
+            {
+                "Case1": {
+                    "M": "F1",
+                    "Y": "F2",
+                    "X": "F3",
+                    "Cov": "F4,F5",
+                    "Model_M": "glm",
+                    "Model_Y": "lm"
+                },
+                ...
+            }
+            ```
+        transpose_input (flag): Whether to transpose the input file.
+    """  # noqa: E501
+    input = "infile:file, fmlfile:file"
+    output = "outfile:file:{{in.infile | stem}}.mediation.txt"
+    lang = config.lang.rscript
+    envs = {
+        "ncores": config.misc.ncores,
+        "sims": 1000,
+        "args": {},
+        "padj": "none",
+        "cases": {},
+        "transpose_input": False,
+    }
+    script = "file://../scripts/stats/Mediation.R"
 class LiquidAssoc(Proc):
     """Liquid association tests.
     See Also https://github.com/gundt/fastLiquidAssociation
-    Requieres https://github.com/pwwang/fastLiquidAssociation
+    Requires https://github.com/pwwang/fastLiquidAssociation
     Input:
         infile: The input data file. The rows are samples and the columns are

biopipen/scripts/delim/SampleInfo.R CHANGED Viewed

@@ -88,7 +88,11 @@ for (name in names(stats)) {
     group <- if (is.null(stat$group)) sym("..group") else sym(stat$group)
     count_on <- paste0("..count.", stat$on)
     if (!is_continuous) {
-        data <- data %>% add_count(!!group, name = count_on)
+        if (!is.null(stat$each)) {
+            data <- data %>% add_count(!!group, !!sym(stat$each), name = count_on)
+        } else {
+            data <- data %>% add_count(!!group, name = count_on)
+        }
     }
     if (is.null(stat$devpars)) {
@@ -141,18 +145,19 @@ for (name in names(stats)) {
         } else {
             data <- data %>%
                 distinct(!!group, !!sym(stat$each), .keep_all = TRUE) %>%
+                mutate(!!group := factor(!!group, levels = unique(!!group))) %>%
                 group_by(!!sym(stat$each))
         }
         p <- ggplot(
-            data %>% arrange(!!group),
-            aes(x = "", y = !!sym(count_on), fill = !!group, label = !!sym(count_on))
+            data %>% mutate(.size = sum(!!sym(count_on))),
+            aes(x = sqrt(.size) / 2, width = sqrt(.size), y = !!sym(count_on), fill = !!group, label = !!sym(count_on))
         ) +
-            geom_bar(stat="identity", width=1, color="white", position = position_stack(reverse = TRUE)) +
+            geom_bar(stat="identity", color="white", position = position_fill(reverse = TRUE)) +
             coord_polar("y", start = 0) +
             theme_void() +
             theme(plot.title = element_text(hjust = 0.5)) +
             geom_label_repel(
-                position = position_stack(vjust = 0.5),
+                position = position_fill(reverse = TRUE,vjust = .5),
                 color="#333333",
                 fill="#EEEEEE",
                 size=4

biopipen/scripts/plot/Manhattan.R CHANGED Viewed

@@ -105,6 +105,7 @@ args$signif <- signif
 args$plot.title <- title
 args$rescale <- rescale
 args$rescale.ratio.threshold <- rescale_ratio_threshold
+args$y.label <- ylabel
 if (!is.null(hicolors)) { args$color.by.highlight <- TRUE }
 if (!is.null(label_col)) { args$label.colname <- ".label" }
 g <- do_call(manhattan_plot, args)
@@ -114,10 +115,15 @@ print(g)
 dev.off()
 # zoom into chromosomes
+all_chroms <- as.character(unique(mpdata$data[[mpdata$chr.colname]]))
 if (!is.null(zoom)) {
     log_info("Zooming into chromosomes ...")
     zoom <- norm_chroms(zoom)
     for (z in zoom) {
+        if (!z %in% all_chroms) {
+            log_warn("- {z}: not found in data")
+            next
+        }
         log_info("- {z}")
         args_z <- args
         args_z$chromosome <- z

biopipen/scripts/plot/QQPlot.R CHANGED Viewed

@@ -1,5 +1,7 @@
 source("{{biopipen_dir}}/utils/misc.R")
+library(rlang)
+library(stats)
 library(ggplot2)
 library(ggprism)
 library(qqplotr)
@@ -7,50 +9,132 @@ library(qqplotr)
 theme_set(theme_prism())
 infile <- {{in.infile | r}}
+theorfile <- {{in.theorfile | r}}
 outfile <- {{out.outfile | r}}
 val_col <- {{envs.val_col | r}}
+theor_col <- {{envs.theor_col | r}}
+theor_trans <- {{envs.theor_trans | r}}
+theor_funs <- {{envs.theor_funs | r}}
 devpars <- {{envs.devpars | r}}
 title <- {{envs.title | r}}
 xlabel <- {{envs.xlabel | r}}
 ylabel <- {{envs.ylabel | r}}
 kind <- {{envs.kind | r}}
 trans <- {{envs.trans | r}}
+args <- {{envs.args | r}}
 band_args <- {{envs.band | r}}
 line_args <- {{envs.line | r}}
 point_args <- {{envs.point | r}}
 ggs <- {{envs.ggs | r}}
+.eval_fun <- function(fun) {
+    if (is.character(fun)) {
+        fun <- trimws(fun)
+        if (grepl("^-\\s*[a-zA-Z\\.][0-9a-zA-Z\\._]*$", fun)) {
+            fun <- trimws(substring(fun, 2))
+            fun <- eval(parse(text = fun))
+            return(function(x) -fun(x))
+        } else {
+            return(eval(parse(text = fun)))
+        }
+    } else {
+        return(fun)
+    }
+}
 indata <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
-if (is.numeric(val_col)) { val_col <- colnames(indata)[val_col] }
+if (is.numeric(val_col)) {
+    val_col <- colnames(indata)[val_col]
+}
+if (!is.null(trans)) {
+    trans <- .eval_fun(trans)
+    indata[[val_col]] <- trans(indata[[val_col]])
+}
+if (!is.null(theor_col)) {
+    if (is.numeric(theor_col)) {
+        theor_col <- colnames(theor)[theor_col]
+    }
+    if (!is.null(theorfile)) {
+        theor <- read.table(theorfile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
+        theor_vals <- theor[[theor_col]]
+    } else {
+        theor_vals <- indata[[theor_col]]
+    }
+    if (!is.null(theor_trans)) {
+        theor_trans <- .eval_fun(theor_trans)
+        theor_vals <- theor_trans(theor_vals)
+    }
+    theor_vals <- sort(na.omit(theor_vals))
+}
 band_fun <- ifelse(kind == "pp", stat_pp_band, stat_qq_band)
 line_fun <- ifelse(kind == "pp", stat_pp_line, stat_qq_line)
 point_fun <- ifelse(kind == "pp", stat_pp_point, stat_qq_point)
-title <- title %||% waiver()
-xlabel <- xlabel %||% waiver()
-ylabel <- ylabel %||% waiver()
+for (fun in names(theor_funs)) {
+    assign(fun, .eval_fun(theor_funs[[fun]]))
+}
-if (!is.null(trans)) {
-    trans <- trimws(trans)
-    if (trans == "-log10") {
-        trans <- function(x) -log10(x)
+if (!is.null(band_args) || isFALSE(band_args)) {
+    if (isTRUE(band_args$disabled)) {
+        band_args <- NULL
     } else {
-        trans <- eval(parse(text = trans))
+        band_args$disabled <- NULL
+        band_args <- list_update(band_args, args)
+        if (band_args$distribution == "custom") {
+            band_args$dparams <- band_args$dparams %||% list()
+            band_args$dparams$values <- theor_vals
+        }
+    }
+}
+if (!is.null(line_args) || isFALSE(line_args)) {
+    if (isTRUE(line_args$disabled)) {
+        line_args <- NULL
+    } else {
+        line_args$disabled <- NULL
+        line_args <- list_update(line_args, args)
+        if (line_args$distribution == "custom") {
+            line_args$dparams <- line_args$dparams %||% list()
+            line_args$dparams$values <- theor_vals
+        }
+    }
+}
+if (!is.null(point_args) || isFALSE(point_args)) {
+    if (isTRUE(point_args$disabled)) {
+        point_args <- NULL
+    } else {
+        point_args$disabled <- NULL
+        point_args <- list_update(point_args, args)
+        if (point_args$distribution == "custom") {
+            point_args$dparams <- point_args$dparams %||% list()
+            point_args$dparams$values <- theor_vals
+        }
     }
-    indata$.trans_val <- trans(indata[[val_col]])
-    val_col <- ".trans_val"
 }
-indata <- indata[!is.na(indata[[val_col]]), , drop=FALSE]
+title <- title %||% waiver()
+xlabel <- xlabel %||% waiver()
+ylabel <- ylabel %||% waiver()
+indata <- indata[complete.cases(indata), , drop = FALSE]
+indata <- indata[order(indata[[val_col]]), , drop = FALSE]
 p <- ggplot(data = indata, mapping = aes(sample = !!sym(val_col))) +
-    do_call(band_fun, band_args) +
-    do_call(line_fun, line_args) +
-    do_call(point_fun, point_args) +
     labs(title = title, x = xlabel, y = ylabel)
+if (!is.null(band_args)) {
+    p <- p + do_call(band_fun, band_args)
+}
+if (!is.null(line_args)) {
+    p <- p + do_call(line_fun, line_args)
+}
+if (!is.null(point_args)) {
+    p <- p + do_call(point_fun, point_args)
+}
 if (!is.null(ggs)) {
     for (gg in ggs) {
         p <- p + eval(parse(text = gg))

biopipen/scripts/{regulation → regulatory}/MotifAffinityTest.R RENAMED Viewed

@@ -1,4 +1,4 @@
-# Script for regulation.MotifAffinityTest
+# Script for regulatory.MotifAffinityTest
 source("{{biopipen_dir}}/utils/misc.R")
 library(BiocParallel)
@@ -215,12 +215,12 @@ tool <- match.arg(tool, c("motifbreakr", "atsnp"))
 if (tool == "motifbreakr") {
     motifbreakr_args <- {{envs.motifbreakr_args | r}}
-    {% set sourcefile = biopipen_dir | joinpaths: "scripts", "regulation", "MotifAffinityTest_MotifBreakR.R" %}
+    {% set sourcefile = biopipen_dir | joinpaths: "scripts", "regulatory", "MotifAffinityTest_MotifBreakR.R" %}
     # {{ sourcefile | getmtime }}
     source("{{sourcefile}}")
 } else {  # atsnp
     atsnp_args <- {{envs.atsnp_args | r}}
-    {% set sourcefile = biopipen_dir | joinpaths: "scripts", "regulation", "MotifAffinityTest_AtSNP.R" %}
+    {% set sourcefile = biopipen_dir | joinpaths: "scripts", "regulatory", "MotifAffinityTest_AtSNP.R" %}
     # {{ sourcefile | getmtime }}
     source("{{sourcefile}}")
 }

biopipen/scripts/{regulation → regulatory}/MotifScan.py RENAMED Viewed

@@ -1,4 +1,4 @@
-"""Script for regulation.MotifScan"""
+"""Script for regulatory.MotifScan"""
 import re
 # Paths may be passed in args or to motifdb

biopipen/scripts/scrna/MarkersFinder.R CHANGED Viewed

@@ -65,6 +65,19 @@ if (ncores > 1) {
 log_info("- Reading Seurat object ...")
 srtobj <- readRDS(srtfile)
 defassay <- DefaultAssay(srtobj)
+if (defassay == "SCT" && !"PrepSCTFindMarkers" %in% names(srtobj@commands)) {
+    log_warn("  SCTransform used but PrepSCTFindMarkers not applied, running ...")
+    srtobj <- PrepSCTFindMarkers(srtobj)
+    # compose a new SeuratCommand to record it to srtobj@commands
+    scommand <- srtobj@commands$FindClusters
+    scommand@name <- "PrepSCTFindMarkers"
+    scommand@time.stamp <- Sys.time()
+    scommand@assay.used <- "SCT"
+    scommand@call.string <- "PrepSCTFindMarkers(object = srtobj)"
+    scommand@params <- list()
+    srtobj@commands$PrepSCTFindMarkers <- scommand
+}
 if (!is.null(mutaters) && length(mutaters) > 0) {
     log_info("- Mutating meta data ...")
@@ -472,33 +485,30 @@ find_markers <- function(findmarkers_args, find_all = FALSE) {
             p_val_adj = numeric()
         )
     }
+    call_findmarkers <- function(fn, args) {
+        if (find_all) {
+            do_call(fn, args)
+        } else {
+            do_call(fn, args) %>% rownames_to_column("gene")
+        }
+    }
     markers <- tryCatch({
-        do_call(fun, findmarkers_args) %>% rownames_to_column("gene")
+        call_findmarkers(fun, findmarkers_args)
     }, error = function(e) {
-        # Object contains multiple models with unequal library sizes.
-        # Run `PrepSCTFindMarkers()` before running `FindMarkers()`.
-        if (grepl("PrepSCTFindMarkers", e$message)) {
-            log_warn("  Running PrepSCTFindMarkers ...")
-            findmarkers_args$object <<- PrepSCTFindMarkers(findmarkers_args$object)
-            tryCatch({
-                do_call(fun, findmarkers_args) %>% rownames_to_column("gene")
-            }, error = function(err) {
-                log_warn(paste0("  ", err$message))
-                empty
-            })
-        } else {
-            log_warn(paste0("  ", e$message))
-            empty
+        if (!grepl("PrepSCTFindMarkers", e$message) && defassay == "SCT") {
+            log_warn(paste0("  ! ", e$message))
         }
+        empty
     })
     if (nrow(markers) == 0 && defassay == "SCT") {
-        log_warn("  No markers found from SCT assay, trying recorrect_umi = FALSE")
+        log_warn("  ! No markers found from SCT assay, trying recorrect_umi = FALSE")
         findmarkers_args$recorrect_umi <- FALSE
         markers <- tryCatch({
-            do_call(fun, findmarkers_args) %>% rownames_to_column("gene")
+            call_findmarkers(fun, findmarkers_args)
         }, error = function(e) {
-            log_warn(paste0("  ", e$message))
+            log_warn(paste0("  ! ", e$message))
             empty
         })
     }

biopipen/scripts/scrna/SeuratClustering.R CHANGED Viewed

@@ -202,6 +202,14 @@ if (DefaultAssay(sobj) == "SCT") {
         # https://github.com/satijalab/seurat/issues/6968
     log_info("Running PrepSCTFindMarkers ...")
     sobj <- PrepSCTFindMarkers(sobj)
+    # compose a new SeuratCommand to record it to sobj@commands
+    scommand <- sobj@commands$FindClusters
+    scommand@name <- "PrepSCTFindMarkers"
+    scommand@time.stamp <- Sys.time()
+    scommand@assay.used <- "SCT"
+    scommand@call.string <- "PrepSCTFindMarkers(object = sobj)"
+    scommand@params <- list()
+    sobj@commands$PrepSCTFindMarkers <- scommand
 }
 log_info("Saving results ...")

biopipen 0.29.0__py3-none-any.whl → 0.29.1__py3-none-any.whl

Potentially problematic release.

biopipen 0.29.0__py3-none-any.whl → 0.29.1__py3-none-any.whl