PyPI - biopipen - Versions diffs - 0.27.3__py3-none-any.whl → 0.27.5__py3-none-any.whl - Mend

biopipen 0.27.3 (py3-none-any.whl) → 0.27.5 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (20)

biopipen/__init__.py +1 -1
biopipen/core/testing.py +3 -2
biopipen/ns/delim.py +1 -1
biopipen/ns/plot.py +36 -0
biopipen/ns/scrna.py +18 -11
biopipen/ns/scrna_metabolic_landscape.py +3 -3
biopipen/ns/snp.py +65 -0
biopipen/ns/tcr.py +6 -6
biopipen/scripts/delim/SampleInfo.R +6 -6
biopipen/scripts/plot/ROC.R +88 -0
biopipen/scripts/scrna/SeuratClusterStats-features.R +1 -1
biopipen/scripts/scrna/SeuratPreparing.R +163 -112
biopipen/scripts/scrna/SeuratTo10X.R +25 -82
biopipen/scripts/scrna/Subset10X.R +84 -0
biopipen/scripts/snp/MatrixEQTL.R +157 -0
{biopipen-0.27.3.dist-info → biopipen-0.27.5.dist-info}/METADATA +1 -2
{biopipen-0.27.3.dist-info → biopipen-0.27.5.dist-info}/RECORD +19 -17
biopipen/scripts/scrna/Write10X.R +0 -11
{biopipen-0.27.3.dist-info → biopipen-0.27.5.dist-info}/WHEEL +0 -0
{biopipen-0.27.3.dist-info → biopipen-0.27.5.dist-info}/entry_points.txt +0 -0

biopipen/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.27.3"
1	+ __version__ = "0.27.5"

biopipen/core/testing.py CHANGED Viewed

@@ -51,15 +51,16 @@ class PipelineSucceeded:
         pipen._succeeded = succeeded
-def get_pipeline(testfile, loglevel="debug", **kwargs):
+def get_pipeline(testfile, loglevel="debug", enable_report=False, **kwargs):
     """Get a pipeline for a test file"""
     name, workdir, outdir = _get_test_dirs(testfile, False)
+    report_plugin_prefix = "+" if enable_report else "-"
     kws = {
         "name": name,
         "workdir": workdir,
         "outdir": outdir,
         "loglevel": loglevel,
-        "plugins": [PipelineSucceeded, "-report"],
+        "plugins": [PipelineSucceeded, f"{report_plugin_prefix}report"],
     }
     kws.update(kwargs)
     return Pipen(**kws)

biopipen/ns/delim.py CHANGED Viewed

@@ -113,7 +113,7 @@ class SampleInfo(Proc):
         "exclude_cols": None,
         "defaults": {
             "on": "Sample",
-            "distinct": None,
+            # "distinct": None,
             "group": None,
             "na_group": False,
             "each": None,

biopipen/ns/plot.py CHANGED Viewed

@@ -114,3 +114,39 @@ class Heatmap(Proc):
         "globals": "",
     }
     script = "file://../scripts/plot/Heatmap.R"
+class ROC(Proc):
+    """Plot ROC curve using [`plotROC`](https://cran.r-project.org/web/packages/plotROC/vignettes/examples.html).
+    Besides the figure, the AUC of each model is saved to `aucs.tsv` in the
+    job output directory.
+    Input:
+        infile: The input file for data, tab-separated, with a header line.
+            The first column should be ids of the records (leave it out and set `envs.noids` to True if there are no ids).
+            The next column should be the labels of the records (1 for positive, 0 for negative).
+            If the labels are strings, the records whose label equals `envs.pos_label` are taken as positive.
+            The remaining columns should be the scores of the different models (one column per model).
+    Output:
+        outfile: The output figure file
+    Envs:
+        noids: Set to True when the input file does NOT have an id column.
+            When False (default), the first column is treated as ids and dropped.
+        pos_label: The positive label. Only used when the labels are strings.
+        ci: Whether to use `geom_rocci()` instead of `geom_roc()`.
+        devpars: The parameters for `png()`
+        args: Additional arguments for `geom_roc()` or `geom_rocci()` if `envs.ci` is True.
+        style_roc: Arguments for `style_roc()`.
+            If `theme` is given, it is evaluated as R code in the script.
+        show_auc: Whether to show the AUC values in the plot.
+            With multiple models, they are appended to the legend labels;
+            with a single model, the value is printed inside the plot.
+    """  # noqa: E501
+    input = "infile:file"
+    output = "outfile:file:{{in.infile | stem}}.roc.png"
+    lang = config.lang.rscript
+    envs = {
+        "noids": False,
+        "pos_label": 1,
+        "ci": False,
+        "devpars": {"res": 100, "width": 750, "height": 600},
+        "args": {"labels": False},
+        "style_roc": {},
+        "show_auc": True,
+    }
+    script = "file://../scripts/plot/ROC.R"

biopipen/ns/scrna.py CHANGED Viewed

@@ -122,6 +122,9 @@ class SeuratPreparing(Proc):
             genes.
             ///
+        cell_qc_per_sample (flag): Whether to perform cell QC per sample or not.
+            If `True`, the cell QC will be performed per sample, and the QC will be
+            applied to each sample before merging.
         gene_qc (ns): Filter genes.
             `gene_qc` is applied after `cell_qc`.
             - min_cells: The minimum number of cells that a gene must be
@@ -222,6 +225,7 @@ class SeuratPreparing(Proc):
     envs = {
         "ncores": config.misc.ncores,
         "cell_qc": None,  # "nFeature_RNA > 200 & percent.mt < 5",
+        "cell_qc_per_sample": False,
         "gene_qc": {"min_cells": 0, "excludes": []},
         "use_sct": False,
         "no_integration": False,
@@ -413,7 +417,7 @@ class SeuratClusterStats(Proc):
         nCells_All = { }
         ```
-        ![nCells_All](https://pwwang.github.io/immunopipe/processes/images/SeuratClusterStats_nCells_All.png){: width="80%" }
+        ![nCells_All](https://pwwang.github.io/immunopipe/latest/processes/images/SeuratClusterStats_nCells_All.png){: width="80%" }
         ### Number of cells in each cluster by groups
@@ -422,7 +426,7 @@ class SeuratClusterStats(Proc):
         nCells_Sample = { group-by = "Sample" }
         ```
-        ![nCells_Sample](https://pwwang.github.io/immunopipe/processes/images/SeuratClusterStats_nCells_Sample.png){: width="80%" }
+        ![nCells_Sample](https://pwwang.github.io/immunopipe/latest/processes/images/SeuratClusterStats_nCells_Sample.png){: width="80%" }
         ### Violin plots for the gene expressions
@@ -435,8 +439,8 @@ class SeuratClusterStats(Proc):
         vlnplots_1 = { features = ["FOXP3", "IL2RA"], pt-size = 0, kind = "vln" }
         ```
-        ![vlnplots](https://pwwang.github.io/immunopipe/processes/images/SeuratClusterStats_vlnplots.png){: width="80%" }
-        ![vlnplots_1](https://pwwang.github.io/immunopipe/processes/images/SeuratClusterStats_vlnplots_1.png){: width="80%" }
+        ![vlnplots](https://pwwang.github.io/immunopipe/latest/processes/images/SeuratClusterStats_vlnplots.png){: width="80%" }
+        ![vlnplots_1](https://pwwang.github.io/immunopipe/latest/processes/images/SeuratClusterStats_vlnplots_1.png){: width="80%" }
         ### Dimension reduction plot with labels
@@ -447,7 +451,7 @@ class SeuratClusterStats(Proc):
         repel = true
         ```
-        ![dimplots](https://pwwang.github.io/immunopipe/processes/images/SeuratClusterStats_dimplots.png){: width="80%" }
+        ![dimplots](https://pwwang.github.io/immunopipe/latest/processes/images/SeuratClusterStats_dimplots.png){: width="80%" }
     Input:
         srtobj: The seurat object loaded by `SeuratClustering`
@@ -857,7 +861,7 @@ class CellsDistribution(Proc):
         group_order = [ "Tumor", "Normal" ]
         ```
-        ![CellsDistribution_example](https://pwwang.github.io/immunopipe/processes/images/CellsDistribution_example.png)
+        ![CellsDistribution_example](https://pwwang.github.io/immunopipe/latest/processes/images/CellsDistribution_example.png)
     Input:
         srtobj: The seurat object in RDS format
@@ -1483,14 +1487,17 @@ class SeuratTo10X(Proc):
         srtobj: The seurat object in RDS
     Output:
-        outdir: The output directory
+        outdir: The output directory.
+            When `envs.split_by` is specified, the subdirectories will be
+            created for each distinct value of the column.
+            Otherwise, the matrices will be written to the output directory.
     Envs:
         version: The version of 10X format
     """
     input = "srtobj:file"
     output = "outdir:dir:{{in.srtobj | stem}}"
-    envs = {"version": "3"}
+    envs = {"version": "3", "split_by": None}
     lang = config.lang.rscript
     script = "file://../scripts/scrna/SeuratTo10X.R"
@@ -1870,7 +1877,7 @@ class RadarPlots(Proc):
         Then we will have a radar plots like this:
-        ![Radar plots](https://pwwang.github.io/immunopipe/processes/images/RadarPlots-default.png)
+        ![Radar plots](https://pwwang.github.io/immunopipe/latest/processes/images/RadarPlots-default.png)
         We can use `each` to separate the cells into different cases:
@@ -1882,7 +1889,7 @@ class RadarPlots(Proc):
         Then we will have two radar plots, one for `Pre` and one for `Post`:
-        ![Radar plots](https://pwwang.github.io/immunopipe/processes/images/RadarPlots-each.png)
+        ![Radar plots](https://pwwang.github.io/immunopipe/latest/processes/images/RadarPlots-each.png)
         Using `cluster_order` to change the order of the clusters and show only the first 3 clusters:
@@ -1893,7 +1900,7 @@ class RadarPlots(Proc):
         breaks = [0, 50, 100]  # also change the breaks
         ```
-        ![Radar plots cluster_order](https://pwwang.github.io/immunopipe/processes/images/RadarPlots-cluster_order.png)
+        ![Radar plots cluster_order](https://pwwang.github.io/immunopipe/latest/processes/images/RadarPlots-cluster_order.png)
         /// Attention

biopipen/ns/scrna_metabolic_landscape.py CHANGED Viewed

@@ -22,11 +22,11 @@ class MetabolicPathwayActivity(Proc):
     For each subset, a heatmap and a violin plot will be generated.
     The heatmap shows the pathway activities for each group and each metabolic pathway
-    ![MetabolicPathwayActivity_heatmap](https://pwwang.github.io/immunopipe/processes/images/MetabolicPathwayActivity_heatmap.png){: width="80%"}
+    ![MetabolicPathwayActivity_heatmap](https://pwwang.github.io/immunopipe/latest/processes/images/MetabolicPathwayActivity_heatmap.png){: width="80%"}
     The violin plot shows the distribution of the pathway activities for each group
-    ![MetabolicPathwayActivity_violin](https://pwwang.github.io/immunopipe/processes/images/MetabolicPathwayActivity_violin.png){: width="45%"}
+    ![MetabolicPathwayActivity_violin](https://pwwang.github.io/immunopipe/latest/processes/images/MetabolicPathwayActivity_violin.png){: width="45%"}
     Envs:
         ntimes (type=int): Number of times to do the permutation
@@ -294,7 +294,7 @@ class MetabolicPathwayHeterogeneity(Proc):
     The heterogeneity can be reflected by the NES values and the p-values in
     different groups for the metabolic pathways.
-    ![MetabolicPathwayHeterogeneity](https://pwwang.github.io/immunopipe/processes/images/MetabolicPathwayHeterogeneity.png)
+    ![MetabolicPathwayHeterogeneity](https://pwwang.github.io/immunopipe/latest/processes/images/MetabolicPathwayHeterogeneity.png)
     Envs:

biopipen/ns/snp.py CHANGED Viewed

@@ -71,3 +71,68 @@ class PlinkSimulation(Proc):
         "sample_prefix": None,
     }
     script = "file://../scripts/snp/PlinkSimulation.py"
+class MatrixEQTL(Proc):
+    """Run Matrix eQTL
+    See also <https://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/>
+    Input:
+        geno: Genotype matrix file with rows representing SNPs and columns
+            representing samples.
+        expr: Expression matrix file with rows representing genes and columns
+            representing samples.
+        cov: Covariate matrix file with rows representing covariates and columns
+            representing samples.
+    Output:
+        alleqtls: Matrix eQTL output file
+        cisqtls: The cis-eQTL file if `snppos` and `genepos` are provided.
+            Otherwise it'll be empty.
+    Envs:
+        model (choice): The model to use.
+            - `linear`: Linear model
+            - `modelLINEAR`: Same as `linear`
+            - `anova`: ANOVA model
+            - `modelANOVA`: Same as `anova`
+        pval (type=float): P-value threshold for eQTLs (default: 1e-3)
+        transp (type=float): P-value threshold for trans-eQTLs.
+            If cis-eQTLs are not enabled (`snppos` and `genepos` are not set),
+            this defaults to 1e-5.
+            If cis-eQTLs are enabled, this defaults to `None`, which will disable
+            trans-eQTL analysis.
+        fdr (flag): Do FDR calculation or not (save memory if not).
+        snppos: The path of the SNP position file.
+            It could be a BED, GFF, VCF or a tab-delimited file with
+            `snp`, `chr`, `pos` as the first 3 columns.
+        genepos: The path of the gene position file.
+            It could be a BED or GFF file.
+            Defaults to `config.ref.refgene`.
+        dist (type=int): Distance threshold for cis-eQTLs (default: 250000).
+        transpose_geno (flag): If set, the genotype matrix (`in.geno`)
+            will be transposed.
+        transpose_expr (flag): If set, the expression matrix (`in.expr`)
+            will be transposed.
+        transpose_cov (flag): If set, the covariate matrix (`in.cov`)
+            will be transposed.
+    """
+    input = "geno:file, expr:file, cov:file"
+    # Both outputs are named after the stem of the genotype file.
+    output = [
+        "alleqtls:file:{{in.geno | stem}}.alleqtls.txt",
+        "cisqtls:file:{{in.geno | stem}}.cisqtls.txt",
+    ]
+    lang = config.lang.rscript
+    envs = {
+        "model": "linear",
+        "pval": 1e-3,
+        # NOTE(review): the docstring says this becomes 1e-5 when cis-eQTLs are
+        # disabled; presumably the script resolves that - confirm in MatrixEQTL.R
+        "transp": None,
+        "fdr": False,
+        "snppos": None,
+        "genepos": config.ref.refgene,
+        "dist": 250000,
+        "transpose_geno": False,
+        "transpose_expr": False,
+        "transpose_cov": False,
+    }
+    script = "file://../scripts/snp/MatrixEQTL.R"

biopipen/ns/tcr.py CHANGED Viewed

@@ -923,7 +923,7 @@ class CloneResidency(Proc):
     - Residency plots showing the residency of clones in the two groups
-        ![CloneResidency_residency](https://pwwang.github.io/immunopipe/processes/images/CloneResidency.png)
+        ![CloneResidency_residency](https://pwwang.github.io/immunopipe/latest/processes/images/CloneResidency.png)
         The points in the plot are jittered to avoid overplotting. The x-axis is the residency in the first group and
         the y-axis is the residency in the second group. The size of the points are relative to the normalized size of
@@ -943,7 +943,7 @@ class CloneResidency(Proc):
     - Venn diagrams showing the overlap of the clones in the two groups
-        ![CloneResidency_venn](https://pwwang.github.io/immunopipe/processes/images/CloneResidency_venn.png){: width="60%"}
+        ![CloneResidency_venn](https://pwwang.github.io/immunopipe/latest/processes/images/CloneResidency_venn.png){: width="60%"}
     Input:
         immdata: The data loaded by `immunarch::repLoad()`
@@ -1259,7 +1259,7 @@ class TCRClusterStats(Proc):
         by = "Sample"
         ```
-        ![Cluster_size](https://pwwang.github.io/immunopipe/processes/images/TCRClusteringStats_cluster_size.png){: width="80%"}
+        ![Cluster_size](https://pwwang.github.io/immunopipe/latest/processes/images/TCRClusteringStats_cluster_size.png){: width="80%"}
         ### Shared clusters
@@ -1269,7 +1269,7 @@ class TCRClusterStats(Proc):
         heatmap_meta = ["region"]
         ```
-        ![Shared_clusters](https://pwwang.github.io/immunopipe/processes/images/TCRClusteringStats_shared_clusters.png){: width="80%"}
+        ![Shared_clusters](https://pwwang.github.io/immunopipe/latest/processes/images/TCRClusteringStats_shared_clusters.png){: width="80%"}
         ### Sample diversity
@@ -1278,11 +1278,11 @@ class TCRClusterStats(Proc):
         method = "gini"
         ```
-        ![Sample_diversity](https://pwwang.github.io/immunopipe/processes/images/TCRClusteringStats_sample_diversity.png){: width="80%"}
+        ![Sample_diversity](https://pwwang.github.io/immunopipe/latest/processes/images/TCRClusteringStats_sample_diversity.png){: width="80%"}
         Compared to the sample diversity using TCR clones:
-        ![Sample_diversity](https://pwwang.github.io/immunopipe/processes/images/Immunarch_sample_diversity.png){: width="80%"}
+        ![Sample_diversity](https://pwwang.github.io/immunopipe/latest/processes/images/Immunarch_sample_diversity.png){: width="80%"}
     Input:
         immfile: The immunarch object with TCR clusters attached

biopipen/scripts/delim/SampleInfo.R CHANGED Viewed

@@ -113,14 +113,14 @@ for (name in names(stats)) {
     if (stat$plot == "boxplot" || stat$plot == "box") {
         p <- ggplot(data, aes(x=!!group, y=!!sym(stat$on), fill=!!group)) +
             geom_boxplot(position = "dodge") +
-            scale_fill_biopipen() +
+            scale_fill_biopipen(alpha = .6) +
             xlab("")
     } else if (stat$plot == "violin" ||
                stat$plot == "violinplot" ||
                stat$plot == "vlnplot") {
         p <- ggplot(data, aes(x = !!group, y = !!sym(stat$on), fill=!!group)) +
             geom_violin(position = "dodge") +
-            scale_fill_biopipen() +
+            scale_fill_biopipen(alpha = .6) +
             xlab("")
     } else if (
         (grepl("violin", stat$plot) || grepl("vln", stat$plot)) &&
@@ -129,12 +129,12 @@ for (name in names(stats)) {
         p <- ggplot(data, aes(x = !!group, y = !!sym(stat$on), fill = !!group)) +
             geom_violin(position = "dodge") +
             geom_boxplot(width = 0.1, position = position_dodge(0.9), fill="white") +
-            scale_fill_biopipen() +
+            scale_fill_biopipen(alpha = .6) +
             xlab("")
     } else if (stat$plot == "histogram" || stat$plot == "hist") {
         p <- ggplot(data, aes(x = !!sym(stat$on), fill = !!group)) +
             geom_histogram(bins = 10, position = "dodge", alpha = 0.8, color = "white") +
-            scale_fill_biopipen()
+            scale_fill_biopipen(alpha = .6)
     } else if (stat$plot == "pie" || stat$plot == "piechart") {
         if (is.null(stat$each)) {
             data <- data %>% distinct(!!group, .keep_all = TRUE)
@@ -157,7 +157,7 @@ for (name in names(stats)) {
                 fill="#EEEEEE",
                 size=4
             ) +
-            scale_fill_biopipen(name = group) +
+            scale_fill_biopipen(alpha = .6, name = group) +
             ggtitle(paste0("# ", stat$on))
     } else if (stat$plot == "bar" || stat$plot == "barplot") {
         if (is.null(stat$each)) {
@@ -169,7 +169,7 @@ for (name in names(stats)) {
             data,
             aes(x = !!group, y = !!sym(count_on), fill = !!group)) +
             geom_bar(stat = "identity") +
-            scale_fill_biopipen() +
+            scale_fill_biopipen(alpha = .6) +
             ylab(paste0("# ", stat$on))
     } else {
         stop("Unknown plot type: ", stat$plot)

biopipen/scripts/plot/ROC.R ADDED Viewed

@@ -0,0 +1,88 @@
+source("{{biopipen_dir}}/utils/misc.R")
+library(rlang)
+library(ggplot2)
+library(plotROC)
+# Draw ROC curve(s) with plotROC for one or more score columns, save the
+# figure to `outfile` and the per-model AUCs to <job.outdir>/aucs.tsv.
+infile <- {{in.infile | r}}
+outfile <- {{out.outfile | r}}
+joboutdir <- {{job.outdir | r}}
+noids <- {{envs.noids | r}}
+pos_label <- {{envs.pos_label | r}}
+ci <- {{envs.ci | r}}
+devpars <- {{envs.devpars | r}}
+show_auc <- {{envs.show_auc | r}}
+args <- {{envs.args | r: todot="-"}}
+style_roc_args <- {{envs.style_roc | r: todot="-"}}
+# `theme` arrives as a string of R code; turn it into the actual object.
+# NOTE: this evaluates pipeline configuration as code, so it must be trusted.
+if (!is.null(style_roc_args$theme)) {
+    style_roc_args$theme <- eval(parse(text=style_roc_args$theme))
+}
+data <- read.table(infile, header=TRUE, sep="\t", row.names = NULL, check.names = FALSE, stringsAsFactors=FALSE)
+if (!noids) {
+    # Drop the id column; drop = FALSE keeps a data frame whatever is left.
+    data <- data[, -1, drop = FALSE]
+}
+if (ncol(data) < 2) {
+    # Without this, 2:ncol(data) below would silently index backwards.
+    stop("The input file should have a label column followed by at least one score column.")
+}
+# Normalize the first column (labels) into 0 and 1.
+# If they are not 0/1, use pos_label to determine the positive class.
+label_col <- colnames(data)[1]
+labels <- data[[label_col]]
+# Numeric labels that are not already 0/1 (e.g. 1/2) need the same mapping as
+# string labels; 0/1 and logical labels are left untouched as before.
+non_binary_numeric <- is.numeric(labels) && !all(labels[!is.na(labels)] %in% c(0, 1))
+if (is.character(labels) || non_binary_numeric) {
+    data[[label_col]] <- as.numeric(labels == pos_label)
+}
+models <- colnames(data)[2:ncol(data)]
+if (length(models) > 1) {
+    # pivot longer the models, and put the model names into the column 'name'
+    data <- melt_roc(data, label_col, models)
+} else {
+    # Same D/M/name layout as melt_roc() produces, built by hand for one model
+    data <- data.frame(
+        D = data[[label_col]],
+        M = data[[models]],
+        name = rep(models, nrow(data))
+    )
+}
+# Plot the ROC curve
+p <- ggplot(data, aes(d = D, m = M, color = name))
+if (isTRUE(ci)) {
+    p <- p + do.call(geom_rocci, args)
+} else {
+    p <- p + do.call(geom_roc, args)
+}
+p <- p + do.call(style_roc, style_roc_args)
+p <- p + scale_color_biopipen()
+if (length(models) > 1) {
+    p <- p + theme(legend.title = element_blank())
+} else {
+    p <- p + theme(legend.position = "none")
+}
+# NOTE(review): calc_auc() is expected to find a geom_roc layer and to return
+# a `name` column - confirm both hold when `ci` is TRUE and for the installed
+# plotROC version.
+aucs = calc_auc(p)
+write.table(aucs, file=file.path(joboutdir, "aucs.tsv"), sep="\t", quote=FALSE, row.names=FALSE)
+if (isTRUE(show_auc)) {
+    auc_by_model = split(aucs$AUC, aucs$name)
+    if (length(auc_by_model) > 1) {
+        # Add AUC values to the legend items
+        # paste0() avoids the stray spaces paste() put around the parentheses
+        p <- p +
+            scale_color_manual(
+                values = pal_biopipen()(length(models)),
+                labels = sapply(models, function(m) paste0(m, " (AUC = ", round(auc_by_model[[m]], 2), ")")),
+                breaks = models)
+    } else {
+        p <- p +
+            geom_text(
+                x = 0.8, y = 0.2, label = paste("AUC =", round(unlist(auc_by_model), 2)),
+                color = "black", size = 4)
+    }
+}
+devpars$filename <- outfile
+do.call(png, devpars)
+print(p)
+dev.off()

biopipen/scripts/scrna/SeuratClusterStats-features.R CHANGED Viewed

@@ -81,7 +81,7 @@ do_one_features = function(name) {
     if (case$kind %in% c("ridge", "ridgeplot")) {
         case$kind = "ridge"
         if (is.null(case$cols)) {
-            case$cols = pal_biopipen()(32)
+            case$cols = pal_biopipen()(n_uidents)
         }
         excluded_args = c(excluded_args, "split.by", "reduction")
         fn = RidgePlot

biopipen/scripts/scrna/SeuratPreparing.R CHANGED Viewed

@@ -4,6 +4,7 @@ library(Seurat)
 library(future)
 library(bracer)
 library(ggplot2)
+library(dplyr)
 library(tidyseurat)
 metafile = {{in.metafile | quote}}
@@ -49,6 +50,19 @@ if (!"RNAData" %in% meta_cols) {
     stop("Error: Column `RNAData` is not found in metafile.")
 }
+samples = as.character(metadata$Sample)
+# used for plotting
+cell_qc_df = NULL
+plotsdir = file.path(joboutdir, "plots")
+dir.create(plotsdir, showWarnings = FALSE, recursive = TRUE)
+# features for cell QC
+feats = c(
+    "nFeature_RNA", "nCount_RNA",
+    "percent.mt", "percent.ribo", "percent.hb", "percent.plat"
+)
 rename_files = function(e, sample, path) {
     tmpdatadir = file.path(joboutdir, "renamed", sample)
@@ -74,6 +88,143 @@ rename_files = function(e, sample, path) {
     Read10X(data.dir = tmpdatadir)
 }
+# Annotate a Seurat object with QC metrics, flag each cell with `envs$cell_qc`,
+# append the per-cell result to the global `cell_qc_df` (used for plotting),
+# and drop the cells that fail.
+#   sobj: the Seurat object to filter
+#   per_sample: only changes the prefix of the log messages
+# Returns the filtered object without the temporary `.QC` column.
+perform_cell_qc <- function(sobj, per_sample = FALSE) {
+    log_prefix <- ifelse(per_sample, "  ", "- ")
+    log_info("{log_prefix}Adding metadata for QC ...")
+    sobj$percent.mt <- PercentageFeatureSet(sobj, pattern = "^MT-")
+    sobj$percent.ribo <- PercentageFeatureSet(sobj, pattern = "^RP[SL]")
+    sobj$percent.hb <- PercentageFeatureSet(sobj, pattern = "^HB[^(P)]")
+    sobj$percent.plat <- PercentageFeatureSet(sobj, pattern = "PECAM1|PF4")
+    # Fall back to an always-true expression when no criteria are configured
+    # (length() is 0 for NULL as well as for empty values).
+    qc_expr <- envs$cell_qc
+    if (length(qc_expr) == 0) {
+        log_warn("{log_prefix}No cell QC criteria is provided. All cells will be kept.")
+        qc_expr <- "TRUE"
+    }
+    sobj <- mutate(sobj, .QC = !!rlang::parse_expr(qc_expr))
+    # rbind() skips a NULL argument, so the very first call needs no special case.
+    qc_cols <- c("Sample", ".QC", feats)
+    cell_qc_df <<- rbind(cell_qc_df, sobj@meta.data[, qc_cols, drop = FALSE])
+    # Do the filtering
+    log_info("{log_prefix}Filtering cells using QC criteria ...")
+    sobj <- filter(sobj, .QC)
+    sobj$.QC <- NULL
+    sobj
+}
+# Plot the cell-QC metrics collected in the global `cell_qc_df` and add them
+# to the report; also relies on the globals `feats`, `plotsdir` and `samples`.
+#   ngenes: number of genes, copied as-is into the `nGenes` column
+# Returns a two-row data frame (columns `when`, `nCells`, `nGenes`) with the
+# cell counts before and after cell QC.
+report_cell_qc = function(ngenes) {
+    # uses cell_qc_df
+    # Violin plots
+    log_info("- Plotting violin plots ...")
+    add_report(
+        list(
+            kind = "descr",
+            content = paste(
+                "The violin plots for each feature. The cells are grouped by sample.",
+                "The cells that fail the QC criteria are colored in red, and",
+                "the cells that pass the QC criteria are colored in black.",
+                "The cells that fail the QC criteria are filtered out in the returned Seurat object."
+            )
+        ),
+        h1 = "Violin Plots"
+    )
+    # One violin plot per QC feature, cells grouped by sample
+    for (feat in feats) {
+        log_info("  For feature: {feat}")
+        # breaks = c(TRUE, FALSE): passing cells get "#181818", failing cells
+        # get the first biopipen palette color
+        vln_p <- ggplot(cell_qc_df, aes(x = Sample, y = !!sym(feat), color = .QC)) +
+            geom_violin(fill = "white", width = 0.5) +
+            geom_jitter(width = 0.2, height = 0, alpha = 0.5) +
+            scale_color_manual(values = c("#181818", pal_biopipen()(1)), breaks = c(TRUE, FALSE)) +
+            labs(x = "Sample", y = feat) +
+            theme_minimal()
+        vlnplot = file.path(plotsdir, paste0(slugify(feat), ".vln.png"))
+        # The figure gets wider as the number of samples grows
+        png(
+            vlnplot,
+            width = 800 + length(samples) * 15, height = 600, res = 100
+        )
+        print(vln_p)
+        dev.off()
+        add_report(
+            list(
+                src = vlnplot,
+                name = feat,
+                descr = paste0("Distribution of ", feat, " for each sample.")
+            ),
+            h1 = "Violin Plots",
+            ui = "table_of_images"
+        )
+    }
+    # Scatter plots against nCount_RNA
+    log_info("- Plotting scatter plots ...")
+    add_report(
+        list(
+            kind = "descr",
+            content = paste(
+                "The scatter plots for each feature against nCount_RNA. ",
+                "The cells that fail the QC criteria are colored in red, and",
+                "the cells that pass the QC criteria are colored in black.",
+                "The cells that fail the QC criteria are filtered out in the returned Seurat object."
+            )
+        ),
+        h1 = "Scatter Plots"
+    )
+    # nCount_RNA is the x-axis, so it is skipped as a y feature
+    for (feat in setdiff(feats, "nCount_RNA")) {
+        log_info("  For feature: {feat}, against nCount_RNA")
+        scat_p <- ggplot(cell_qc_df, aes(x = nCount_RNA, y = !!sym(feat), color = .QC)) +
+            geom_point() +
+            scale_color_manual(values = c("#181818", pal_biopipen()(1)), breaks = c(TRUE, FALSE)) +
+            labs(x = "nCount_RNA", y = feat) +
+            theme_minimal()
+        scatfile = file.path(plotsdir, paste0(slugify(feat), "-nCount_RNA.scatter.png"))
+        png(scatfile, width = 800, height = 600, res = 100)
+        print(scat_p)
+        dev.off()
+        add_report(
+            list(
+                src = scatfile,
+                name = paste0(feat, " vs nCount_RNA"),
+                descr = paste0("Scatter plot for ", feat, " against nCount_RNA")
+            ),
+            h1 = "Scatter Plots",
+            ui = "table_of_images"
+        )
+    }
+    # return the dim_df calculated from the cell_qc_df
+    # The per-sample grouping is commented out, so each summarise() yields a
+    # single row over all cells and the ungroup() calls have nothing to undo.
+    rbind(
+        cell_qc_df %>%
+            # group_by(Sample) %>%
+            summarise(
+                when = "Before_Cell_QC",
+                nCells = dplyr::n(),
+                nGenes = ngenes
+            ) %>%
+            ungroup(),
+        cell_qc_df %>%
+            filter(.QC) %>%
+            # group_by(Sample) %>%
+            summarise(
+                when = "After_Cell_QC",
+                nCells = dplyr::n(),
+                nGenes = ngenes
+            ) %>%
+            ungroup()
+    )
+}
 load_sample = function(sample) {
     log_info("- Loading sample: {sample} ...")
     mdata = as.data.frame(metadata)[metadata$Sample == sample, , drop=TRUE]
@@ -114,6 +265,11 @@ load_sample = function(sample) {
         obj[[mname]] = mdt
     }
+    if (isTRUE(envs$cell_qc_per_sample)) {
+        log_info("- Perform cell QC for sample: {sample} ...")
+        obj = perform_cell_qc(obj, TRUE)
+    }
     if (isTRUE(envs$use_sct)) {
         # so that we have data and scale.data layers on RNA assay
         # useful for visualization in case some genes are not in
@@ -126,125 +282,20 @@ load_sample = function(sample) {
 }
 # Load data
-samples = as.character(metadata$Sample)
 log_info("Reading samples individually ...")
 obj_list = lapply(samples, load_sample)
 log_info("Merging samples ...")
 sobj = Reduce(merge, obj_list)
-log_info("Adding metadata for QC ...")
-sobj$percent.mt = PercentageFeatureSet(sobj, pattern = "^MT-")
-sobj$percent.ribo = PercentageFeatureSet(sobj, pattern = "^RP[SL]")
-sobj$percent.hb = PercentageFeatureSet(sobj, pattern = "^HB[^(P)]")
-sobj$percent.plat = PercentageFeatureSet(sobj, pattern = "PECAM1|PF4")
-dim_df = data.frame(When = "Before_QC", nCells = ncol(sobj), nGenes = nrow(sobj))
-if (is.null(envs$cell_qc) || length(envs$cell_qc) == 0) {
-    log_warn("No cell QC criteria is provided. All cells will be kept.")
-    envs$cell_qc = "TRUE"
-}
-sobj = sobj %>% mutate(.QC = !!rlang::parse_expr(envs$cell_qc))
-feats = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.ribo", "percent.hb", "percent.plat")
-plotsdir = file.path(joboutdir, "plots")
-dir.create(plotsdir, showWarnings = FALSE)
-# Violin plots
-log_info("Plotting violin plots ...")
-add_report(
-    list(
-        kind = "descr",
-        content = paste(
-            "The violin plots for each feature. The cells are grouped by sample.",
-            "The cells that fail the QC criteria are colored in red, and",
-            "the cells that pass the QC criteria are colored in black.",
-            "The cells that fail the QC criteria are filtered out in the returned Seurat object."
-        )
-    ),
-    h1 = "Violin Plots"
-)
-for (feat in feats) {
-    log_info("- For feature: {feat}")
-    vln_p = VlnPlot(
-        sobj,
-        cols = rep("white", length(samples)),
-        group.by = "Sample",
-        features = feat,
-        pt.size = 0) + NoLegend()
-    vln_p$data$.QC = sobj@meta.data$.QC
-    vln_p = vln_p + geom_jitter(
-            aes(color = .QC),
-            data = vln_p$data,
-            position = position_jitterdodge(jitter.width = 0.4, dodge.width = 0.9)
-        ) + scale_color_manual(values = c("#181818", pal_biopipen()(1)), breaks = c(TRUE, FALSE))
-    vlnplot = file.path(plotsdir, paste0(slugify(feat), ".vln.png"))
-    png(
-        vlnplot,
-        width = 800 + length(samples) * 15, height = 600, res = 100
-    )
-    print(vln_p)
-    dev.off()
-    add_report(
-        list(
-            src = vlnplot,
-            name = feat,
-            descr = paste0("Distribution of ", feat, " for each sample.")
-        ),
-        h1 = "Violin Plots",
-        ui = "table_of_images"
-    )
-}
-# Scatter plots against nCount_RNA
-log_info("Plotting scatter plots ...")
-add_report(
-    list(
-        kind = "descr",
-        content = paste(
-            "The scatter plots for each feature against nCount_RNA. ",
-            "The cells that fail the QC criteria are colored in red, and",
-            "the cells that pass the QC criteria are colored in black.",
-            "The cells that fail the QC criteria are filtered out in the returned Seurat object."
-        )
-    ),
-    h1 = "Scatter Plots"
-)
-for (feat in setdiff(feats, "nCount_RNA")) {
-    log_info("- For feature: {feat}, against nCount_RNA")
-    scat_p = FeatureScatter(
-        sobj,
-        feature1 = "nCount_RNA",
-        feature2 = feat,
-        group.by = ".QC"
-    ) +
-    NoLegend() +
-    scale_color_manual(values = c("#181818", pal_biopipen()(1)), breaks = c(TRUE, FALSE))
-    scatfile = file.path(plotsdir, paste0(slugify(feat), "-nCount_RNA.scatter.png"))
-    png(scatfile, width = 800, height = 600, res = 100)
-    print(scat_p)
-    dev.off()
-    add_report(
-        list(
-            src = scatfile,
-            name = paste0(feat, " vs nCount_RNA"),
-            descr = paste0("Scatter plot for ", feat, " against nCount_RNA")
-        ),
-        h1 = "Scatter Plots",
-        ui = "table_of_images"
-    )
+# Pooled cell QC on the merged object, unless it was already done per sample.
+# isTRUE() matches the per-sample check in load_sample() and avoids the
+# "argument is of length zero" error when the option is NULL.
+if (!isTRUE(envs$cell_qc_per_sample)) {
+    log_info("Performing cell QC ...")
+    sobj = perform_cell_qc(sobj)
 }
-# Do the filtering
-log_info("Filtering cells using QC criteria ...")
-sobj = sobj %>% filter(.QC)
-sobj$.QC = NULL
+# plot and report the QC
+log_info("Plotting and reporting QC ...")
+dim_df = report_cell_qc(nrow(sobj))
 log_info("Filtering genes ...")
 if (is.list(envs$gene_qc)) {
@@ -271,7 +322,7 @@ if (is.list(envs$gene_qc)) {
 dim_df = rbind(
     dim_df,
     data.frame(
-        When = "After_Gene_QC",
+        when = "After_Gene_QC",
         nCells = ncol(sobj),
         nGenes = nrow(sobj)
     )

biopipen/scripts/scrna/SeuratTo10X.R CHANGED Viewed

@@ -1,84 +1,27 @@
-library(Matrix)
-indir = {{in.indir | quote}}
-outdir = {{out.outdir | quote}}
-envs = {{envs | r}}
-set.seed(envs$seed)
-setwd(outdir)
-logger <- function(...) {
-  cat(paste(..., "\n"), file=stderr())
-}
-# Find the data files
-mtx_file = Sys.glob(file.path(indir, "*matrix.mtx.gz"))
-feat_file = c(
-    Sys.glob(file.path(indir, "*genes.tsv.gz")),
-    Sys.glob(file.path(indir, "*features.tsv.gz"))
-)
-barcode_file = Sys.glob(file.path(indir, "*barcodes.tsv.gz"))
-if (length(mtx_file) == 0) {
-    stop("No matrix file found in", indir)
-}
-if (length(mtx_file) > 1) {
-    warning(paste("Multiple matrix files found in", indir, ", using the first one."))
-}
-if (length(feat_file) == 0) {
-    stop("No feature file found in", indir)
-}
-if (length(feat_file) > 1) {
-    warning(paste("Multiple feature files found in", indir, ", using the first one."))
-}
-if (length(barcode_file) == 0) {
-    stop("No barcode file found in", indir)
-}
-if (length(barcode_file) > 1) {
-    warning(paste("Multiple barcode files found in", indir, ", using the first one."))
-}
-mtx = readMM(mtx_file)
-n_feats = nrow(mtx)
-n_cells = ncol(mtx)
-logger("- Dimension: Features:", n_feats, ", Cells:", n_cells)
-if (envs$nfeats <= 1) {
-    nfeats = as.integer(n_feats * envs$nfeats)
+library(DropletUtils)
+library(Seurat)
+# Inputs and options, filled in through the `{{ ... | r }}` template placeholders.
+# Path of the RDS file holding the Seurat object
+srtobjfile = {{in.srtobj | r}}
+# Directory that receives the 10X-formatted output
+outdir = {{out.outdir | r}}
+# Passed as `version` to write10xCounts() below
+version = {{envs.version | r}}
+# Metadata column to split the object by; NULL writes a single matrix to `outdir`
+split_by = {{envs.split_by | r}}
+srtobj = readRDS(srtobjfile)
+if (!is.null(split_by)) {
+    # Check that split_by names a metadata column. `srtobj[[split_by]]` is not
+    # used for the test: in current SeuratObject releases it raises its own
+    # error for an unknown name instead of returning NULL, so the message
+    # below could never be reached. `srtobj[[]]` returns the metadata table.
+    if (!split_by %in% colnames(srtobj[[]])) {
+        stop(paste0("Column ", split_by, " not found in Seurat object"))
+    }
+    # Break the object up by the split_by column and write the counts of each
+    # piece into its own sub-directory of outdir, named after the piece.
+    parts <- SplitObject(srtobj, split.by = split_by)
+    for (part_name in names(parts)) {
+        part_counts <- GetAssayData(object = parts[[part_name]], layer = "counts")
+        part_dir <- file.path(outdir, part_name)
+        dir.create(part_dir, showWarnings = FALSE, recursive = TRUE)
+        write10xCounts(part_dir, part_counts, version = version, overwrite = TRUE)
+    }
 } else {
-    nfeats = envs$nfeats
-}
-if (envs$ncells <= 1) {
-    ncells = as.integer(n_cells * envs$ncells)
-} else {
-    ncells = envs$ncells
-}
-logger("- Identifying features to keep ...")
-feats = read.table(feat_file, header=FALSE, row.names=NULL, check.names=FALSE)
-feats_to_keep = c()
-if (length(envs$feats_to_keep) > 0) {
-    feats_to_keep = match(envs$feats_to_keep, feats[,2])
+    # No split requested: write the counts of the whole object straight into outdir
+    whole_counts <- GetAssayData(object = srtobj, layer = "counts")
+    write10xCounts(outdir, whole_counts, version = version, overwrite = TRUE)
 }
-out_feats = unique(c(sample(1:n_feats, nfeats), feats_to_keep))
-out_cells = sample(1:n_cells, ncells)
-logger("- Resulting in", length(out_feats), "features and", ncells, "cells")
-logger("- Subsetting matrix and saving it ...")
-out_mtx = mtx[out_feats, out_cells, drop=FALSE]
-out_mtx_file = file.path(outdir, "matrix.mtx")
-writeMM(out_mtx, out_mtx_file)
-system(paste("gzip", out_mtx_file))
-logger("- Subsetting features and saving it ...")
-out_feats = feats[out_feats, , drop=FALSE]
-out_feats_file = gzfile(file.path(outdir, "features.tsv.gz"), "w")
-write.table(out_feats, out_feats_file, sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE)
-close(out_feats_file)
-logger("- Subsetting barcodes and saving it ...")
-barcodes = read.table(barcode_file, header=FALSE, row.names=NULL, check.names=FALSE)
-out_barcodes = barcodes[out_cells, , drop=FALSE]
-out_barcodes_file = gzfile(file.path(outdir, "barcodes.tsv.gz"), "w")
-write.table(out_barcodes, out_barcodes_file, sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE)
-close(out_barcodes_file)

biopipen/scripts/scrna/Subset10X.R ADDED Viewed

@@ -0,0 +1,84 @@
+library(Matrix)
+# Directories and options come from the `{{ ... }}` template placeholders.
+# Directory searched below for the 10X matrix, feature and barcode files
+indir = {{in.indir | quote}}
+# Directory the subset files are written to
+outdir = {{out.outdir | quote}}
+# Options read by this script: seed, nfeats, ncells, feats_to_keep
+envs = {{envs | r}}
+# Seed the RNG so the sample() draws below are reproducible
+set.seed(envs$seed)
+setwd(outdir)
+logger <- function(...) {
+  # Join all pieces with single spaces, end with a newline, and emit on stderr
+  line <- paste(..., "\n")
+  cat(line, file = stderr())
+}
+# Find the data files
+mtx_file = Sys.glob(file.path(indir, "*matrix.mtx.gz"))
+feat_file = c(
+    Sys.glob(file.path(indir, "*genes.tsv.gz")),
+    Sys.glob(file.path(indir, "*features.tsv.gz"))
+)
+barcode_file = Sys.glob(file.path(indir, "*barcodes.tsv.gz"))
+# stop() concatenates its arguments without a separator, so the messages are
+# built with paste() to keep a space before the directory name.
+# When several files match, keep only the first one, as the warning promises;
+# the readers further down accept a single path only.
+if (length(mtx_file) == 0) {
+    stop(paste("No matrix file found in", indir))
+}
+if (length(mtx_file) > 1) {
+    warning(paste("Multiple matrix files found in", indir, ", using the first one."))
+    mtx_file = mtx_file[1]
+}
+if (length(feat_file) == 0) {
+    stop(paste("No feature file found in", indir))
+}
+if (length(feat_file) > 1) {
+    warning(paste("Multiple feature files found in", indir, ", using the first one."))
+    feat_file = feat_file[1]
+}
+if (length(barcode_file) == 0) {
+    stop(paste("No barcode file found in", indir))
+}
+if (length(barcode_file) > 1) {
+    warning(paste("Multiple barcode files found in", indir, ", using the first one."))
+    barcode_file = barcode_file[1]
+}
+# Load the matrix; its rows are counted as features and its columns as cells
+mtx = readMM(mtx_file)
+mtx_dim = dim(mtx)
+n_feats = mtx_dim[1]
+n_cells = mtx_dim[2]
+logger("- Dimension: Features:", n_feats, ", Cells:", n_cells)
+# A requested size <= 1 is taken as a fraction of the total (truncated to an
+# integer); anything larger is used as an absolute count.
+nfeats = if (envs$nfeats <= 1) as.integer(n_feats * envs$nfeats) else envs$nfeats
+ncells = if (envs$ncells <= 1) as.integer(n_cells * envs$ncells) else envs$ncells
+logger("- Identifying features to keep ...")
+feats = read.table(feat_file, header=FALSE, row.names=NULL, check.names=FALSE)
+# Row indices of the features that must survive the random subsetting,
+# looked up by the second column of the feature file
+feats_to_keep = c()
+if (length(envs$feats_to_keep) > 0) {
+    feats_to_keep = match(envs$feats_to_keep, feats[,2])
+    # match() yields NA for names absent from the feature file; an NA index
+    # would break the matrix subsetting below, so report and drop them.
+    missing_feats = envs$feats_to_keep[is.na(feats_to_keep)]
+    if (length(missing_feats) > 0) {
+        warning(paste(
+            "Features to keep not found in the feature file and ignored:",
+            paste(missing_feats, collapse = ", ")
+        ))
+        feats_to_keep = feats_to_keep[!is.na(feats_to_keep)]
+    }
+}
+# Random draw of nfeats features plus the forced ones (deduplicated), and a
+# random draw of ncells cells
+out_feats = unique(c(sample(1:n_feats, nfeats), feats_to_keep))
+out_cells = sample(1:n_cells, ncells)
+logger("- Resulting in", length(out_feats), "features and", ncells, "cells")
+logger("- Subsetting matrix and saving it ...")
+out_mtx = mtx[out_feats, out_cells, drop=FALSE]
+out_mtx_file = file.path(outdir, "matrix.mtx")
+writeMM(out_mtx, out_mtx_file)
+# Quote the path for the shell (it may contain spaces or metacharacters), let
+# gzip replace a matrix.mtx.gz left by an earlier run, as the two gzfile()
+# writes below already do for their targets, and fail loudly when compression
+# did not succeed instead of leaving an uncompressed matrix behind.
+gzip_status = system(paste("gzip -f", shQuote(out_mtx_file)))
+if (gzip_status != 0) {
+    stop(paste("Failed to gzip", out_mtx_file))
+}
+logger("- Subsetting features and saving it ...")
+# out_feats switches from row indices to the selected rows of the feature table
+out_feats = feats[out_feats, , drop=FALSE]
+out_feats_file = gzfile(file.path(outdir, "features.tsv.gz"), "w")
+write.table(out_feats, out_feats_file, sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE)
+close(out_feats_file)
+logger("- Subsetting barcodes and saving it ...")
+barcodes = read.table(barcode_file, header=FALSE, row.names=NULL, check.names=FALSE)
+out_barcodes = barcodes[out_cells, , drop=FALSE]
+out_barcodes_file = gzfile(file.path(outdir, "barcodes.tsv.gz"), "w")
+write.table(out_barcodes, out_barcodes_file, sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE)
+close(out_barcodes_file)

biopipen/scripts/snp/MatrixEQTL.R ADDED Viewed

@@ -0,0 +1,157 @@
+source("{{biopipen_dir}}/utils/misc.R")
+library(rlang)
+library(MatrixEQTL)
+# Inputs, outputs and options, filled in through the `{{ ... | r }}` template placeholders.
+# Genotype table, loaded into the `snps` SlicedData below
+snpfile = {{in.geno | r}}
+# Expression table, loaded into the `gene` SlicedData below
+expfile = {{in.expr | r}}
+# Optional covariate table; only loaded when non-NULL and the file exists
+covfile = {{in.cov | r}}
+# Directory that receives the transposed copies of the input tables
+joboutdir = {{job.outdir | r}}
+# Passed as `output_file_name` when the trans analysis is enabled
+alleqtl = {{out.alleqtls | r}}
+# Passed as `output_file_name.cis` when the cis analysis is enabled
+outfile = {{out.cisqtls | r}}
+# One of "modelANOVA", "modelLINEAR", "linear", "anova"
+model = {{envs.model | r}}
+# Passed as `pvOutputThreshold.cis`
+pval = {{envs.pval | r}}
+# Passed as `pvOutputThreshold`; NULL disables the trans analysis
+transp = {{envs.transp | r}}
+# Negated and passed as `noFDRsaveMemory`
+fdr = {{envs.fdr | r}}
+# SNP and gene position files (.bed, .gff/.gtf, .vcf for SNPs, or a plain table);
+# the cis analysis needs both
+snppos = {{envs.snppos | r}}
+genepos = {{envs.genepos | r}}
+# Passed as `cisDist`; must be > 0 for the cis analysis
+dist = {{envs.dist | r}}
+# Whether to transpose the corresponding input table before loading it
+transpose_geno = {{envs.transpose_geno | r}}
+transpose_expr = {{envs.transpose_expr | r}}
+transpose_cov = {{envs.transpose_cov | r}}
+# Validate the model name, expand the short aliases, then fetch the object
+# that carries that name
+arg_match(model, c("modelANOVA", "modelLINEAR", "linear", "anova"))
+model = switch(model, linear = "modelLINEAR", anova = "modelANOVA", model)
+model = get(model)
+# trans needs a p-value threshold; cis needs both position files and a
+# positive distance
+trans_enabled = !is.null(transp)
+cis_enabled = !(is.null(snppos) || is.null(genepos)) && dist > 0
+# Neither analysis is configured: fall back to trans with a default threshold
+if (!(trans_enabled || cis_enabled)) {
+    log_warn("Using `envs.transp = 1e-5` since cis-eQTL is disabled.")
+    trans_enabled <- TRUE
+    transp <- 1e-5
+}
+transpose_file <- function(file) {
+    # Write a transposed copy of a tab-separated table (header row, row names
+    # in the first column) into joboutdir and return the path of the copy,
+    # which is named <stem>.transposed.<ext> after the input file.
+    stem <- tools::file_path_sans_ext(basename(file))
+    ext <- tools::file_ext(file)
+    out <- file.path(joboutdir, paste0(stem, ".transposed.", ext))
+    tbl <- read.table(file, header=TRUE, stringsAsFactors=FALSE, row.names=1, sep="\t", quote="", check.names=FALSE)
+    write.table(t(tbl), file=out, sep="\t", quote=FALSE, row.names=TRUE, col.names=TRUE)
+    return(out)
+}
+# Replace an input by its transposed copy where requested
+if (transpose_geno) snpfile = transpose_file(snpfile)
+if (transpose_expr) expfile = transpose_file(expfile)
+# The covariate file is optional (same test as the loading step below), so
+# only transpose it when one was actually provided
+if (transpose_cov && !is.null(covfile) && file.exists(covfile)) {
+    covfile = transpose_file(covfile)
+}
+# Genotypes
+snps = SlicedData$new();
+snps$fileDelimiter = "\t";       # the TAB character
+snps$fileOmitCharacters = "NA";  # denote missing values;
+snps$fileSkipRows = 1;           # one row of column labels
+snps$fileSkipColumns = 1;        # one column of row labels
+snps$fileSliceSize = 10000;      # read file in pieces of 10,000 rows
+snps$LoadFile( snpfile );
+# Expression
+gene = SlicedData$new();
+gene$fileDelimiter = "\t";       # the TAB character
+gene$fileOmitCharacters = "NA";  # denote missing values;
+gene$fileSkipRows = 1;           # one row of column labels
+gene$fileSkipColumns = 1;        # one column of row labels
+gene$fileSliceSize = 10000;      # read file in pieces of 10,000 rows
+gene$LoadFile( expfile );
+# Covariates: left as an empty SlicedData when no covariate file is available
+cvrt = SlicedData$new();
+if (!is.null(covfile) && file.exists(covfile)) {
+    covmatrix = t(read.table.inopts(covfile, list(cnames=TRUE, rnames=TRUE)))
+    cvrt$CreateFromMatrix( as.matrix(covmatrix) )
+}
+# Arguments for the MatrixEQTL call, collected in a list so the cis-specific
+# ones can be added further down
+engine_params = list()
+engine_params$snps = snps
+engine_params$gene = gene
+engine_params$cvrt = cvrt
+# Plain if/else instead of ifelse(): ifelse(FALSE, x, NULL) stops with
+# "replacement has length zero", which aborted every run with trans disabled.
+# Assigning NULL leaves the element out of the list, so the called function
+# falls back to its own default for output_file_name.
+engine_params$output_file_name = if (trans_enabled) alleqtl else NULL
+# A threshold of 0 is passed when the trans analysis is disabled
+engine_params$pvOutputThreshold = if (trans_enabled) transp else 0
+engine_params$useModel = model
+engine_params$errorCovariance = numeric()
+engine_params$verbose = TRUE
+engine_params$noFDRsaveMemory = !fdr
+noq = function(s) {
+    # Drop a double quote sitting at the very start and/or very end of s
+    edge_quote = '^\"|\"$'
+    gsub(edge_quote, "", s)
+}
+# With cis enabled, build the SNP and gene position tables from the given
+# files (format chosen by file extension) and run the cis-aware entry point;
+# otherwise run the plain engine and leave an empty cis output file.
+if (cis_enabled) {
+    # SNP positions -> columns snp, chr, pos
+    if (endsWith(snppos, ".bed")) {
+        snppos_data = read.table.inopts(snppos,
+                                        list(cnames=FALSE, rnames=FALSE))
+        snppos_data = snppos_data[, c(4, 1, 2)]
+        colnames(snppos_data) = c("snp", "chr", "pos")
+    } else if (endsWith(snppos, ".gff") || endsWith(snppos, ".gtf")) {
+        snppos_data = read.table.inopts(snppos,
+                                        list(cnames=FALSE, rnames=FALSE));
+        snppos_data = snppos_data[, c(9, 1, 4)]
+        colnames(snppos_data) = c("snp", "chr", "pos")
+        # Pull the SNP id out of the attribute column: the first of the
+        # snp_id / rs_id / rs attributes found wins
+        snppos_data$snp = unlist(lapply(snppos_data$snp, function(x) {
+            for (s in unlist(strsplit(x, '; ', fixed=T))) {
+                # the last attribute may still carry its closing ";"
+                s = sub(";\\s*$", "", s)
+                if (startsWith(s, "snp_id "))
+                    return(noq(substring(s, 8)))
+                else if (startsWith(s, "rs_id "))
+                    return(noq(substring(s, 7)))
+                else if (startsWith(s, "rs "))
+                    return(noq(substring(s, 4)))
+            }
+            # Keep exactly one value per row: a NULL here would be dropped by
+            # unlist() and make the column assignment fail on a length mismatch
+            NA_character_
+        }))
+    } else if (endsWith(snppos, ".vcf") || endsWith(snppos, ".vcf.gz")) {
+        snppos_data = read.table.inopts(snppos,
+                                        list(cnames=FALSE, rnames=FALSE))
+        snppos_data = snppos_data[, c(3, 1, 2)]
+        colnames(snppos_data) = c("snp", "chr", "pos")
+    } else {
+        # Any other extension: a table with a header whose columns are
+        # renamed to snp, chr, pos
+        snppos_data = read.table.inopts(snppos, list(cnames=TRUE))
+        colnames(snppos_data) = c("snp", "chr", "pos")
+    }
+    # Gene positions -> columns geneid, chr, s1, s2
+    if (endsWith(genepos, ".bed")) {
+        genepos_data = read.table.inopts(genepos,
+                                         list(cnames=FALSE, rnames=FALSE))
+        genepos_data = genepos_data[, c(4, 1:3)]
+        colnames(genepos_data) = c("geneid", "chr", "s1", "s2")
+    } else if (endsWith(genepos, ".gff") || endsWith(genepos, ".gtf")) {
+        genepos_data = read.table.inopts(genepos,
+                                         list(cnames=FALSE, rnames=FALSE))
+        genepos_data = genepos_data[, c(9, 1, 4, 5)]
+        colnames(genepos_data) = c("geneid", "chr", "s1", "s2")
+        # Pull the gene_id attribute out of the attribute column
+        genepos_data$geneid = noquote(unlist(lapply(genepos_data$geneid, function(x) {
+            for (s in unlist(strsplit(x, '; ', fixed=T))) {
+                # the last attribute may still carry its closing ";"
+                s = sub(";\\s*$", "", s)
+                if (startsWith(s, "gene_id "))
+                    return(noq(substring(s, 9)))
+            }
+            # Keep exactly one value per row (see the SNP parser above)
+            NA_character_
+        })))
+    } else {
+        genepos_data = read.table(genepos, header = TRUE, stringsAsFactors = FALSE);
+        colnames(genepos_data) = c("geneid", "chr", "s1", "s2")
+    }
+    engine_params$output_file_name.cis = outfile
+    engine_params$pvOutputThreshold.cis = pval
+    engine_params$cisDist = dist
+    engine_params$snpspos = snppos_data
+    engine_params$genepos = genepos_data
+    do_call(Matrix_eQTL_main, engine_params)
+} else {
+    do_call(Matrix_eQTL_engine, engine_params)
+    # No cis analysis was run; still create the (empty) cis output file
+    file.create(outfile)
+}
+# With a zero cis p-value threshold, make sure both result files exist,
+# creating empty ones where they are missing
+if (pval == 0) {
+    for (result_file in c(outfile, alleqtl)) {
+        if (!file.exists(result_file)) file.create(result_file)
+    }
+}

{biopipen-0.27.3.dist-info → biopipen-0.27.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: biopipen
-Version: 0.27.3
+Version: 0.27.5
 Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
 License: MIT
 Author: pwwang
@@ -20,4 +20,3 @@ Requires-Dist: pipen-filters (>=0.12,<0.13)
 Requires-Dist: pipen-poplog (>=0.1.2,<0.2.0)
 Requires-Dist: pipen-runinfo (>=0.6,<0.7) ; extra == "runinfo"
 Requires-Dist: pipen-verbose (>=0.11,<0.12)
-Requires-Dist: pyyaml-include (==1.*)

{biopipen-0.27.3.dist-info → biopipen-0.27.5.dist-info}/RECORD RENAMED Viewed

@@ -1,11 +1,11 @@
-biopipen/__init__.py,sha256=lxhjPOOCzhlHB02EzaqTtDdBFZSOLV3WLWw2HC0DYvo,23
+biopipen/__init__.py,sha256=E1FuUUku2gzKP9EaIByX13BXhDU2SYE99gN_s2YdX7s,23
 biopipen/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 biopipen/core/config.py,sha256=edK5xnDhM8j27srDzsxubi934NMrglLoKrdcC8qsEPk,1069
 biopipen/core/config.toml,sha256=20RCI30Peee1EQdfb_UbV3Hf74XUPndJnYZlUThytsw,1781
 biopipen/core/defaults.py,sha256=yPeehPLk_OYCf71IgRVCWuQRxLAMixDF81Ium0HtPKI,344
 biopipen/core/filters.py,sha256=HLrjXGsvvjRtTWIAmg_f4IMymWaRD769HlDwsCTh170,12424
 biopipen/core/proc.py,sha256=60lUP3PcUAaKbDETo9N5PEIoeOYrLgcSmuytmrhcx8g,912
-biopipen/core/testing.py,sha256=6BaHm8C7oHdnC5q14DBd0Qp1wqNxSexSFc5vUtHZjsw,3565
+biopipen/core/testing.py,sha256=fZ8lzLwM5AhYapx0LDdYZPumqC0dj7GZpQuabhlqyGI,3665
 biopipen/ns/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 biopipen/ns/bam.py,sha256=5AsYrB0mtr_mH6mCL6gjJ5rC4NywpjFkpFjUrBGp7Fk,9301
 biopipen/ns/bcftools.py,sha256=puCDfIL-1z6cz2y1Rlz-ESNIr8xJgeIjEQ440qicCvM,3467
@@ -15,18 +15,18 @@ biopipen/ns/cellranger_pipeline.py,sha256=D6gvIeasHjDCdro7f4wjomxRYTtsJT77Ld47Xz
 biopipen/ns/cnv.py,sha256=vq6dZfEOyuVuqg3nP6FQtNmQ-JocpBJMX9IYlZ0OPD0,6803
 biopipen/ns/cnvkit.py,sha256=5mA2Q8-YDs4g1HoxtpB_NWnyZYwEThNr3s3wlubLQrQ,31130
 biopipen/ns/cnvkit_pipeline.py,sha256=2fJLn70L2jJ81ZMNdnU84Sf3HoKA2CSnHuDzLGR8jmw,36854
-biopipen/ns/delim.py,sha256=cmGy82_Cjyf1anBPSzqEUcZCzZqMLdNY-w5uJLE-H6c,5610
+biopipen/ns/delim.py,sha256=fejsh4KW1TG5oMZzAC238LvQhBz7brXkfl3BHfnLK5M,5612
 biopipen/ns/gene.py,sha256=Q5FzRByfnRITXRNRZR65ApG09FRyiihRC3TcIXxufzE,2228
 biopipen/ns/gsea.py,sha256=EsNRAPYsagaV2KYgr4Jv0KCnZGqayM209v4yOGGTIOI,7423
 biopipen/ns/misc.py,sha256=fzn0pXvdghMkQhu-e3MMapPNMyO6IAJbtTzVU3GbFa0,3246
-biopipen/ns/plot.py,sha256=yguxmErUOH-hOM10JfuI_sXw2p49XF8yGR_gXfbd5yQ,4066
+biopipen/ns/plot.py,sha256=fzJAKKl4a_tsVkLREGCQTFVHP049m33LdWgeYRb6v7M,5483
 biopipen/ns/rnaseq.py,sha256=bKAa6friFWof4yDTWZQahm1MS-lrdetO1GqDKdfxXYc,7708
-biopipen/ns/scrna.py,sha256=i9h0xNOII3SqJ_cJOZ5epn8breAsc-yXH_Us04DoZvg,103401
-biopipen/ns/scrna_metabolic_landscape.py,sha256=9s1NvH3aMaNDXyfwy9TdzGcSP_lIW4JqhLgknNZcIKE,28313
-biopipen/ns/snp.py,sha256=Nq20NJzQ9YiqE9mhtCUH6dfs7528o1e4N-j9PewjAsQ,3016
+biopipen/ns/scrna.py,sha256=7Gs1xxQoGM3TKxaQvbgKNyMDEsgatFopImzC-RcOEoA,103946
+biopipen/ns/scrna_metabolic_landscape.py,sha256=EhOtHQyoH-jRpzDoOI_06UbjEg6mhvbDEHKhek01bPk,28334
+biopipen/ns/snp.py,sha256=EQ2FS0trQ7YThPmBVTpS66lc2OSfgQ6lCh6WnyP-C2g,5499
 biopipen/ns/stats.py,sha256=yJ6C1CXF84T7DDs9mgufqUOr89Rl6kybE5ji8Vnx6cw,13693
 biopipen/ns/tcgamaf.py,sha256=AFbUJIxiMSvsVY3RcHgjRFuMnNh2DG3Mr5slLNEyz6o,1455
-biopipen/ns/tcr.py,sha256=5bMnxhbeB08UrAw8YSh2BkA3AUFeoOajhE6DhHt74K4,87863
+biopipen/ns/tcr.py,sha256=7F_FulZ3UGouuvgH_ylZwJybr_310f9BTz_kouO1SjY,87905
 biopipen/ns/vcf.py,sha256=cdkKroii0_nl_bSP2cnO09qESUAhHqu6btOiTSKS79Y,15314
 biopipen/ns/web.py,sha256=3zucrDo-IVsSnIvlw-deoScuxqWa6OMTm8Vo-R4E44Q,2224
 biopipen/reports/bam/CNAClinic.svelte,sha256=D4IxQcgDCPQZMbXog-aZP5iJEQTK2N4i0C60e_iXyfs,213
@@ -102,7 +102,7 @@ biopipen/scripts/cnvkit/CNVkitScatter.py,sha256=7DhTiXPHEHbdXn0VFcDOR-wTP6sks08N
 biopipen/scripts/cnvkit/CNVkitSegment.py,sha256=q5iGAjY6-yIehPcJpi3hX6EuGre0YgWTPkG_d5LEV48,1629
 biopipen/scripts/cnvkit/guess_baits.py,sha256=7OCMtSMHIJWWZv9qEYVXnB0N4hU_JaGEesKdkr6tvJc,10586
 biopipen/scripts/delim/RowsBinder.R,sha256=yp960u7Ui_jFCL8WDvODa-0vhJvyLo64ll35PzXYUbI,1444
-biopipen/scripts/delim/SampleInfo.R,sha256=aYMaQR6klV8rVshqkaML9mnVpCfuGTvd_D5iUbPRzJA,6335
+biopipen/scripts/delim/SampleInfo.R,sha256=1EYlqoVpIEl9l2eBaCLETuI_Ma3HjihS9tRbGmOPiBk,6397
 biopipen/scripts/gene/GeneNameConversion.py,sha256=2RveardTsLv2K1XSj3G0ERYLiln9bcR74bjkRdKcChc,1880
 biopipen/scripts/gsea/Enrichr.R,sha256=tr4vInlVIeiGXumh22ARuTQmy0-Qq869RiX7d7ERqCg,661
 biopipen/scripts/gsea/FGSEA.R,sha256=RLqDgrqnYEacHfzEEuZ3d29lxNqWehigOnGuu248SRg,1483
@@ -111,6 +111,7 @@ biopipen/scripts/gsea/PreRank.R,sha256=onZK1FQa6yDO0Fz4juy56XQjpzyw3zBdZv7edY9ac
 biopipen/scripts/misc/Config2File.py,sha256=NUio0uOEuZtUBpuByDSItYu9Kwu5mosb4pdPq5-QAmE,440
 biopipen/scripts/misc/Str2File.py,sha256=99oQNxChxChNJ9vmD77b48cu-r_P_heSpx7A5wi3qTE,212
 biopipen/scripts/plot/Heatmap.R,sha256=4v_oRME8ZiwczIlBIp-OP_YPWLAvBKzbHiwNBCZ0Xog,1982
+biopipen/scripts/plot/ROC.R,sha256=Cr-mHQx6c748fQYkOWO2xIKWwiVAUxGuxn6lYEhNH78,2430
 biopipen/scripts/plot/VennDiagram.R,sha256=GVc-kyHqnXrbXZvy-evcxI1XGtlLSChBiVnMjPywNMA,731
 biopipen/scripts/rnaseq/Simulation-ESCO.R,sha256=68cEHDdJclX8P8Q7ey9yBOfK09M_kxlL6zgYXsEL2Rs,6378
 biopipen/scripts/rnaseq/Simulation-RUVcorr.R,sha256=6C6Ke5RLF0fC2V9WQPoFEdqoDabCnhslZBIyB6zhIxc,1155
@@ -137,7 +138,7 @@ biopipen/scripts/scrna/SCImpute.R,sha256=dSJOHhmJ3x_72LBRXT72dbCti5oiB85CJ-OjWtq
 biopipen/scripts/scrna/ScFGSEA.R,sha256=2UCTCIydVkPGvn7WP-_fcE7857iKKDxY56-j-ruyO8o,6254
 biopipen/scripts/scrna/Seurat2AnnData.R,sha256=qz4u-B5J3GMwttubnNnByJXreziFbrP5Mak0L0q7eG0,1557
 biopipen/scripts/scrna/SeuratClusterStats-dimplots.R,sha256=gViDgQ8NorYD64iK0FgcODOrDOw0tExZmhuPRuLNp4g,2354
-biopipen/scripts/scrna/SeuratClusterStats-features.R,sha256=SaKTJloP1fttRXZQeb2ApX0ej7al13wOoEYkthSk13k,15489
+biopipen/scripts/scrna/SeuratClusterStats-features.R,sha256=W7iYhaFsC5EMZLO50QukYPLYGK4bq9kQc1VT5FwvI68,15496
 biopipen/scripts/scrna/SeuratClusterStats-hists.R,sha256=YhuD-GePjJPSkR0iLRgV_hiGHD_bnOIKp-LB6GCwquo,5037
 biopipen/scripts/scrna/SeuratClusterStats-ngenes.R,sha256=GVKIXFNS_syCuSN8oxoBkjxxAeI5LdSxh-qLVkUsbDA,2146
 biopipen/scripts/scrna/SeuratClusterStats-stats.R,sha256=TxQ0OcLwXwIgwL1mTLArboK0ATJIJhxWiv9DV_jBlhE,9255
@@ -147,19 +148,20 @@ biopipen/scripts/scrna/SeuratFilter.R,sha256=BrYK0MLdaTtQvInMaQsmOt7oH_hlks0M1zy
 biopipen/scripts/scrna/SeuratLoading.R,sha256=ekWKnHIqtQb3kHVQiVymAHXXqiUxs6KKefjZKjaykmk,900
 biopipen/scripts/scrna/SeuratMap2Ref.R,sha256=Xn3VnvKqShuC0Ju05380wjuLVSdW0uWVzntdxjme244,4359
 biopipen/scripts/scrna/SeuratMetadataMutater.R,sha256=Pp4GsF3hZ6ZC2vroC3LSBmVa4B1p2L3hbh981yaAIeQ,1093
-biopipen/scripts/scrna/SeuratPreparing.R,sha256=c_aBM0mugBNyYJ5OjNVDR_Cj0sGqkiJZXCOk3pesFDk,16990
+biopipen/scripts/scrna/SeuratPreparing.R,sha256=t6GOcc9ZNwpRLeES7uBWja9RF6u6k5I_TXcdK4Ve7d0,18683
 biopipen/scripts/scrna/SeuratSplit.R,sha256=vdK11V39_Uo_NaOh76QWCtxObGaEr5Ynxqq0hTiSvsU,754
 biopipen/scripts/scrna/SeuratSubClustering.R,sha256=L1SwKhNNKvsQGrcj0ZjScW9BLuvdO2pg7U48Ospsot8,6096
 biopipen/scripts/scrna/SeuratSubset.R,sha256=yVA11NVE2FSSw-DhxQcJRapns0tNNHdyDYi5epO6SKM,1776
-biopipen/scripts/scrna/SeuratTo10X.R,sha256=T2nJBTwOe12AIKC2FZsMSv6xx3s-67CYZokpz5wshqY,2679
+biopipen/scripts/scrna/SeuratTo10X.R,sha256=1mh1R0Qlo1iHVrpMLUXyLDOA92QKJ4GzTMURTFRqsWg,901
+biopipen/scripts/scrna/Subset10X.R,sha256=T2nJBTwOe12AIKC2FZsMSv6xx3s-67CYZokpz5wshqY,2679
 biopipen/scripts/scrna/TopExpressingGenes.R,sha256=kXMCYHVytgVgO_Uq66fKKFCFV2PPXE8VREy_0yYPLpU,7475
-biopipen/scripts/scrna/Write10X.R,sha256=OMhXvJwvaH-aWsMpijKrvXQVabc1qUu5ZEwiLAhkDeY,285
 biopipen/scripts/scrna/celltypist-wrapper.py,sha256=f5M8f4rU5nC7l17RS0YVmUPpLLz4D6PIcgWtA77UExM,1722
 biopipen/scripts/scrna/sctype.R,sha256=NaUJkABwF5G1UVm1CCtcMbwLSj94Mo24mbYCKFqo1Bw,6524
 biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R,sha256=b77yG5FeRse3bNfFgLIEYGHNZzydAn1OeyyR_n5Ju60,4790
 biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R,sha256=ic8Fy8QqYDGh_izmvZVJ3KL66podg_CSF5ITL3FZsvo,5196
 biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R,sha256=95DLX1Rz0tobOuDZ8V9YdGgO0KiNthhccoeeOK21tno,16216
 biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R,sha256=rQ9iwGh9FNRZlJJzM4QItdyXmebfzLAq05ZAjb1kGUw,9831
+biopipen/scripts/snp/MatrixEQTL.R,sha256=zOR_mhn_sUXuxqgV82TPvDp-E1i5aJVA45QixyRP8no,5930
 biopipen/scripts/snp/PlinkSimulation.py,sha256=mSSoGGG6sbEPBcUGdHgbebUrg4DiHeyNyc7jLPjV5pY,4169
 biopipen/scripts/stats/ChowTest.R,sha256=4p7NULmfOZSfeBSQ04els0h3cXOK5yeCJJ4-gEBPOGk,3617
 biopipen/scripts/stats/DiffCoexpr.R,sha256=5hQDV2_7bKdKUsOGMZUa0GS5rc7kFspxonNyFEPmtbc,4516
@@ -238,7 +240,7 @@ biopipen/utils/reference.py,sha256=6bPSwQa-GiDfr7xLR9a5T64Ey40y24yn3QfQ5wDFZkU,4
 biopipen/utils/rnaseq.R,sha256=Ro2B2dG-Z2oVaT5tkwp9RHBz4dp_RF-JcizlM5GYXFs,1298
 biopipen/utils/single_cell.R,sha256=pJjYP8bIZpNAtTQ32rOXhZxaM1Y-6D-xUcK3pql9tbk,4316
 biopipen/utils/vcf.py,sha256=ajXs0M_QghEctlvUlSRjWQIABVF02wPdYd-0LP4mIsU,9377
-biopipen-0.27.3.dist-info/METADATA,sha256=4DeAjhBZHdg7pZXoTNPiQkzGsx6hSm7VwgWgyYKMY18,920
-biopipen-0.27.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-biopipen-0.27.3.dist-info/entry_points.txt,sha256=wu70aoBcv1UahVbB_5237MY-9M9_mzqmWjDD-oi3yz0,621
-biopipen-0.27.3.dist-info/RECORD,,
+biopipen-0.27.5.dist-info/METADATA,sha256=V-P-6i9I4Q1OE-KDY39Nkki_Iv_5jpP-65kxeUuCc88,882
+biopipen-0.27.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+biopipen-0.27.5.dist-info/entry_points.txt,sha256=wu70aoBcv1UahVbB_5237MY-9M9_mzqmWjDD-oi3yz0,621
+biopipen-0.27.5.dist-info/RECORD,,

biopipen/scripts/scrna/Write10X.R DELETED Viewed

@@ -1,11 +0,0 @@
-library(DropletUtils)
-library(Seurat)
-srtobjfile = {{in.srtobj | r}}
-outdir = {{out.outdir | r}}
-version = {{envs.version | r}}
-srtobj = readRDS(srtobjfile)
-counts = GetAssayData(object = srtobj, layer = "counts")
-write10xCounts(outdir, counts, version = version, overwrite = TRUE)

{biopipen-0.27.3.dist-info → biopipen-0.27.5.dist-info}/WHEEL RENAMED Viewed

File without changes

{biopipen-0.27.3.dist-info → biopipen-0.27.5.dist-info}/entry_points.txt RENAMED Viewed

File without changes

biopipen 0.27.3__py3-none-any.whl → 0.27.5__py3-none-any.whl

Potentially problematic release.

biopipen 0.27.3__py3-none-any.whl → 0.27.5__py3-none-any.whl