PyPI - biopipen - Versions diffs - 0.27.1__tar.gz → 0.27.3__tar.gz - Mend

biopipen 0.27.1__tar.gz → 0.27.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (245) hide show

{biopipen-0.27.1 → biopipen-0.27.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: biopipen
-Version: 0.27.1
+Version: 0.27.3
 Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
 License: MIT
 Author: pwwang
@@ -17,6 +17,7 @@ Requires-Dist: datar[pandas] (>=0.15.6,<0.16.0)
 Requires-Dist: pipen-board[report] (>=0.15,<0.16)
 Requires-Dist: pipen-cli-run (>=0.13,<0.14)
 Requires-Dist: pipen-filters (>=0.12,<0.13)
-Requires-Dist: pipen-poplog (>=0.1,<0.2)
+Requires-Dist: pipen-poplog (>=0.1.2,<0.2.0)
 Requires-Dist: pipen-runinfo (>=0.6,<0.7) ; extra == "runinfo"
 Requires-Dist: pipen-verbose (>=0.11,<0.12)
+Requires-Dist: pyyaml-include (==1.*)

biopipen-0.27.3/biopipen/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.27.3"

{biopipen-0.27.1 → biopipen-0.27.3}/biopipen/ns/scrna.py RENAMED Viewed

@@ -201,6 +201,13 @@ class SeuratPreparing(Proc):
                 - scvi: Same as `scVIIntegration`.
             - <more>: See <https://satijalab.org/seurat/reference/integratelayers>
+        DoubletFinder (ns): Arguments to run [`DoubletFinder`](https://github.com/chris-mcginnis-ucsf/DoubletFinder).
+            See also <https://demultiplexing-doublet-detecting-docs.readthedocs.io/en/latest/DoubletFinder.html>.
+            To disable `DoubletFinder`, set `envs.DoubletFinder` to `None` or `False`; or set `PCs` to `0`.
+            - PCs (type=int): Number of PCs to use for 'doubletFinder' function.
+            - doublets (type=float): Number of expected doublets as a proportion of the pool size.
+            - pN (type=float): Number of doublets to simulate as a proportion of the pool size.
     Requires:
         r-seurat:
             - check: {{proc.lang}} <(echo "library(Seurat)")
@@ -227,6 +234,7 @@ class SeuratPreparing(Proc):
             "min_cells": 5,
         },
         "IntegrateLayers": {"method": "harmony"},
+        "DoubletFinder": {"PCs": 0, "pN": 0.25, "doublets": 0.075},
     }
     script = "file://../scripts/scrna/SeuratPreparing.R"
     plugin_opts = {

{biopipen-0.27.1 → biopipen-0.27.3}/biopipen/ns/snp.py RENAMED Viewed

@@ -7,12 +7,15 @@ from ..core.config import config
 class PlinkSimulation(Proc):
     """Simulate SNPs using PLINK v1.9
-    See also <https://www.cog-genomics.org/plink/1.9/input#simulate>.
+    See also <https://www.cog-genomics.org/plink/1.9/input#simulate> and
+    <https://pwwang.github.io/biopipen/api/biopipen.ns.snp/#biopipen.ns.snp.PlinkSimulation>
     Input:
-        nsnps: Number of SNPs to simulate
-        ncases: Number of cases to simulate
-        nctrls: Number of controls to simulate
+        configfile: Configuration file containing the parameters for the simulation.
+            The configuration file (in toml, yaml or json format) should contain a
+            dictionary of parameters.  The parameters are listed in `envs` except
+            `ncores`, which is used for parallelization. You can set parameters
+            in `envs` and override them in the configuration file.
     Output:
         outdir: Output directory containing the simulated data
@@ -21,9 +24,11 @@ class PlinkSimulation(Proc):
             SNPs and columns representing samples.
     Envs:
+        nsnps (type=int): Number of SNPs to simulate
+        ncases (type=int): Number of cases to simulate
+        nctrls (type=int): Number of controls to simulate
         plink: Path to PLINK v1.9
-        seed (type=int): Random seed.
-            If not set, seed will not be set.
+        seed (type=int): Random seed. If not set, seed will not be set.
         label: Prefix label for the SNPs.
         prevalence  (type=float): Disease prevalence.
         minfreq (type=float): Minimum allele frequency.
@@ -41,19 +46,17 @@ class PlinkSimulation(Proc):
             This only affects the sample names in the genotype matrix file
             (`out.gtmat`).
     """
-    input = "nsnps:var, ncases:var, nctrls:var"
+    input = "configfile:file"
     output = [
-        (
-            "outdir:dir:{{in.nsnps | int}}_"
-            "{{in.ncases | int}}xcases_{{in.nctrls | int}}xctrls.plink_sim"
-        ),
-        (
-            "gtmat:file:{{in.nsnps | int}}_"
-            "{{in.ncases | int}}xcases_{{in.nctrls | int}}xctrls.plink_sim/gtmat.txt"
-        ),
+        "outdir:dir:{{in.configfile | stem}}.plink_sim",
+        "gtmat:file:{{in.configfile | stem}}.plink_sim/"
+        "{{in.configfile | stem}}-gtmat.txt",
     ]
     lang = config.lang.python
     envs = {
+        "nsnps": None,
+        "ncases": None,
+        "nctrls": None,
         "plink": config.exe.plink,
         "seed": None,
         "label": "SNP",

{biopipen-0.27.1 → biopipen-0.27.3}/biopipen/ns/tcr.py RENAMED Viewed

@@ -983,6 +983,7 @@ class CloneResidency(Proc):
             before calculating the clone residency. For example, `Clones > 1` to filter
             out singletons.
         prefix: The prefix of the cell barcodes in the `Seurat` object.
+        upset_ymax: The maximum value of the y-axis in the upset bar plots.
         upset_trans: The transformation to apply to the y axis of upset bar plots.
             For example, `log10` or `sqrt`. If not specified, the y axis will be
             plotted as is. Note that the position of the bar plots will be dodged
@@ -1007,6 +1008,7 @@ class CloneResidency(Proc):
         "mutaters": {},
         "subset": None,
         "prefix": "{Sample}_",
+        "upset_ymax": None,
         "upset_trans": None,
         "cases": {},
     }
@@ -1595,3 +1597,74 @@ class TESSA(Proc):
     }
     script = "file://../scripts/tcr/TESSA.R"
     plugin_opts = {"report": "file://../reports/tcr/TESSA.svelte"}
+class TCRDock(Proc):
+    """Using TCRDock to predict the structure of MHC-peptide-TCR complexes
+    See <https://github.com/phbradley/TCRdock>.
+    Input:
+        configfile: The config file for TCRDock
+            It should be a toml file with the keys listed in `envs`, including
+            `organism`, `mhc_class`, `mhc`, `peptide`, `va`, `ja`, `vb`, `jb`,
+            `cdr3a`, and `cdr3b`.
+            The values will overwrite the values in `envs`.
+    Output:
+        outdir: The output directory containing the results
+    Envs:
+        organism: The organism of the TCR, peptide and MHC
+        mhc_class (type=int): The MHC class, either `1` or `2`
+        mhc: The MHC allele, e.g., `A*02:01`
+        peptide: The peptide sequence
+        va: The V alpha gene
+        ja: The J alpha gene
+        vb: The V beta gene
+        jb: The J beta gene
+        cdr3a: The CDR3 alpha sequence
+        cdr3b: The CDR3 beta sequence
+        python: The path of python with dependencies for `tcrdock` installed.
+            If not provided, `TCRDock.lang` will be used (the same interpreter
+            used for the wrapper script).
+            It could also be a list to specify, for example, a python in a conda
+            environment (e.g., `["conda", "run", "-n", "myenv", "python"]`).
+        tmpdir: The temporary directory used to clone the `tcrdock` source code if
+            `envs.tcrdock` is not provided.
+        tcrdock: The path to the `tcrdock` source code repo.
+            You need to clone the source code from the GitHub repository
+            <https://github.com/phbradley/TCRdock> at revision
+            c5a7af42eeb0c2a4492a4d4fe803f1f9aafb6193 on the main branch.
+            You also have to run `download_blast.py` after cloning to download the
+            blast database in the directory.
+            If not provided, we will clone the source code to the `envs.tmpdir`
+            directory and run the `download_blast.py` script.
+        model_name: The model name to use
+        model_file: The model file to use.
+            If provided as a relative path, it should be relative to the
+            `<envs.data_dir>/params/`, otherwise, it should be the full path.
+        data_dir: The data directory that contains the model files.
+            The model files should be in the `params` subdirectory.
+    """
+    input = "configfile:file"
+    output = "outdir:dir:{{in.configfile | stem}}.tcrdock"
+    lang = config.lang.python
+    envs = {
+        "tcrdock": None,
+        "organism": "human",
+        "mhc_class": 1,
+        "mhc": "A*02:01",
+        "peptide": None,
+        "va": None,
+        "ja": None,
+        "vb": None,
+        "jb": None,
+        "cdr3a": None,
+        "cdr3b": None,
+        "python": None,
+        "model_name": "model_2_ptm_ft4",
+        "model_file": "tcrpmhc_run4_af_mhc_params_891.pkl",
+        "data_dir": None,
+    }
+    script = "file://../scripts/tcr/TCRDock.py"

{biopipen-0.27.1 → biopipen-0.27.3}/biopipen/scripts/scrna/MarkersFinder.R RENAMED Viewed

@@ -120,7 +120,7 @@ expand_each <- function(name, case) {
                     pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
             }
             for (each in eachs) {
-                by <- make.names(paste0(".", name, "_", case$each,"_", each))
+                by <- make.names(paste0("..", name, "_", case$each,"_", each))
                 srtobj@meta.data <<- srtobj@meta.data %>% mutate(
                     !!sym(by) := if_else(
                         !!sym(case$each) == each,
@@ -364,6 +364,16 @@ add_case_report <- function(info, sigmarkers, siggenes) {
     }
 }
+ensure_sobj <- function(expr, allow_empty) {
+    tryCatch({ expr }, error = function(e) {
+        if (allow_empty) {
+            log_warn("  Ignoring this case: {e$message}")
+            return(NULL)
+        } else {
+            stop(e)
+        }
+    })
+}
 do_case_findall <- function(casename) {
     # casename
@@ -382,10 +392,17 @@ do_case_findall <- function(casename) {
     # args$min.cells.group <- args$min.cells.group %||% 1
     # args$min.cells.feature <- args$min.cells.feature %||% 1
     # args$min.pct <- args$min.pct %||% 0
+    allow_empty = startsWith(case$group.by, "..")
     if (!is.null(case$subset)) {
-        args$object <- srtobj %>% filter(!!parse_expr(case$subset) & !is.na(!!sym(case$group.by)))
+        args$object <- ensure_sobj({
+            srtobj %>% filter(!!parse_expr(case$subset) & !is.na(!!sym(case$group.by)))
+        }, allow_empty)
+        if (is.null(args$object)) { return() }
     } else {
-        args$object <- srtobj %>% filter(!is.na(!!sym(case$group.by)))
+        args$object <- ensure_sobj({
+            srtobj %>% filter(!is.na(!!sym(case$group.by)))
+        }, allow_empty)
+        if (is.null(args$object)) { return() }
     }
     Idents(args$object) <- case$group.by
@@ -486,11 +503,19 @@ do_case <- function(casename) {
     # sigmarkers
     # rest
     args <- case$rest
+    allow_empty = startsWith(case$group.by, "..")
     if (!is.null(case$subset)) {
-        args$object <- srtobj %>% filter(!!parse_expr(case$subset) & !is.na(!!sym(case$group.by)))
+        args$object <- ensure_sobj({
+            srtobj %>% filter(!!parse_expr(case$subset) & !is.na(!!sym(case$group.by)))
+        }, allow_empty)
+        if (is.null(args$object)) { return() }
     } else {
-        args$object <- srtobj %>% filter(!is.na(!!sym(case$group.by)))
+        args$object <- ensure_sobj({
+            srtobj %>% filter(!is.na(!!sym(case$group.by)))
+        }, allow_empty)
+        if (is.null(args$object)) { return() }
     }
     args$assay <- case$assay
     args$group.by <- case$group.by
     args$ident.1 <- case$ident.1

{biopipen-0.27.1 → biopipen-0.27.3}/biopipen/scripts/scrna/MetaMarkers.R RENAMED Viewed

@@ -76,7 +76,7 @@ expand_each <- function(name, case) {
                 pull(case$each) %>% unique() %>% na.omit()
         }
         for (each in eachs) {
-            by = make.names(paste0(".", name, "_", case$each, "_", each))
+            by = make.names(paste0("..", name, "_", case$each, "_", each))
             idents <- case$idents
             if (is.null(idents) || length(idents) == 0) {
                 srtobj@meta.data = srtobj@meta.data %>%
@@ -169,17 +169,31 @@ do_enrich <- function(info, markers, sig) {
     }
 }
+ensure_sobj <- function(expr, allow_empty) {
+    tryCatch({ expr }, error = function(e) {
+        if (allow_empty) {
+            log_warn("  Ignoring this case: {e$message}")
+            return(NULL)
+        } else {
+            stop(e)
+        }
+    })
+}
 do_case <- function(casename) {
     log_info("- Dealing with case: {casename} ...")
     info <- casename_info(casename, cases, outdir, create = TRUE)
     case <- cases[[casename]]
+    allow_empty = startsWith(case$group_by, "..")
     if (sum(!is.na(srtobj@meta.data[[case$group_by]])) == 0) {
         msg = "Not enough cells to run tests."
     } else {
-        sobj <- srtobj %>% filter(!is.na(!!sym(case$group_by)))
+        sobj <- ensure_sobj({ srtobj %>% filter(!is.na(!!sym(case$group_by))) }, allow_empty)
+        if (is.null(sobj)) { return() }
         if (!is.null(case$subset)) {
-            sobj <- srtobj %>% filter(!is.na(!!sym(case$group_by)), !!parse_expr(case$subset))
+            sobj <- ensure_sobj({ sobj %>% filter(!!parse_expr(case$subset)) }, allow_empty)
+            if (is.null(sobj)) { return() }
         }
         df <- tryCatch({
                 GetAssayData(sobj, layer = "data")

{biopipen-0.27.1 → biopipen-0.27.3}/biopipen/scripts/scrna/RadarPlots.R RENAMED Viewed

@@ -74,10 +74,10 @@ expand_each <- function(name,  case) {
         }
     } else {
         if (is.null(case$subset)) {
-            eachs <- srtobj@meta.data %>%
+            eachs <- meta %>%
                 pull(case$each) %>% unique() %>% na.omit() %>% as.vector()
         } else {
-            eachs <- srtobj@meta.data %>% filter(!!parse_expr(case$subset)) %>%
+            eachs <- meta %>% filter(!!parse_expr(case$subset)) %>%
                 pull(case$each) %>% unique() %>% na.omit() %>% as.vector()
         }
         for (each in eachs) {

{biopipen-0.27.1 → biopipen-0.27.3}/biopipen/scripts/scrna/ScFGSEA.R RENAMED Viewed

@@ -72,7 +72,7 @@ expand_each <- function(name, case) {
                 pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
         }
         for (each in eachs) {
-            by <- make.names(paste0(".", name, "_", case$each,"_", each))
+            by <- make.names(paste0("..", name, "_", case$each,"_", each))
             srtobj@meta.data <<- srtobj@meta.data %>%
                 mutate(!!sym(by) := if_else(
                     !!sym(case$each) == each,
@@ -97,18 +97,35 @@ log_info("- Expanding cases...")
 cases <- expand_cases(cases, defaults, expand_each)
+ensure_sobj <- function(expr, allow_empty) {
+    tryCatch({ expr }, error = function(e) {
+        if (allow_empty) {
+            log_warn("  Ignoring this case: {e$message}")
+            return(NULL)
+        } else {
+            stop(e)
+        }
+    })
+}
 do_case <- function(name, case) {
     log_info("- Handling case: {name} ...")
     info <- casename_info(name, cases, outdir, create = TRUE)
+    allow_empty = startsWith(case$group.by, "..")
     # prepare expression matrix
     log_info("  Preparing expression matrix...")
-    sobj <- srtobj %>% filter(!is.na(!!sym(case$group.by)))
+    sobj <- ensure_sobj({ srtobj %>% filter(!is.na(!!sym(case$group.by))) }, allow_empty)
+    if (is.null(sobj)) { return() }
     if (!is.null(case$subset)) {
-        sobj <- sobj %>% filter(!!!parse_exprs(case$subset))
+        sobj <- ensure_sobj({ sobj %>% filter(!!!parse_exprs(case$subset)) }, allow_empty)
+        if (is.null(sobj)) { return() }
     }
     if (!is.null(case$ident.2)) {
-        sobj <- sobj %>% filter(!!sym(case$group.by) %in% c(case$ident.1, case$ident.2))
+        sobj <- ensure_sobj({ sobj %>% filter(!!sym(case$group.by) %in% c(case$ident.1, case$ident.2)) }, allow_empty)
+        if (is.null(sobj)) { return() }
     }
     allclasses <- sobj@meta.data[, case$group.by, drop = TRUE]

{biopipen-0.27.1 → biopipen-0.27.3}/biopipen/scripts/scrna/SeuratPreparing.R RENAMED Viewed

@@ -13,6 +13,7 @@ envs = {{envs | r: todot = "-", skip = 1}}
 set.seed(8525)
 options(future.globals.maxSize = 80000 * 1024^2)
+options(future.rng.onMisuse="ignore")
 options(Seurat.object.assay.version = "v5")
 plan(strategy = "multicore", workers = envs$ncores)
@@ -342,7 +343,7 @@ RunPCAArgs$object <- sobj
 sobj <- do_call(RunPCA, RunPCAArgs)
 if (!envs$no_integration) {
-    log_info("- Running IntegrateLayers ...")
+    log_info("- Running IntegrateLayers (method = {envs$IntegrateLayers$method}) ...")
     IntegrateLayersArgs <- envs$IntegrateLayers
     method <- IntegrateLayersArgs$method
     if (!is.null(IntegrateLayersArgs$reference) && is.character(IntegrateLayersArgs$reference)) {
@@ -383,6 +384,117 @@ if (!envs$use_sct) {
     sobj <- JoinLayers(sobj)
 }
+if (!is.null(envs$DoubletFinder) && is.list(envs$DoubletFinder) && envs$DoubletFinder$PCs > 0) {
+    library(DoubletFinder)
+    log_info("Running DoubletFinder ...")
+    log_info("- Preparing Seurat object ...")
+    # More controls from envs?
+    sobj <- FindNeighbors(sobj, dims = 1:envs$DoubletFinder$PCs)
+    sobj <- FindClusters(sobj)
+    log_info("- pK Indentification ...")
+    sweep.res.list <- paramSweep(
+        sobj,
+        PCs = 1:envs$DoubletFinder$PCs,
+        sct = envs$use_sct,
+        num.cores = envs$ncores
+    )
+    sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
+    bcmvn <- find.pK(sweep.stats)
+    bcmvn$Selected <- bcmvn$pK == bcmvn$pK[which.max(bcmvn$BCmetric)[1]]
+    plot <- ggplot(bcmvn, aes(x = pK, y = BCmetric, color = Selected)) +
+        geom_point() +
+        # rotate x axis labels
+        theme(axis.text.x = element_text(angle = 90, hjust = 1))
+    ggsave(plot, filename = file.path(plotsdir, "pK_BCmetric.png"))
+    pK <- bcmvn$pK[which.max(bcmvn$BCmetric)[1]]
+    pK <- as.numeric(as.character(pK))
+    pN <- envs$DoubletFinder$pN
+    log_info("- Homotypic Doublet Proportion Estimate ...")
+    homotypic.prop <- modelHomotypic(Idents(sobj))
+    nExp_poi <- round(nrow(sobj@meta.data) * envs$DoubletFinder$doublets)
+    nExp_poi.adj <- round(nExp_poi * (1 - homotypic.prop))
+    log_info("- Running DoubletFinder ...")
+    sobj <- doubletFinder(
+        sobj,
+        PCs = 1:envs$DoubletFinder$PCs,
+        pN = pN,
+        pK = pK,
+        nExp = nExp_poi.adj,
+        reuse.pANN = FALSE,
+        sct = envs$use_sct
+    )
+    pANN_col <- paste0("pANN_", pN, "_", pK)
+    pANN_col <- colnames(sobj@meta.data)[grepl(pANN_col, colnames(sobj@meta.data))]
+    DF_col <- paste0("DF.classifications_", pN, "_", pK)
+    DF_col <- colnames(sobj@meta.data)[grepl(DF_col, colnames(sobj@meta.data))]
+    doublets <- as.data.frame(
+        cbind(
+            colnames(sobj),
+            sobj@meta.data[, pANN_col],
+            sobj@meta.data[, DF_col]
+        )
+    )
+    colnames(doublets) <-  c("Barcode","DoubletFinder_score","DoubletFinder_DropletType")
+    write.table(
+        doublets,
+        file.path(joboutdir, "DoubletFinder_doublets_singlets.txt"),
+        row.names = FALSE,
+        quote = FALSE,
+        sep = "\t"
+    )
+    summary <- as.data.frame(table(doublets$DoubletFinder_DropletType))
+    colnames(summary) <- c("Classification", "Droplet_N")
+    write.table(
+        summary,
+        file.path(joboutdir, "DoubletFinder_summary.txt"),
+        row.names = FALSE,
+        quote = FALSE,
+        sep = "\t"
+    )
+    # Do a dimplot
+    log_info("- Plotting dimension reduction ...")
+    dimp <- DimPlot(
+        sobj, group.by = DF_col, order = "Doublet",
+        cols = c("#333333", "#FF3333"), pt.size = 0.8, alpha = 0.5)
+    ggsave(dimp, filename = file.path(plotsdir, "DoubletFinder_dimplot.png"))
+    log_info("- Filtering doublets ...")
+    sobj <- subset(sobj, cells = doublets$Barcode[doublets$DoubletFinder_DropletType == "Singlet"])
+    add_report(
+        list(
+            kind = "descr",
+            content = "The table contains the number of cells classified as singlets and doublets."
+        ),
+        list(
+            kind = "table",
+            data = list(path = file.path(joboutdir, "DoubletFinder_summary.txt"))
+        ),
+        h1 = "DoubletFinder Results",
+        h2 = "The DoubletFinder Summary"
+    )
+    add_report(
+        list(
+            name = "pK vs BCmetric",
+            src = file.path(plotsdir, "pK_BCmetric.png")
+        ),
+        list(
+            name = "Dimension Reduction Plot",
+            src = file.path(plotsdir, "DoubletFinder_dimplot.png")
+        ),
+        ui = "table_of_images",
+        h1 = "DoubletFinder Results",
+        h2 = "Plots"
+    )
+}
 log_info("Saving filtered seurat object ...")
 saveRDS(sobj, rdsfile)

biopipen-0.27.3/biopipen/scripts/snp/PlinkSimulation.py ADDED Viewed

@@ -0,0 +1,124 @@
+from pathlib import Path
+from multiprocessing import Pool
+from slugify import slugify
+from simpleconf import Config
+from biopipen.utils.misc import logger, run_command, dict_to_cli_args
+configfile = {{in.configfile | repr}}  # pyright: ignore # noqa: E999
+outdir = {{out.outdir | repr}}  # pyright: ignore
+gtmatfile = {{out.gtmat | repr}}  # pyright: ignore
+config = Config.load(configfile)
+default_nsnps = {{envs.nsnps | repr}}  # pyright: ignore
+default_ncases = {{envs.ncases | repr}}  # pyright: ignore
+default_nctrls = {{envs.nctrls | repr}}  # pyright: ignore
+default_plink = {{envs.plink | repr}}  # pyright: ignore
+default_seed = {{envs.seed | repr}}  # pyright: ignore
+default_label = {{envs.label | repr}}  # pyright: ignore
+default_prevalence = {{envs.prevalence | repr}}  # pyright: ignore
+default_minfreq = {{envs.minfreq | repr}}  # pyright: ignore
+default_maxfreq = {{envs.maxfreq | repr}}  # pyright: ignore
+default_hetodds = {{envs.hetodds | repr}}  # pyright: ignore
+default_homodds = {{envs.homodds | repr}}  # pyright: ignore
+default_missing = {{envs.missing | repr}}  # pyright: ignore
+default_args = {{envs.args | repr}}  # pyright: ignore
+default_transpose_gtmat = {{envs.transpose_gtmat | repr}}  # pyright: ignore
+default_sample_prefix = {{envs.sample_prefix | repr}}  # pyright: ignore
+defaults = {
+    "nsnps": default_nsnps,
+    "ncases": default_ncases,
+    "nctrls": default_nctrls,
+    "plink": default_plink,
+    "seed": default_seed,
+    "label": default_label,
+    "prevalence": default_prevalence,
+    "minfreq": default_minfreq,
+    "maxfreq": default_maxfreq,
+    "hetodds": default_hetodds,
+    "homodds": default_homodds,
+    "missing": default_missing,
+    # "args": default_args,
+    "transpose_gtmat": default_transpose_gtmat,
+    "sample_prefix": default_sample_prefix,
+}
+def do_one_simulation(confitems):
+    args = default_args.copy()
+    args.update(confitems.pop("args", {}))
+    confs = defaults.copy()
+    confs.update(confitems)
+    transpose_gtmat = confs.pop("transpose_gtmat")
+    sample_prefix = confs.pop("sample_prefix")
+    logger.debug("  Generating parameters file")
+    params_file = Path(outdir) / "params.txt"
+    params_file.write_text(
+        f"{confs['nsnps']}\t{confs['label']}\t{confs['minfreq']}\t"
+        f"{confs['maxfreq']}\t{confs['hetodds']}\t{confs['homodds']}\n"
+    )
+    if confs.get('seed') is not None:
+        args["seed"] = confs['seed']
+    args["simulate"] = params_file
+    args["out"] = Path(outdir) / "sim_snps"
+    args["simulate-ncases"] = confs['ncases']
+    args["simulate-ncontrols"] = confs['nctrls']
+    args["simulate-prevalence"] = confs['prevalence']
+    args["simulate-missing"] = confs['missing']
+    cmd = [confs['plink']] + dict_to_cli_args(args)
+    logger.debug("  Running PLINK simulation ...")
+    run_command(cmd, fg=True)
+    # Transpose the genotype matrix
+    # CHR	SNP	(C)M	POS	COUNTED	ALT	per0_per0	per1_per1	per2_per2
+    # 1	SNP_0	0	1	D	d	1	0	1
+    # 1	SNP_1	0	2	d	D	0	1	0
+    # 1	SNP_2	0	3	d	D	0	0	0
+    # 1	SNP_3	0	4	d	D	0	0	0
+    # 1	SNP_4	0	5	D	d	1	2	1
+    cmd = [
+        confs['plink'],
+        "--recode",
+        "A" if transpose_gtmat else "A-transpose",
+        "tab",
+        "--bfile",
+        args["out"],
+        "--out",
+        gtmatfile + ".plink.recoded",
+    ]
+    logger.debug("- Recoding into genotype matrix ...")
+    run_command(cmd, fg=True)
+    logger.debug("  Saving genotype matrix ...")
+    ## transpose_gtmat = False
+    # SNP_COUNTED	per0_per0	per1_per1	per2_per2
+    # SNP_0_D	1	0	1
+    # SNP_1_d	0	1	0
+    # SNP_2_d	0	0	0
+    # SNP_3_d	0	0	0
+    # SNP_4_D	1	2	1
+    ## transpose_gtmat = True
+    # FID_IID SNP_0_D SNP_1_D SNP_2_D
+    # per0_per0 0 1 1
+    # per1_per1 0 2 0
+    # per2_per2 0 0 0
+    # per3_per3 1 1 0
+    # per4_per4 0 0 0
+    if transpose_gtmat:
+        cmd = f"cut -f1,2,7- {gtmatfile}.plink.recoded.raw | sed 's/\\t/_/'"
+    else:
+        cmd = f"cut -f2,5,7- {gtmatfile}.plink.recoded.traw | sed 's/\\t/_/'"
+    if sample_prefix:
+        cmd = f"{cmd} | sed 's/per[0-9]\\+_per/{sample_prefix}/g'"
+    cmd = f"{cmd} > {gtmatfile}"
+    run_command(cmd, fg=True)
+do_one_simulation(config)

{biopipen-0.27.1 → biopipen-0.27.3}/biopipen/scripts/stats/DiffCoexpr.R RENAMED Viewed

@@ -42,21 +42,21 @@ diffcoex_score <- function(group) {
     gvals <- unique(gdata[, group, drop = TRUE])
     if (length(gvals) < 2) {
-        log_warn("  Less than 2 groups in the input. Skipping ...")
+        log_debug("  Less than 2 groups in the input. Skipping ...")
         return(NULL)
     }
     rs <- lapply(gvals, function(gval) {
         samples <- rownames(gdata[gdata[[group]] == gval, , drop = FALSE])
         expr <- indata[samples, , drop = FALSE]
         if (length(samples) < 3) {
-            log_warn("  Less than 3 samples in one of the groups. Skipping ...")
+            log_debug("  Less than 3 samples in one of the groups. Skipping ...")
             return(NULL)
         }
         cor.pairs(as.matrix(expr), cor.method = method)
     })
     rs[sapply(rs, is.null)] <- NULL
     if (length(rs) < 2) {
-        log_warn("  Less than 2 groups with at least 3 samples. Skipping ...")
+        log_debug("  Less than 2 groups with at least 3 samples. Skipping ...")
         return(NULL)
     }
     N <- length(rs)

biopipen 0.27.1__tar.gz → 0.27.3__tar.gz

Potentially problematic release.

biopipen 0.27.1__tar.gz → 0.27.3__tar.gz