PyPI - biopipen - Versions diffs - 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl - Mend

biopipen 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56)

biopipen/__init__.py +1 -1
biopipen/core/config.toml +4 -0
biopipen/core/filters.py +1 -1
biopipen/core/testing.py +2 -1
biopipen/ns/cellranger.py +33 -3
biopipen/ns/regulatory.py +4 -0
biopipen/ns/scrna.py +548 -98
biopipen/ns/scrna_metabolic_landscape.py +4 -0
biopipen/ns/tcr.py +256 -16
biopipen/ns/web.py +5 -0
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +9 -9
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +9 -8
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +9 -9
biopipen/reports/tcr/ClonalStats.svelte +1 -0
biopipen/scripts/cellranger/CellRangerCount.py +55 -11
biopipen/scripts/cellranger/CellRangerVdj.py +54 -8
biopipen/scripts/regulatory/MotifAffinityTest.R +21 -5
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +9 -2
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +15 -6
biopipen/scripts/regulatory/VariantMotifPlot.R +1 -1
biopipen/scripts/regulatory/motifs-common.R +3 -2
biopipen/scripts/scrna/AnnData2Seurat.R +2 -1
biopipen/scripts/scrna/CellCellCommunication.py +26 -14
biopipen/scripts/scrna/CellCellCommunicationPlots.R +23 -4
biopipen/scripts/scrna/CellSNPLite.py +30 -0
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +27 -36
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +42 -26
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +11 -13
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +5 -8
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +5 -8
biopipen/scripts/scrna/CellTypeAnnotation.R +26 -3
biopipen/scripts/scrna/MQuad.py +25 -0
biopipen/scripts/scrna/MarkersFinder.R +128 -30
biopipen/scripts/scrna/ModuleScoreCalculator.R +9 -1
biopipen/scripts/scrna/PseudoBulkDEG.R +113 -27
biopipen/scripts/scrna/ScFGSEA.R +23 -26
biopipen/scripts/scrna/ScVelo.py +20 -8
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -1
biopipen/scripts/scrna/SeuratClustering.R +5 -1
biopipen/scripts/scrna/SeuratMap2Ref.R +1 -2
biopipen/scripts/scrna/SeuratPreparing.R +19 -11
biopipen/scripts/scrna/SeuratSubClustering.R +1 -1
biopipen/scripts/scrna/Slingshot.R +2 -4
biopipen/scripts/scrna/TopExpressingGenes.R +1 -4
biopipen/scripts/scrna/celltypist-wrapper.py +140 -4
biopipen/scripts/scrna/scvelo_paga.py +313 -0
biopipen/scripts/scrna/seurat_anndata_conversion.py +18 -1
biopipen/scripts/tcr/{TCRClustering.R → CDR3Clustering.R} +63 -23
biopipen/scripts/tcr/ClonalStats.R +76 -35
biopipen/utils/misc.py +104 -9
{biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/METADATA +5 -2
{biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/RECORD +55 -53
{biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
biopipen/utils/common_docstrs.py +0 -103
{biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +0 -0

biopipen/scripts/cellranger/CellRangerVdj.py CHANGED Viewed

@@ -1,19 +1,24 @@
-import uuid
+import hashlib
+import shutil
 import re
+from contextlib import suppress
 from pathlib import Path, PosixPath  # noqa: F401
 from biopipen.utils.misc import run_command
 fastqs: list[Path] = {{in.fastqs | each: as_path}}  # pyright: ignore  # noqa
-outdir: str = {{out.outdir | quote}}  # pyright: ignore
+outdir: Path = Path({{out.outdir | quote}})  # pyright: ignore
 id: str = {{out.outdir | basename | quote}}  # pyright: ignore
 cellranger: str = {{envs.cellranger | quote}}  # pyright: ignore
 tmpdir = Path({{envs.tmpdir | quote}})  # pyright: ignore
 ref: str = {{envs.ref | quote}}  # pyright: ignore
 ncores: int = {{envs.ncores | int}}  # pyright: ignore
+outdir_is_mounted: bool = {{envs.outdir_is_mounted | repr}}  # pyright: ignore
+copy_outs_only: bool = {{envs.copy_outs_only | repr}}  # pyright: ignore
 # create a temporary unique directory to store the soft-linked fastq files
-fastqdir = tmpdir / f"cellranger_count_{uuid.uuid4()}"
+uid = hashlib.md5(str(fastqs).encode()).hexdigest()[:8]
+fastqdir = tmpdir / f"cellranger_count_{uid}"
 fastqdir.mkdir(parents=True, exist_ok=True)
 if len(fastqs) == 1 and fastqs[0].is_dir():
     fastqs = list(fastqs[0].glob("*.fastq.gz"))
@@ -23,7 +28,7 @@ for fastq in fastqs:
     fastq = Path(fastq)
     (fastqdir / fastq.name).symlink_to(fastq)
-other_args = {{envs | dict_to_cli_args: dashify=True, exclude=['cellranger', 'reference', 'ref', 'tmpdir', 'id', 'ncores']}}  # pyright: ignore
+other_args = {{envs | dict_to_cli_args: dashify=True, exclude=['cellranger', 'reference', 'ref', 'tmpdir', 'id', 'ncores', 'outdir_is_mounted', 'copy_outs_only']}}  # pyright: ignore
 command = [
     cellranger,
@@ -40,12 +45,26 @@ command = [
     *other_args,
 ]
-run_command(command, fg=True, cwd=str(Path(outdir).parent))
+version: str = run_command([cellranger, "--version"], stdout = "RETURN")  # type: ignore
+version = version.replace("cellranger", "").replace("-", "").strip()  # type: ignore
+print(f"# Detected cellranger version: {version}")
-web_summary_html = Path(outdir) / "outs" / "web_summary.html"
+if outdir_is_mounted:
+    print("# Using mounted outdir, redirecting cellranger output to a local tmpdir")
+    local_outdir = tmpdir / f"{outdir.name}-{uid}" / id
+    if local_outdir.parent.exists():
+        shutil.rmtree(local_outdir.parent)
+    local_outdir.parent.mkdir(parents=True, exist_ok=True)
+    odir = local_outdir
+else:
+    odir = outdir
+run_command(command, fg=True, cwd=str(odir.parent))
+web_summary_html = odir / "outs" / "web_summary.html"
 if not web_summary_html.exists():
     raise RuntimeError(
-        f"web_summary.html does not exist in {outdir}/outs. "
+        f"web_summary.html does not exist in {odir}/outs. "
         "cellranger vdj failed."
     )
@@ -53,7 +72,7 @@ if not web_summary_html.exists():
 # to void vscode live server breaking the page by injecting some code
 print("# Modify web_summary.html to move javascript to a separate file")
 try:
-    web_summary_js = Path(outdir) / "outs" / "web_summary.js"
+    web_summary_js = odir / "outs" / "web_summary.js"
     web_summary_content = web_summary_html.read_text()
     regex = re.compile(r"<script>(.+)</script>", re.DOTALL)
     web_summary_html.write_text(regex.sub(
@@ -64,3 +83,30 @@ try:
 except Exception as e:
     print(f"Error modifying web_summary.html: {e}")
     raise e
+# If using local tmpdir for output, move results to the final outdir
+if outdir_is_mounted:
+    print("# Copy results back to outdir")
+    if outdir.exists():
+        shutil.rmtree(outdir)
+    if copy_outs_only:
+        outdir.mkdir(parents=True, exist_ok=True)
+        with suppress(Exception):
+            # Some files may be failed to copy due to permission issues
+            # But the contents are actually copied
+            shutil.copytree(odir / "outs", outdir / "outs")
+    else:
+        with suppress(Exception):
+            shutil.copytree(local_outdir, outdir)  # type: ignore
+    # Make sure essential files exist
+    web_summary_html = outdir / "outs" / "web_summary.html"
+    web_summary_js = outdir / "outs" / "web_summary.js"
+    filtered_annotations_csv = outdir / "outs" / "filtered_contig_annotations.csv"
+    for f in [web_summary_html, web_summary_js, filtered_annotations_csv]:
+        if not f.exists():
+            raise RuntimeError(
+                f"{f} does not exist in {outdir}/outs. "
+                "Copying results back from tmpdir failed."
+            )

biopipen/scripts/regulatory/MotifAffinityTest.R CHANGED Viewed

@@ -14,6 +14,7 @@ bcftools <- {{envs.bcftools | r}}
 genome <- {{envs.genome | r}}
 motif_col <- {{envs.motif_col | r}}
 regulator_col <- {{envs.regulator_col | r}}
+var_col <- {{envs.var_col | r}}
 notfound <- {{envs.notfound | r}}
 motifdb <- {{envs.motifdb | r}}
 regmotifs <- {{envs.regmotifs | r}}
@@ -21,6 +22,7 @@ devpars <- {{envs.devpars | r}}
 plot_nvars <- {{envs.plot_nvars | r}}
 plots <- {{envs.plots | r}}
 cutoff <- {{envs.cutoff | r}}
+set.seed(8525)
 if (is.null(motifdb) || !file.exists(motifdb)) {
     stop("Motif database (envs.motifdb) is required and must exist")
@@ -47,10 +49,21 @@ log <- get_logger()
 log$info("Reading input regulator/motif file ...")
 in_motifs <- read.table(motiffile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
 log$info("Ensuring motifs and regulators in the input data ...")
-in_motifs <- ensure_regulator_motifs(in_motifs, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
+in_motifs <- ensure_regulator_motifs(in_motifs, outdir, motif_col, regulator_col, var_col, regmotifs, notfound = notfound)
 genome_pkg <- get_genome_pkg(genome)
+motif_var_pairs <- NULL
+if (!is.null(var_col)) {
+    log$info("Obtaining motif-variant pairs to test ...")
+    if (!var_col %in% colnames(in_motifs)) {
+        stop("Variant column (envs.var_col) not found in the input motif file")
+    }
+    motif_var_pairs <- unique(paste0(in_motifs[[motif_col]], " // ", in_motifs[[var_col]]))
+}
 log$info("Reading variant file ...")
 if (grepl("\\.vcf$", varfile) || grepl("\\.vcf\\.gz$", varfile)) {
     log$info("Converting VCF file to BED file ...")
@@ -77,10 +90,13 @@ mdb <- read_meme_to_motifdb(motifdb, in_motifs, motif_col, regulator_col, notfou
 tool <- tolower(tool)
 tool <- match.arg(tool, c("motifbreakr", "atsnp"))
-if (tool == "motifbreakr") {
+{% if envs.tool == "motifbreakr" %}
     motifbreakr_args <- {{envs.motifbreakr_args | r}}
     {% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_MotifBreakR.R" %}
-} else {  # atsnp
-    atsnp_args <- {{envs.atsnp_args | r}}
+{% else %}
+    atsnp_args <- list_update(
+        list(padj_cutoff = TRUE, padj = "BH", p = "Pval_diff"),
+        {{envs.atsnp_args | r}}
+    )
     {% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_AtSNP.R" %}
-}
+{% endif %}

biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R CHANGED Viewed

@@ -46,6 +46,13 @@ atsnp_result <- ComputePValues(
     testing.mc = TRUE
 )
+if (!is.null(motif_var_pairs)) {
+    log$info("Filtering motif-variant pairs ...")
+    atsnp_result$motifs_vars <- paste0(atsnp_result$motif, " // ", atsnp_result$snpid)
+    atsnp_result <- atsnp_result[atsnp_result$motifs_vars %in% motif_var_pairs, , drop = FALSE]
+    atsnp_result$motifs_vars <- NULL
+}
 padj_col <- paste0(atsnp_args$p, "_adj")
 atsnp_result[[padj_col]] <- p.adjust(atsnp_result[[atsnp_args$p]], method = atsnp_args$padj)
 cutoff_col <- if (atsnp_args$padj_cutoff) padj_col else atsnp_args$p
@@ -87,7 +94,8 @@ write.table(
 log$info("Plotting variants ...")
 # Convert result to GRanges object
-atsnp_result$alleleDiff <- -atsnp_result[[cutoff_col]]
+atsnp_result$alleleDiff <- -log10(atsnp_result[[cutoff_col]])
+atsnp_result <- atsnp_result[order(-atsnp_result$alleleDiff), , drop = FALSE]
 atsnp_result$effect <- "strong"
 atsnp_result$motifPos <- lapply(atsnp_result$motifPos, function(x) as.integer(unlist(strsplit(x, ","))))
 atsnp_result <- makeGRangesFromDataFrame(atsnp_result, keep.extra.columns = TRUE, starts.in.df.are.0based = TRUE)
@@ -96,7 +104,6 @@ attributes(atsnp_result)$genome.package <- genome_pkg
 attributes(atsnp_result)$motifs <- mdb
 if (is.null(plots) || length(plots) == 0) {
-    atsnp_result <- atsnp_result[order(-abs(atsnp_result$alleleDiff)), , drop = FALSE]
     atsnp_result <- atsnp_result[1:min(plot_nvars, length(atsnp_result)), , drop = FALSE]
     variants <- unique(atsnp_result$SNP_id)
 } else {

biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R CHANGED Viewed

@@ -50,6 +50,7 @@ results <- motifbreakR(
 log$info("Calculating p values ...")
 results <- calculatePvalue(results)
+results$.id <- 1:length(results)
 results_to_save <- as.data.frame(unname(results))
 results_to_save$motifPos <- lapply(results_to_save$motifPos, function(x) paste(x, collapse = ","))
 results_to_save$altPos <- lapply(results_to_save$altPos, function(x) paste(x, collapse = ","))
@@ -60,20 +61,28 @@ if (!is.null(regulator_col)) {
         drop = TRUE
     ]
 }
-results_to_save <- apply(results_to_save, 2, as.character)
+results_to_save <- as.data.frame(apply(results_to_save, 2, as.character))
+if (!is.null(motif_var_pairs)) {
+    log$info("Filtering motif-variant pairs ...")
+    results_to_save$motifs_vars <- paste0(results_to_save$providerId, " // ", results_to_save$SNP_id)
+    results_to_save <- results_to_save[results_to_save$motifs_vars %in% motif_var_pairs, , drop = FALSE]
+    results_to_save$motifs_vars <- NULL
+}
 write.table(
     results_to_save,
     file = file.path(outdir, "motifbreakr.txt"),
     sep = "\t", quote = FALSE, row.names = FALSE
 )
-rm(results_to_save)
+# rm(results_to_save)
 log$info("Plotting variants ...")
 if (is.null(plots) || length(plots) == 0) {
-    results <- results[order(-abs(results$alleleDiff)), , drop = FALSE]
-    results <- results[1:min(plot_nvars, length(results)), , drop = FALSE]
-    variants <- unique(results$SNP_id)
+    results_to_save$alleleDiff <- as.numeric(results_to_save$alleleDiff)
+    results_to_save <- results_to_save[order(-abs(results_to_save$alleleDiff)), , drop = FALSE]
+    results_to_save <- results_to_save[1:min(plot_nvars, nrow(results_to_save)), , drop = FALSE]
+    variants <- unique(results_to_save$SNP_id)
 } else {
     variants <- names(plots)
 }
@@ -88,7 +97,7 @@ for (variant in variants) {
     if (is.null(plots[[variant]]$devpars)) {
         plots[[variant]]$devpars <- devpars
     }
-    res <- results[results$SNP_id == variant, , drop = FALSE]
+    res <- results[results$SNP_id == variant & results$.id %in% results_to_save$.id, , drop = FALSE]
     res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))
     plot_variant_motifs(res, variant, plots[[variant]]$devpars, outdir)

biopipen/scripts/regulatory/VariantMotifPlot.R CHANGED Viewed

@@ -33,7 +33,7 @@ log$info("Reading input data ...")
 indata <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
 log$info("Ensuring regulators in the input data ...")
-indata <- ensure_regulator_motifs(indata, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
+indata <- ensure_regulator_motifs(indata, outdir, motif_col, regulator_col, "SNP_id", regmotifs, notfound = notfound)
 genome_pkg <- get_genome_pkg(genome)
 log$info("Reading motif database ...")

biopipen/scripts/regulatory/motifs-common.R CHANGED Viewed

@@ -138,12 +138,13 @@ motifdb_to_motiflib <- function(motifdb) {
 #' @param outdir Output directory, used to save un-matched regulators
 #' @param motif_col Column name for the motif
 #' @param regulator_col Column name for the regulator
+#' @param var_col Column name for the variant
 #' @param regmotifs Regulator-motif mapping file
 #' @param log_indent Indentation for log messages
 #' @param notfound Action to take if regulators are not found in the mapping file
 #' @return Data frame with regulators and motifs
 #' @export
-ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, regmotifs, log_indent = "", notfound = "error", log = NULL) {
+ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, var_col, regmotifs, log_indent = "", notfound = "error", log = NULL) {
     if (is.null(motif_col)) {
         if (is.null(regmotifs)) {
             stop("Regulator-motif mapping file (envs.regmotifs) is required when no motif column (envs.motif_col) is provided")
@@ -198,7 +199,7 @@ ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, r
             regulator_col <<- rm_reg_col
         }
     } else {
-        indata <- indata[!duplicated(indata[, c(regulator_col, motif_col), drop = FALSE]), , drop = FALSE]
+        indata <- indata[!duplicated(indata[, c(regulator_col, motif_col, var_col), drop = FALSE]), , drop = FALSE]
     }
     return(indata)

biopipen/scripts/scrna/AnnData2Seurat.R CHANGED Viewed

@@ -8,10 +8,11 @@ outfile <- {{out.outfile | r}}
 dotplot_check <- {{envs.dotplot_check | r}}
 outdir <- dirname(outfile)
 assay <- {{envs.assay | r}}
+ident <- {{envs.ident | r}}
 log <- get_logger()
-ConvertAnnDataToSeurat(adfile, outfile = outfile, assay = assay, log = log)
+ConvertAnnDataToSeurat(adfile, outfile = outfile, assay = assay, ident = ident, log = log)
 if (!isFALSE(dotplot_check)) {
     log$info("Reading Seurat object ...")

biopipen/scripts/scrna/CellCellCommunication.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from pathlib import Path
 from biopipen.utils.misc import run_command, logger
+from biopipen.scripts.scrna.seurat_anndata_conversion import convert_seurat_to_anndata
 import os
 import numpy as np
 import pandas as pd
@@ -7,6 +8,10 @@ import scanpy
 import liana
 import liana.method.sc._liana_pipe as _liana_pipe
+# AttributeError: module 'numpy' has no attribute 'product'
+if not hasattr(np, "product"):
+    np.product = np.prod
 # monkey-patch liana.method.sc._liana_pipe._trimean due to the updates by scipy 1.14
 # https://github.com/scipy/scipy/commit/a660202652deead0f3b4b688eb9fdcdf9f74066c
 def _trimean(a, axis=0):
@@ -35,27 +40,24 @@ ncores = envs.pop("ncores")
 species = envs.pop("species")
 rscript = envs.pop("rscript")
 subset = envs.pop("subset")
+group_by = envs.pop("group_by", None)
+groupby = envs.pop("groupby", None) or group_by
 subset_using = envs.pop("subset_using", "auto")
 if subset_using == "auto":
     subset_using = "python" if subset and "[" in subset else "r"
 split_by = envs.pop("split_by")
 if sobjfile.suffix.lower() in (".rds", ".qs", "qs2"):
-    logger.info("Converting the Seurat object to h5ad ...")
     annfile = outfile.parent / f"{sobjfile.stem}.h5ad"
-    if subset and subset_using == "r":
-        r_script_convert_to_anndata = (
-            "biopipen.utils::ConvertSeuratToAnnData"
-            f"({str(sobjfile)!r}, {str(annfile)!r}, "
-            f"assay = {{envs['assay'] | r}}, subset = {{envs['subset'] | r}})"
-        )
-    else:
-        r_script_convert_to_anndata = (
-            "biopipen.utils::ConvertSeuratToAnnData"
-            f"({str(sobjfile)!r}, {str(annfile)!r}, assay = {{envs['assay'] | r}})"
-        )
-    run_command([rscript, "-e", r_script_convert_to_anndata], fg=True)
+    seurat_ident_col = convert_seurat_to_anndata(
+        input_file=str(sobjfile),
+        output_file=str(annfile),
+        assay=assay,
+        subset=subset if subset_using == "r" else None,
+        rscript=rscript,
+        return_ident_col=not groupby,
+    )
+    groupby = groupby or seurat_ident_col
     sobjfile = annfile
 elif subset and subset == "r":
     raise ValueError(
@@ -63,6 +65,16 @@ elif subset and subset == "r":
         "'subset' can only be a 'python' expression (`envs.subset_using = 'python'`)."
     )
+if not groupby:
+    logger.warning(
+        "`groupby` is not provided. "
+        "Using 'seurat_clusters' as the default groupby column. "
+        "It is recommended to provide the `groupby` parameter."
+    )
+    groupby = "seurat_clusters"
+envs["groupby"] = groupby
 logger.info("Reading the h5ad file ...")
 adata = scanpy.read_h5ad(sobjfile)

biopipen/scripts/scrna/CellCellCommunicationPlots.R CHANGED Viewed

@@ -27,7 +27,7 @@ defaults <- list(
     devpars = list(res = 100)
 )
-cases <- expand_cases(cases, defaults)
+cases <- expand_cases(cases, defaults, default_case = "Cell-Cell Communication")
 log <- get_logger()
 reporter <- get_reporter()
@@ -35,12 +35,31 @@ do_case <- function(name) {
     log$info("- Case: {name}")
     case <- cases[[name]]
     info <- case_info(name, outdir, is_dir = FALSE)
-    case <- extract_vars(case, "subset", "devpars", "more_formats", "descr")
+    case <- extract_vars(case, subset_ = "subset", "devpars", "more_formats", "descr")
     case$data <- ccc
-    if (!is.null(case$subset)) {
-        case$data <- ccc %>% dplyr::filter(!!parse_expr(case$subset))
+    if (!is.null(subset_)) {
+        case$data <- ccc %>% dplyr::filter(!!parse_expr(subset_))
     }
+    if (identical(case$plot_type, "table")) {
+        write.table(
+            case$data,
+            file = paste0(info$prefix, ".txt"),
+            sep = "\t",
+            row.names = FALSE,
+            col.names = TRUE,
+            quote = FALSE
+        )
+        report <- list(
+            kind = "table",
+            data = list(nrows = 100),
+            src = paste0(info$prefix, ".txt")
+        )
+        reporter$add2(report, hs = c(info$section, info$name))
+        return()
+    }
     if (is.null(case$magnitude)) {
         case$magnitude <- NULL
     }

biopipen/scripts/scrna/CellSNPLite.py ADDED Viewed

@@ -0,0 +1,30 @@
+from __future__ import annotations
+from contextlib import suppress
+from pathlib import Path
+from biopipen.core.filters import dict_to_cli_args
+from biopipen.utils.misc import run_command
+crdir = Path({{in.crdir | quote}})  # noqa: E999 # pyright: ignore
+outdir = {{out.outdir | quote}}  # pyright: ignore
+envs: dict = {{envs | repr}}  # pyright: ignore
+cellsnp_lite = envs.pop("cellsnp_lite")
+ncores = envs.pop("ncores")
+with suppress(RuntimeError):
+    run_command([cellsnp_lite, "--version"], fg=True)
+    print("")
+if crdir.name != "outs":
+    crdir = crdir / "outs"
+bamfile = str(crdir / "possorted_genome_bam.bam")
+barcodefile = str(crdir / "filtered_feature_bc_matrix" / "barcodes.tsv.gz")
+envs["nproc"] = ncores
+envs["samFile"] = bamfile
+envs["barcodeFile"] = barcodefile
+envs["outDir"] = outdir
+cmd = [cellsnp_lite, *dict_to_cli_args(envs)]
+run_command(cmd, fg=True, bufsize=1)

biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R CHANGED Viewed

@@ -7,6 +7,7 @@ library(biopipen.utils)
 sobjfile <- {{in.sobjfile | r}}
 outfile <- {{out.outfile | r}}
 newcol <- {{envs.newcol | r}}
+cluster_ident <- {{envs.ident | r }}
 merge_same_labels <- {{envs.merge | r}}
 celltypist_args <- {{envs.celltypist_args | r}}
 outtype <- {{envs.outtype | r }}
@@ -17,6 +18,10 @@ if (identical(outtype, "input")) {
 outdir <- dirname(outfile)
 outprefix <- file.path(outdir, tools::file_path_sans_ext(basename(outfile)))
+over_clustering <- celltypist_args$over_clustering %||% cluster_ident
+require_package("celltypist", version = ">=1.7.1", python = celltypist_args$python)
 log <- get_logger()
 if (is.null(celltypist_args$model)) {
@@ -30,23 +35,14 @@ suppressWarnings(file.remove(modelfile))
 file.symlink(normalizePath(celltypist_args$model), modelfile)
 sobj <- NULL
+ident <- NULL
 if (!endsWith(sobjfile, ".h5ad")) {
     sobj <- read_obj(sobjfile)
-    if (is.null(celltypist_args$over_clustering)) {
-        # find the default ident name in meta.data
-        for (col in colnames(sobj@meta.data)) {
-            if (!is.factor(sobj@meta.data[[col]])) { next }
-            if (isTRUE(all.equal(unname(Idents(sobj)), sobj@meta.data[[col]]))) {
-                celltypist_args$over_clustering <- col
-                break
-            }
-        }
-    }
-    if (is.null(celltypist_args$over_clustering)) {
-        celltypist_args$over_clustering <- FALSE
-    }
-    if (!isFALSE(celltypist_args$over_clustering)) {
-        destfile <- paste0(outprefix, ".", celltypist_args$over_clustering, ".h5ad")
+    ident <- GetIdentityColumn(sobj)
+    over_clustering <- over_clustering %||% ident
+    if (!isFALSE(over_clustering)) {
+        destfile <- paste0(outprefix, ".", over_clustering, ".h5ad")
     } else {
         destfile <- paste0(outprefix, ".h5ad")
     }
@@ -61,7 +57,7 @@ if (!endsWith(sobjfile, ".h5ad")) {
         ConvertSeuratToAnnData(
             sobj,
             outfile = destfile,
-            assay = celltypist_args$assay %||% "RNA",
+            assay = celltypist_args$assay,
             log = log
         )
     }
@@ -103,15 +99,15 @@ if (file.exists(celltypist_outfile) &&
         "-m", celltypist_args$model,
         "-o", celltypist_outfile
     )
-    if (!isFALSE(celltypist_args$over_clustering) &&
-        !is.null(celltypist_args$over_clustering)) {
-        command <- paste(command, "-c", celltypist_args$over_clustering)
+    if (!isFALSE(over_clustering) && !is.null(over_clustering)) {
+        command <- paste(command, "-c", over_clustering)
     }
     if (isTRUE(celltypist_args$majority_voting)) {
         command <- paste(command, "-v")
     }
     log$info("Running celltypist:")
-    print("- {command}")
+    # print("- {command}")
+    log$debug("  {command}")
     rc <- system(command)
     if (rc != 0) {
         stop("Failed to run celltypist. Check the job.stderr file to see the error message.")
@@ -129,6 +125,7 @@ if (outtype == "h5ad") {
             infile = celltypist_outfile,
             outfile = NULL,
             assay = celltypist_args$assay %||% "RNA",
+            ident = ident,
             log = log
         )
     } else {
@@ -152,31 +149,20 @@ if (outtype == "h5ad") {
         if (!is.null(newcol)) {
             sobj@meta.data[[newcol]] <- sobj@meta.data[[prediction]]
-        } else {
-            over_clustering <- celltypist_args$over_clustering
-            if (over_clustering %in% colnames(sobj@meta.data)) {
-                sobj@meta.data$seurat_clusters_id <- sobj@meta.data[[over_clustering]]
-            } else {
-                over_clustering <- "over_clustering"
-            }
+        } else if (!isFALSE(over_clustering) && !is.null(over_clustering)) {
+            # save the original over_clustering column as seurat_clusters_id
+            sobj@meta.data$seurat_clusters_id <- sobj@meta.data[[over_clustering]]
             # make a map of original cluster id to new cluster id
             cluster_map <- data.frame(
-                seurat_clusters_id = sobj@meta.data[[over_clustering]],
+                seurat_clusters_id = sobj@meta.data$seurat_clusters_id,
                 seurat_clusters = sobj@meta.data[[prediction]]
                 ) %>%
                 group_by(seurat_clusters_id) %>%
                 summarise(seurat_clusters = first(seurat_clusters), .groups = "drop") %>%
                 mutate(seurat_clusters = make.unique(seurat_clusters))
             cluster_map <- split(cluster_map$seurat_clusters, cluster_map$seurat_clusters_id)
-            if (over_clustering != "seurat_clusters") {
-                sobj@meta.data$seurat_clusters <- sobj@meta.data[[over_clustering]]
-            }
-            Idents(sobj) <- "seurat_clusters"
-            cluster_map$object <- sobj
-            log$info("Renaming clusters ...")
-            sobj <- do_call(RenameIdents, cluster_map)
-            sobj@meta.data$seurat_clusters <- Idents(sobj)
+            sobj <- rename_idents(sobj, over_clustering, cluster_map)
         }
     } else if (!is.null(newcol)) {
         sobj@meta.data[[newcol]] <- sobj@meta.data[["predicted_labels"]]
@@ -187,6 +173,11 @@ if (outtype == "h5ad") {
         sobj <- merge_clusters_with_same_labels(sobj, newcol)
     }
+    if (!is.null(ident)) {
+        # restore the original identity
+        Idents(sobj) <- ident
+    }
     log$info("Saving the object ...")
     save_obj(sobj, outfile)
 } else {

biopipen 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl

biopipen 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl