PyPI - biopipen - Versions diffs - 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl - Mend

biopipen 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

biopipen/__init__.py +1 -1
biopipen/core/config.toml +4 -0
biopipen/core/filters.py +1 -1
biopipen/core/testing.py +2 -1
biopipen/ns/cellranger.py +33 -3
biopipen/ns/regulatory.py +4 -0
biopipen/ns/scrna.py +548 -98
biopipen/ns/scrna_metabolic_landscape.py +4 -0
biopipen/ns/tcr.py +256 -16
biopipen/ns/web.py +5 -0
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +9 -9
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +9 -8
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +9 -9
biopipen/reports/tcr/ClonalStats.svelte +1 -0
biopipen/scripts/cellranger/CellRangerCount.py +55 -11
biopipen/scripts/cellranger/CellRangerVdj.py +54 -8
biopipen/scripts/regulatory/MotifAffinityTest.R +21 -5
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +9 -2
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +15 -6
biopipen/scripts/regulatory/VariantMotifPlot.R +1 -1
biopipen/scripts/regulatory/motifs-common.R +3 -2
biopipen/scripts/scrna/AnnData2Seurat.R +2 -1
biopipen/scripts/scrna/CellCellCommunication.py +26 -14
biopipen/scripts/scrna/CellCellCommunicationPlots.R +23 -4
biopipen/scripts/scrna/CellSNPLite.py +30 -0
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +27 -36
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +42 -26
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +11 -13
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +5 -8
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +5 -8
biopipen/scripts/scrna/CellTypeAnnotation.R +26 -3
biopipen/scripts/scrna/MQuad.py +25 -0
biopipen/scripts/scrna/MarkersFinder.R +128 -30
biopipen/scripts/scrna/ModuleScoreCalculator.R +9 -1
biopipen/scripts/scrna/PseudoBulkDEG.R +113 -27
biopipen/scripts/scrna/ScFGSEA.R +23 -26
biopipen/scripts/scrna/ScVelo.py +20 -8
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -1
biopipen/scripts/scrna/SeuratClustering.R +5 -1
biopipen/scripts/scrna/SeuratMap2Ref.R +1 -2
biopipen/scripts/scrna/SeuratPreparing.R +19 -11
biopipen/scripts/scrna/SeuratSubClustering.R +1 -1
biopipen/scripts/scrna/Slingshot.R +2 -4
biopipen/scripts/scrna/TopExpressingGenes.R +1 -4
biopipen/scripts/scrna/celltypist-wrapper.py +140 -4
biopipen/scripts/scrna/scvelo_paga.py +313 -0
biopipen/scripts/scrna/seurat_anndata_conversion.py +18 -1
biopipen/scripts/tcr/{TCRClustering.R → CDR3Clustering.R} +63 -23
biopipen/scripts/tcr/ClonalStats.R +76 -35
biopipen/utils/misc.py +104 -9
{biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/METADATA +5 -2
{biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/RECORD +55 -53
{biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
biopipen/utils/common_docstrs.py +0 -103
{biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +0 -0

biopipen/scripts/scrna/CellTypeAnnotation-direct.R CHANGED Viewed

@@ -1,15 +1,23 @@
 library(Seurat)
+library(rlang)
+library(dplyr)
+library(tidyseurat)
 sobjfile <- {{in.sobjfile | r}}
 outfile <- {{out.outfile | r}}
 celltypes <- {{envs.cell_types | r}}
 newcol <- {{envs.newcol | r}}
+ident <- {{envs.ident | r }}
 merge_same_labels <- {{envs.merge | r}}
+more_cell_types <- {{envs.more_cell_types | r}}
 log <- biopipen.utils::get_logger()
 if (is.null(celltypes) || length(celltypes) == 0) {
     log$warn("No cell types are given!")
+    if (!is.null(more_cell_types) && length(more_cell_types) > 0) {
+        log$warn("`envs.celltypes` is not given, won't process `envs.more_cell_types`!")
+    }
     if (merge_same_labels) {
         log$warn("Ignoring 'envs.merge' because no cell types are given!")
@@ -19,45 +27,52 @@ if (is.null(celltypes) || length(celltypes) == 0) {
 } else {
     log$info("Loading Seurat object ...")
     sobj <- biopipen.utils::read_obj(sobjfile)
+    ident <- ident %||% biopipen.utils::GetIdentityColumn(sobj)
+    Idents(sobj) <- ident
     idents <- Idents(sobj)
     if (is.factor(idents)) {
         idents <- levels(idents)
     } else {
         idents <- as.character(unique(idents))
     }
-    if (length(celltypes) < length(idents)) {
-        celltypes <- c(celltypes, idents[(length(celltypes) + 1):length(idents)])
-    } else if (length(celltypes) > length(idents)) {
-        celltypes <- celltypes[1:length(idents)]
-        log$warn("The length of cell types is longer than the number of clusters!")
+    process_celltypes <- function(ct, key = NULL) {
+        if (length(ct) < length(idents)) {
+            ct <- c(ct, idents[(length(ct) + 1):length(idents)])
+        } else if (length(ct) > length(idents)) {
+            ct <- ct[1:length(idents)]
+            if (is.null(key)) {
+                log$warn("The length of cell types is longer than the number of clusters!")
+            } else {
+                log$warn(paste0("The length of cell types for '", key, "' is longer than the number of clusters!"))
+            }
+        }
+        for (i in seq_along(ct)) {
+            if (ct[i] == "-" || ct[i] == "") {
+                ct[i] <- idents[i]
+            }
+        }
+        names(ct) <- idents
+        return(ct)
     }
-    for (i in seq_along(celltypes)) {
-        if (celltypes[i] == "-" || celltypes[i] == "") {
-            celltypes[i] <- idents[i]
+    if (!is.null(more_cell_types) && length(more_cell_types) > 0) {
+        for (key in names(more_cell_types)) {
+            ct <- more_cell_types[[key]]
+            ct <- process_celltypes(ct, key)
+            log$info(paste0("Adding additional cell type annotation: '", key, "' ..."))
+            sobj@meta.data[[key]] <- ct[as.character(Idents(sobj))]
         }
     }
-    names(celltypes) <- idents
+    celltypes <- process_celltypes(celltypes)
     log$info("Renaming cell types ...")
     if (is.null(newcol)) {
-        has_na <- "NA" %in% unlist(celltypes) || anyNA(unlist(celltypes))
-        sobj$seurat_clusters_id <- Idents(sobj)
-        celltypes$object <- sobj
-        sobj <- do_call(RenameIdents, celltypes)
-        sobj$seurat_clusters <- Idents(sobj)
-        if (has_na) {
-            log$info("Filtering clusters if NA ...")
-            sobj <- subset(
-                sobj,
-                subset = seurat_clusters != "NA" & !is.na(seurat_clusters)
-            )
-        }
+        sobj <- rename_idents(sobj, ident, celltypes)
+        log$info("Filtering clusters if NA ...")
+        sobj <- filter(sobj, !!sym(ident) != "NA" & !is.na(!!sym(ident)))
     } else {
-        celltypes$object <- sobj
-        sobj <- do_call(RenameIdents, celltypes)
-        sobj[[newcol]] <- Idents(sobj)
-        Idents(sobj) <- "seurat_clusters"
+        sobj[[newcol]] <- celltypes[as.character(Idents(sobj))]
     }
     if (merge_same_labels) {
@@ -65,5 +80,6 @@ if (is.null(celltypes) || length(celltypes) == 0) {
         sobj <- merge_clusters_with_same_labels(sobj, newcol)
     }
+    log$info("Saving Seurat object ...")
     biopipen.utils::save_obj(sobj, outfile)
 }

biopipen/scripts/scrna/CellTypeAnnotation-hitype.R CHANGED Viewed

@@ -1,13 +1,13 @@
-library(Seurat)
+library(rlang)
 library(dplyr)
 library(hitype)
-library(biopipen.utils)
 sobjfile = {{in.sobjfile | r}}
 outfile = {{out.outfile | r}}
 tissue = {{envs.hitype_tissue | r}}
 db = {{envs.hitype_db | r}}
 newcol = {{envs.newcol | r}}
+ident = {{envs.ident | r }}
 merge_same_labels = {{envs.merge | r}}
 if (is.null(db)) { stop("`envs.hitype_db` is not set") }
@@ -16,6 +16,8 @@ log <- get_logger()
 log$info("Reading Seurat object...")
 sobj = biopipen.utils::read_obj(sobjfile)
+ident <- ident %||% biopipen.utils::GetIdentityColumn(sobj)
+Idents(sobj) <- ident
 # prepare gene sets
 log$info("Preparing gene sets...")
@@ -30,18 +32,14 @@ log$info("Running RunHitype...")
 sobj = RunHitype(sobj, gs_list, threshold = 0.0, make_unique = TRUE)
 log$info("Renaming cell types...")
-hitype_levels = sobj@meta.data %>%
-    select(seurat_clusters, hitype) %>%
-    distinct(seurat_clusters, .keep_all = TRUE) %>%
-    arrange(as.numeric(seurat_clusters)) %>%
-    pull("hitype")
+hitype_labels <- sobj@meta.data %>%
+    distinct(!!sym(ident), hitype)
+hitype_labels <- split(hitype_labels$hitype, hitype_labels[[ident]])
 if (is.null(newcol)) {
-    sobj$seurat_clusters_id = sobj$seurat_clusters
-    sobj$seurat_clusters = factor(sobj$hitype, levels = hitype_levels)
-    Idents(sobj) = "seurat_clusters"
+    sobj <- rename_idents(sobj, ident, hitype_labels)
 } else {
-    sobj[[newcol]] = factor(sobj$hitype, levels = hitype_levels)
+    sobj[[newcol]] = sobj$hitype
 }
 if (merge_same_labels) {
@@ -55,9 +53,9 @@ biopipen.utils::save_obj(sobj, outfile)
 log$info("Saving the mappings ...")
 if (is.null(newcol)) {
     celltypes = sobj@meta.data %>%
-        group_by(seurat_clusters_id) %>%
+        group_by(!!sym(backup_col)) %>%
         summarize(CellType = hitype[1]) %>%
-        select(Cluster = seurat_clusters_id, CellType) %>%
+        select(Cluster = !!sym(backup_col), CellType) %>%
         ungroup()
 } else {
     celltypes = sobj@meta.data %>%

biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R CHANGED Viewed

@@ -6,6 +6,7 @@ sobjfile = {{in.sobjfile | r}}
 outfile = {{out.outfile | r}}
 sccatch_args = {{envs.sccatch_args | r}}
 newcol = {{envs.newcol | r}}
+ident = {{envs.ident | r }}
 merge_same_labels = {{envs.merge | r}}
 log <- get_logger()
@@ -22,6 +23,8 @@ if (is.integer(sccatch_args$use_method)) {
 log$info("Reading Seurat object...")
 sobj = read_obj(sobjfile)
+ident <- ident %||% GetIdentityColumn(sobj)
+Idents(sobj) <- ident
 log$info("Running createscCATCH ...")
 obj = createscCATCH(data = GetAssayData(sobj), cluster = as.character(Idents(sobj)))
@@ -48,15 +51,9 @@ if (length(celltypes) == 0) {
     log$warn("- No cell types annotated from the database!")
 } else {
     if (is.null(newcol)) {
-        sobj$seurat_clusters_id = Idents(sobj)
-        celltypes$object = sobj
-        sobj = do_call(RenameIdents, celltypes)
-        sobj$seurat_clusters = Idents(sobj)
+        sobj <- rename_idents(sobj, ident, celltypes)
     } else {
-        celltypes$object = sobj
-        sobj = do_call(RenameIdents, celltypes)
-        sobj[[newcol]] = Idents(sobj)
-        Idents(sobj) = "seurat_clusters"
+        sobj@meta.data[[newcol]] = celltypes[as.character(Idents(sobj))]
     }
     if (merge_same_labels) {

biopipen/scripts/scrna/CellTypeAnnotation-sctype.R CHANGED Viewed

@@ -11,6 +11,7 @@ outfile = {{out.outfile | r}}
 tissue = {{envs.sctype_tissue | r}}
 db = {{envs.sctype_db | r}}
 newcol = {{envs.newcol | r}}
+ident = {{envs.ident | r }}
 merge_same_labels = {{envs.merge | r}}
 if (is.null(db)) { stop("`envs.sctype_args.db` is not set") }
@@ -19,6 +20,8 @@ log <- get_logger()
 log$info("Reading Seurat object...")
 sobj = biopipen.utils::read_obj(sobjfile)
+ident <- ident %||% biopipen.utils::GetIdentityColumn(sobj)
+Idents(sobj) <- ident
 # prepare gene sets
 log$info("Preparing gene sets...")
@@ -116,15 +119,9 @@ for (key in names(celltypes)) {
 celltypes = as.list(celltypes)
 if (is.null(newcol)) {
-    sobj$seurat_clusters_id = sobj$seurat_clusters
-    celltypes$object = sobj
-    sobj = do_call(RenameIdents, celltypes)
-    sobj$seurat_clusters = Idents(sobj)
+    sobj <- rename_idents(sobj, ident, celltypes)
 } else {
-    celltypes$object = sobj
-    sobj = do_call(RenameIdents, celltypes)
-    sobj[[newcol]] = Idents(sobj)
-    Idents(sobj) = "seurat_clusters"
+    sobj@meta.data[[newcol]] = celltypes[as.character(Idents(sobj))]
 }
 celltypes$object = NULL
 gc()

biopipen/scripts/scrna/CellTypeAnnotation.R CHANGED Viewed

@@ -1,9 +1,15 @@
+library(Seurat)
+library(biopipen.utils)
 set.seed(8525)
-merge_clusters_with_same_labels <- function(sobj, newcol) {
+backup_col <- {{envs.backup_col | r}}
+merge_clusters_with_same_labels <- function(sobj, newcol = NULL) {
     if (is.null(newcol)) {
-        sobj@meta.data$seurat_clusters <- sub("\\.\\d+$", "", sobj@meta.data$seurat_clusters)
-        Idents(sobj) <- "seurat_clusters"
+        newcol <- biopipen.utils::GetIdentityColumn(sobj)
+        sobj@meta.data[[newcol]] <- sub("\\.\\d+$", "", sobj@meta.data[[newcol]])
+        Idents(sobj) <- newcol
     } else {
         sobj@meta.data[[newcol]] <- sub("\\.\\d+$", "", sobj@meta.data[[newcol]])
     }
@@ -11,6 +17,23 @@ merge_clusters_with_same_labels <- function(sobj, newcol) {
     sobj
 }
+rename_idents <- function(sobj, ident_col, mapping) {
+    orig_ident_col <- biopipen.utils::GetIdentityColumn(sobj)
+    if (!identical(ident_col, orig_ident_col)) {
+        Idents(sobj) <- ident_col
+        mapping$object <- sobj
+        sobj <- do_call(RenameIdents, mapping)
+    } else {
+        if (!is.null(backup_col)) {
+            sobj@meta.data[[backup_col]] <- Idents(sobj)
+        }
+        mapping$object <- sobj
+        sobj <- do_call(RenameIdents, mapping)
+    }
+    sobj@meta.data[[ident_col]] <- Idents(sobj)
+    sobj
+}
 {% if envs.tool == "hitype" %}
 {% include biopipen_dir + "/scripts/scrna/CellTypeAnnotation-hitype.R" %}
 {% elif envs.tool == "sctype" %}

biopipen/scripts/scrna/MQuad.py ADDED Viewed

@@ -0,0 +1,25 @@
+from __future__ import annotations
+from pathlib import Path
+from contextlib import suppress
+from biopipen.core.filters import dict_to_cli_args
+from biopipen.utils.misc import run_command
+cellsnpout = {{in.cellsnpout | quote}}  # noqa: E999 # pyright: ignore
+outdir = {{out.outdir | quote}}  # pyright: ignore
+envs: dict = {{envs | repr}}  # pyright: ignore
+mquad = envs.pop("mquad")
+ncores = envs.pop("ncores")
+seed = envs.pop("seed", 8525)
+with suppress(RuntimeError):
+    run_command([mquad], fg=True)
+    print("")
+envs["cellData"] = cellsnpout
+envs["outDir"] = outdir
+envs["randSeed"] = seed
+envs["nproc"] = ncores
+cmd = [mquad, *dict_to_cli_args(envs, sep="=")]
+run_command(cmd, fg=True, bufsize=1)

biopipen/scripts/scrna/MarkersFinder.R CHANGED Viewed

@@ -42,15 +42,12 @@ if (isTRUE(cache)) { cache <- joboutdir }
 set.seed(8525)
 if (ncores > 1) {
-    options(future.globals.maxSize = 80000 * 1024^2)
+    options(future.globals.maxSize = Inf)
     plan(strategy = "multicore", workers = ncores)
 }
 log$info("Reading Seurat object ...")
 srtobj <- read_obj(srtfile)
-if (!"Identity" %in% colnames(srtobj@meta.data)) {
-    srtobj@meta.data$Identity <- Idents(srtobj)
-}
 if (!is.null(mutaters) && length(mutaters) > 0) {
@@ -105,7 +102,7 @@ log$info("Expanding cases ...")
 post_casing <- function(name, case) {
     outcases <- list()
-    case$group_by <- case$group_by %||% "Identity"
+    case$group_by <- case$group_by %||% GetIdentityColumn(srtobj)
     if (is.null(case$each) || is.na(case$each) || nchar(case$each) == 0 || isFALSE(each)) {
         # single cases, no need to expand
@@ -165,10 +162,12 @@ post_casing <- function(name, case) {
         if (length(cases) == 0 && name == "Marker Discovery") {
             name <- case$each
+        } else {
+            name <- paste0(name, " (", case$each, ")")
         }
         for (each in eachs) {
-            newname <- paste0(name, " - ", each)
+            newname <- paste0(name, "::", each)
             newcase <- case
             newcase$original_case <- name
@@ -266,20 +265,23 @@ process_markers <- function(markers, info, case) {
         ui = "tabs"
     )
-    for (plotname in names(case$marker_plots)) {
-        plotargs <- case$marker_plots[[plotname]]
-        plotargs$degs <- markers
-        rownames(plotargs$degs) <- make.unique(markers$gene)
-        plotargs$outprefix <- file.path(info$prefix, paste0("markers.", slugify(plotname)))
-        do_call(VizDEGs, plotargs)
-        reporter$add2(
-            list(
-                name = plotname,
-                contents = list(reporter$image(plotargs$outprefix, plotargs$more_formats, plotargs$save_code))),
-            hs = c(info$section, info$name),
-            hs2 = ifelse(is.null(case$ident), "Markers", paste0("Markers (", case$ident, ")")),
-            ui = "tabs"
-        )
+    if (nrow(markers) > 0) {
+        for (plotname in names(case$marker_plots)) {
+            plotargs <- case$marker_plots[[plotname]]
+            plotargs$markers <- markers
+            plotargs$object <- case$object
+            plotargs$comparison_by <- case$group_by
+            plotargs$outprefix <- file.path(info$prefix, paste0("markers.", slugify(plotname)))
+            do_call(VizDEGs, plotargs)
+            reporter$add2(
+                list(
+                    name = plotname,
+                    contents = list(reporter$image(plotargs$outprefix, plotargs$more_formats, plotargs$save_code))),
+                hs = c(info$section, info$name),
+                hs2 = ifelse(is.null(case$ident), "Markers", paste0("Markers (", case$ident, ")")),
+                ui = "tabs"
+            )
+        }
     }
     # Do enrichment analysis
@@ -349,16 +351,29 @@ process_markers <- function(markers, info, case) {
                 for (db in case$dbs) {
                     plots <- list()
                     for (plotname in names(case$enrich_plots)) {
-                        plotargs <- case$enrich_plots[[plotname]]
+                        plotargs <- extract_vars(case$enrich_plots[[plotname]], "descr", allow_nonexisting = TRUE)
                         plotargs$data <- enrich[enrich$Database == db, , drop = FALSE]
-                        p <- do_call(VizEnrichment, plotargs)
+                        p <- tryCatch(
+                            do_call(VizEnrichment, plotargs),
+                            error = function(e) {
+                                stop("Failed to plot enrichment for database '", db, "' with plot '", plotname, "': ", e$message)
+                            }
+                        )
                         if (plotargs$plot_type == "bar") {
                             attr(p, "height") <- attr(p, "height") / 1.5
+                            descr <- descr %||% glue::glue(
+                                "The bar plot shows the top enriched terms in database '{db}', ",
+                                "the x-axis shows the -log10 of the adjusted p-values, ",
+                                "and the y-axis shows the term names. The number next to each bar indicates the overlap gene count."
+                            )
                         }
                         outprefix <- file.path(info$prefix, paste0("enrich.", slugify(db), ".", slugify(plotname)))
                         save_plot(p, outprefix, plotargs$devpars, formats = "png")
+                        if (!is.null(descr)) {
+                            plots[[length(plots) + 1]] <- list(kind = "descr", content = glue::glue(descr))
+                        }
                         plots[[length(plots) + 1]] <- reporter$image(outprefix, c(), FALSE)
                     }
                     reporter$add2(
@@ -389,13 +404,18 @@ process_markers <- function(markers, info, case) {
     }
 }
-process_allmarkers <- function(markers, plotcases, casename, groupname) {
+process_allmarkers <- function(markers, object, comparison_by, plotcases, casename, groupname, subset_by_group = TRUE) {
     name <- paste0(casename, "::", paste0(groupname, " (All Markers)"))
     info <- case_info(name, outdir, create = TRUE)
     for (plotname in names(plotcases)) {
+        log$info("  {plotname} ...")
         plotargs <- plotcases[[plotname]]
-        plotargs$degs <- markers
+        plotargs$markers <- markers
+        plotargs$object <- object
+        plotargs$comparison_by <- comparison_by
+        if (subset_by_group)
+            plotargs$subset_by <- groupname
         plotargs$outprefix <- file.path(info$prefix, slugify(plotname))
         do_call(VizDEGs, plotargs)
         reporter$add2(
@@ -417,6 +437,7 @@ process_allenriches <- function(enriches, plotcases, casename, groupname) {
     for (db in dbs) {
         plots <- list()
         for (plotname in names(plotcases)) {
+            log$info("  {plotname} ({db}) ...")
             plotargs <- plotcases[[plotname]]
             plotargs <- extract_vars(plotargs, "devpars")
             plotargs$data <- enriches[enriches$Database == db, , drop = FALSE]
@@ -449,6 +470,7 @@ process_overlaps <- function(markers, ovcases, casename, groupname) {
     info <- case_info(name, outdir, create = TRUE)
     for (plotname in names(ovcases)) {
+        log$info("  {plotname} ...")
         args <- extract_vars(
             ovcases[[plotname]],
             sigm = "sigmarkers", "more_formats", "save_code", "devpars", "plot_type",
@@ -513,6 +535,7 @@ run_case <- function(name) {
         case,
         "dbs", "sigmarkers", "allmarker_plots", "allenrich_plots", "marker_plots", "enrich_plots",
         "overlaps", "original_case", "markers", "enriches", "each_name", "each", "enrich_style", "original_subset",
+        subset_ = "subset",
         allow_nonexisting = TRUE
     )
@@ -545,7 +568,16 @@ run_case <- function(name) {
                 attr(markers, "group_by") <- each
                 attr(markers, "ident_1") <- NULL
                 attr(markers, "ident_2") <- NULL
-                process_allmarkers(markers, allmarker_plots, name, each)
+                if (!is.null(markers) && nrow(markers) > 0) {
+                    process_allmarkers(
+                        markers,
+                        object = if (is.null(original_subset)) srtobj else filter(srtobj, !!parse_expr(original_subset)),
+                        comparison_by = group_by,
+                        allmarker_plots,
+                        name,
+                        each
+                    )
+                }
             }
             if (length(overlaps) > 0) {
@@ -555,7 +587,7 @@ run_case <- function(name) {
         }
-        if (!is.null(enriches)) {
+        if (!is.null(enriches) && length(enriches) > 0) {
             log$info("- Summarizing enrichments in subcases (by each: {each}) ...")
             if (!is.data.frame(enriches)) {
                 each_levels <- names(enriches)
@@ -571,8 +603,30 @@ run_case <- function(name) {
                 enriches[[each]] <- factor(enriches[[each]], levels = each_levels)
             }
-            if (length(allenrich_plots) > 0) {
+            if (length(allenrich_plots) > 0 && !is.null(enriches) && nrow(enriches) > 0) {
                 log$info("- Visualizing all enrichments together ...")
+                # add other metadata columns if any by mapping groupname
+                # only add the metadata columns from object if there is a single value mapped
+                metacols <- srtobj@meta.data %>% group_by(!!sym(each)) %>%
+                    summarize(across(everything(), ~ n_distinct(.) == 1), .groups = "keep") %>%
+                    select(where(~ all(. == TRUE))) %>%
+                    colnames()
+                if (length(metacols) > 1) {
+                    metadf <- srtobj@meta.data[, metacols, drop = FALSE]  %>%
+                        distinct(!!sym(each), .keep_all = TRUE)
+                    for (col in setdiff(metacols, each)) {
+                        if (col %in% colnames(enriches)) {
+                            warning("Column name conflict: {col}, adding with suffix '_meta'", immediate. = TRUE)
+                            metadf[[paste0(col, "_meta")]] <- metadf[[col]]
+                            metadf[[col]] <- NULL
+                        }
+                    }
+                    enriches <- left_join(enriches, metadf, by = each)
+                }
                 process_allenriches(enriches, allenrich_plots, name, each)
             }
         }
@@ -580,11 +634,15 @@ run_case <- function(name) {
         return(invisible())
     }
+    # Let RunSeuratDEAnalysis handle the subset
+    case$subset <- subset_
     case$object <- srtobj
     markers <- do_call(RunSeuratDEAnalysis, case)
-    case$object <- NULL
+    case$object <- NULL  # Release memory
     gc()
+    subobj <- if (is.null(subset_)) srtobj else filter(srtobj, !!parse_expr(subset_))
     if (is.null(case$ident_1)) {
         all_idents <- unique(as.character(markers[[case$group_by]]))
         enriches <- list()
@@ -596,7 +654,9 @@ run_case <- function(name) {
             attr(ident_markers, "ident_1") <- ident
             enrich <- process_markers(ident_markers, info = info, case = list(
+                object = subobj,
                 dbs = dbs,
+                group_by = case$group_by,
                 sigmarkers = sigmarkers,
                 enrich_style = enrich_style,
                 marker_plots = marker_plots,
@@ -609,7 +669,14 @@ run_case <- function(name) {
         if (length(allmarker_plots) > 0) {
             log$info("- Visualizing all markers together ...")
-            process_allmarkers(markers, allmarker_plots, name, case$group_by)
+            process_allmarkers(
+                markers,
+                object = subobj,
+                comparison_by = case$group_by,
+                plotcases = allmarker_plots,
+                casename = name,
+                groupname = case$group_by,
+                subset_by_group = FALSE)
         }
         if (length(overlaps) > 0) {
@@ -619,12 +686,41 @@ run_case <- function(name) {
         if (length(allenrich_plots) > 0) {
             log$info("- Visualizing all enrichments together ...")
+            # add other metadata columns if any by mapping groupname
+            # only add the metadata columns from object if there is a single value mapped
+            metacols <- subobj@meta.data %>% group_by(!!sym(case$group_by)) %>%
+                summarize(across(everything(), ~ n_distinct(.) == 1), .groups = "keep") %>%
+                select(where(~ all(. == TRUE))) %>%
+                colnames()
+            if (length(metacols) > 1) {
+                metadf <- subobj@meta.data[, metacols, drop = FALSE]  %>%
+                    distinct(!!sym(case$group_by), .keep_all = TRUE)
+                for (col in setdiff(metacols, case$group_by)) {
+                    if (col %in% colnames(enriches[[1]])) {
+                        warning("Column name conflict: {col}, adding with suffix '_meta'", immediate. = TRUE)
+                        metadf[[paste0(col, "_meta")]] <- metadf[[col]]
+                        metadf[[col]] <- NULL
+                    }
+                }
+                for (ne in names(enriches)) {
+                    if (!case$group_by %in% colnames(enriches[[ne]])) {
+                        enriches[[ne]][[case$group_by]] <- ne
+                    }
+                    enriches[[ne]] <- left_join(enriches[[ne]], metadf, by = case$group_by)
+                }
+            }
+            enriches <- do_call(rbind, enriches)
             process_allenriches(enriches, allenrich_plots, name, case$group_by)
         }
     } else {
         info <- case_info(name, outdir, create = TRUE)
         enrich <- process_markers(markers, info = info, case = list(
+            object = subobj,
             dbs = dbs,
+            group_by = case$group_by,
             sigmarkers = sigmarkers,
             enrich_style = enrich_style,
             marker_plots = marker_plots,
@@ -634,7 +730,9 @@ run_case <- function(name) {
         ))
         if (!is.null(original_case) && !is.null(cases[[original_case]])) {
-            markers[[each_name]] <- each
+            if (nrow(markers) > 0) {
+                markers[[each_name]] <- each
+            }
             cases[[original_case]]$markers[[each]] <<- markers
             cases[[original_case]]$enriches[[each]] <<- enrich
         }

biopipen/scripts/scrna/ModuleScoreCalculator.R CHANGED Viewed

@@ -1,11 +1,13 @@
-library(Seurat)
+library(rlang)
 library(dplyr)
+library(Seurat)
 library(biopipen.utils)
 sobjfile <- {{in.srtobj | r}}
 outfile <- {{out.rdsfile | r}}
 defaults <- {{envs.defaults | r}}
 modules <- {{envs.modules | r}}
+post_mutaters <- {{envs.post_mutaters | r}}
 log <- get_logger()
@@ -134,6 +136,12 @@ for (key in names(modules)) {
     }
 }
+if (!is.null(post_mutaters) && length(post_mutaters) > 0) {
+    log$info("Applying post mutaters ...")
+    sobj@meta.data <- sobj@meta.data %>%
+        mutate(!!!lapply(post_mutaters, parse_expr))
+}
 # save seurat object
 log$info("Saving Seurat object ...")
 save_obj(sobj, outfile)

biopipen 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl

biopipen 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl