PyPI - biopipen - Versions diffs - 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl - Mend

biopipen 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

biopipen/__init__.py +1 -1
biopipen/core/config.toml +4 -0
biopipen/core/filters.py +1 -1
biopipen/core/testing.py +2 -1
biopipen/ns/cellranger.py +33 -3
biopipen/ns/regulatory.py +4 -0
biopipen/ns/scrna.py +548 -98
biopipen/ns/scrna_metabolic_landscape.py +4 -0
biopipen/ns/tcr.py +256 -16
biopipen/ns/web.py +5 -0
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +9 -9
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +9 -8
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +9 -9
biopipen/reports/tcr/ClonalStats.svelte +1 -0
biopipen/scripts/cellranger/CellRangerCount.py +55 -11
biopipen/scripts/cellranger/CellRangerVdj.py +54 -8
biopipen/scripts/regulatory/MotifAffinityTest.R +21 -5
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +9 -2
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +15 -6
biopipen/scripts/regulatory/VariantMotifPlot.R +1 -1
biopipen/scripts/regulatory/motifs-common.R +3 -2
biopipen/scripts/scrna/AnnData2Seurat.R +2 -1
biopipen/scripts/scrna/CellCellCommunication.py +26 -14
biopipen/scripts/scrna/CellCellCommunicationPlots.R +23 -4
biopipen/scripts/scrna/CellSNPLite.py +30 -0
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +27 -36
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +42 -26
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +11 -13
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +5 -8
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +5 -8
biopipen/scripts/scrna/CellTypeAnnotation.R +26 -3
biopipen/scripts/scrna/MQuad.py +25 -0
biopipen/scripts/scrna/MarkersFinder.R +128 -30
biopipen/scripts/scrna/ModuleScoreCalculator.R +9 -1
biopipen/scripts/scrna/PseudoBulkDEG.R +113 -27
biopipen/scripts/scrna/ScFGSEA.R +23 -26
biopipen/scripts/scrna/ScVelo.py +20 -8
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -1
biopipen/scripts/scrna/SeuratClustering.R +5 -1
biopipen/scripts/scrna/SeuratMap2Ref.R +1 -2
biopipen/scripts/scrna/SeuratPreparing.R +19 -11
biopipen/scripts/scrna/SeuratSubClustering.R +1 -1
biopipen/scripts/scrna/Slingshot.R +2 -4
biopipen/scripts/scrna/TopExpressingGenes.R +1 -4
biopipen/scripts/scrna/celltypist-wrapper.py +140 -4
biopipen/scripts/scrna/scvelo_paga.py +313 -0
biopipen/scripts/scrna/seurat_anndata_conversion.py +18 -1
biopipen/scripts/tcr/{TCRClustering.R → CDR3Clustering.R} +63 -23
biopipen/scripts/tcr/ClonalStats.R +76 -35
biopipen/utils/misc.py +104 -9
{biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/METADATA +5 -2
{biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/RECORD +55 -53
{biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
biopipen/utils/common_docstrs.py +0 -103
{biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +0 -0

biopipen/scripts/scrna/PseudoBulkDEG.R CHANGED Viewed

@@ -1,6 +1,7 @@
 library(rlang)
 library(dplyr)
 library(plotthis)
+library(Seurat)
 library(biopipen.utils)
 sobjfile <- {{in.sobjfile | r}}
@@ -8,7 +9,9 @@ outdir <- {{out.outdir | r}}
 joboutdir <- {{job.outdir | r}}
 each <- {{envs.each | r}}
 subset <- {{envs.subset | r}}
+ncores <- {{envs.ncores | r}}
 mutaters <- {{envs.mutaters | r}}
+cache <- {{ envs.cache | r }}
 aggregate_by <- {{envs.aggregate_by | r}}
 layer <- {{envs.layer | r}}
 assay <- {{envs.assay | r}}
@@ -35,6 +38,7 @@ overlaps <- {{ envs.overlaps | r }}
 cases <- {{envs.cases | r}}
 aggregate_by <- unique(c(aggregate_by, group_by, paired_by, each))
+if (isTRUE(cache)) { cache <- joboutdir }
 log <- get_logger()
 reporter <- get_reporter()
@@ -74,10 +78,12 @@ defaults <- list(
     ident_1 = ident_1,
     ident_2 = ident_2,
     dbs = dbs,
+    ncores = ncores,
     sigmarkers = sigmarkers,
     enrich_style = enrich_style,
     paired_by = paired_by,
     tool = tool,
+    cache = cache,
     allmarker_plots_defaults = allmarker_plots_defaults,
     allmarker_plots = allmarker_plots,
     allenrich_plots_defaults = allenrich_plots_defaults,
@@ -131,12 +137,14 @@ expand_each <- function(name, case) {
         if (length(cases) == 0 && name == "DEG Analysis") {
             name <- case$each
+        } else {
+            name <- paste0(name, " (", case$each, ")")
         }
         case$aggregate_by <- unique(c(case$aggregate_by, case$group_by, case$paired_by, case$each))
         for (each in eachs) {
-            newname <- paste0(case$each, "::", each)
+            newname <- paste0(name, "::", each)
             newcase <- case
             newcase$original_case <- name
@@ -179,6 +187,7 @@ expand_each <- function(name, case) {
         if (length(case$overlaps) > 0 || length(case$allmarker_plots) > 0 || length(case$allenrich_plots) > 0) {
             ovcase <- case
+            ovcase$allexprs <- list()
             ovcase$markers <- list()
             ovcase$allmarker_plots <- lapply(
                 ovcase$allmarker_plots,
@@ -212,7 +221,52 @@ process_markers <- function(markers, info, case) {
     # markers <- markers %>%
     #     mutate(gene = as.character(gene)) %>%
     #     arrange(p_val_adj, desc(abs(avg_log2FC)))
+    empty <- if (case$enrich_style == "enrichr") {
+        data.frame(
+            Database = character(0),
+            Term = character(0),
+            Overlap = character(0),
+            P.value = numeric(0),
+            Adjusted.P.value = numeric(0),
+            Odds.Ratio = numeric(0),
+            Combined.Score = numeric(0),
+            Genes = character(0),
+            Rank = numeric(0)
+        )
+    } else {  # clusterProfiler
+        data.frame(
+            ID = character(0),
+            Description = character(0),
+            GeneRatio = character(0),
+            BgRatio = character(0),
+            Count = integer(0),
+            pvalue = numeric(0),
+            p.adjust = numeric(0),
+            qvalue = numeric(0),
+            geneID = character(0),
+            Database = character(0)
+        )
+    }
+    if (is.null(markers) || nrow(markers) == 0) {
+        if (case$error) {
+            stop("Error: No markers found in case '", info$name, "'.")
+        } else {
+            log$warn("! Warning: No markers found in case '", info$name, "'.")
+            reporter$add2(
+                list(
+                    name = "Warning",
+                    contents = list(list(kind = "error", content = "No markers found.", kind_ = "warning"))),
+                hs = c(info$section, info$name),
+                hs2 = "DEG Analysis",
+                ui = "tabs"
+            )
+            return(empty)
+        }
+    }
     markers$gene <- as.character(markers$gene)
+    markers$p_val_adj <- as.numeric(markers$p_val_adj)
+    markers$log2FC <- as.numeric(markers$log2FC)
     markers <- markers[order(markers$p_val_adj, -abs(markers$log2FC)), ]
     # Save markers
@@ -287,7 +341,7 @@ process_markers <- function(markers, info, case) {
             stop("Error: Not enough significant DEGs with '", case$sigmarkers, "' in case '", info$name, "' found (< 5) for enrichment analysis.")
         } else {
             message <- paste0("Not enough significant DEGs with '", case$sigmarkers, "' found (< 5) for enrichment analysis.")
-            log$warn("  ! Error: {message}")
+            log$warn("! Error: {message}")
             reporter$add2(
                 list(
                     name = "Warning",
@@ -345,7 +399,7 @@ process_markers <- function(markers, info, case) {
             if (case$error) {
                 stop("Error: ", e$message)
             } else {
-                log$warn("  ! Error: {e$message}")
+                log$warn("! Error: {e$message}")
                 reporter$add2(
                     list(
                         name = "Warning",
@@ -478,6 +532,7 @@ process_overlaps <- function(markers, ovcases, casename, groupname) {
 run_case <- function(name) {
     case <- cases[[name]]
+    log$info("----------------------------------------")
     log$info("Case: {name} ...")
     case <- extract_vars(
@@ -485,18 +540,21 @@ run_case <- function(name) {
         "dbs", "sigmarkers", "allmarker_plots", "allenrich_plots", "marker_plots", "enrich_plots",
         "overlaps", "original_case", "markers", "enriches", "each_name", "each", "enrich_style",
         "aggregate_by", "subset", "layer", "assay", "group_by", "ident_1", "ident_2", "original_subset",
-        "paired_by", "tool", "error",
+        "paired_by", "tool", "error", "ncores", "cache", "allexprs",
         allow_nonexisting = TRUE
     )
     if (!is.null(markers) || !is.null(enriches)) {
-        if (!is.null(markers)) {  # It is the overlap/allmarker case
-            log$info("- Summarizing DEGs in subcases (by each: {each}) ...")
+        if (!is.null(markers) && length(markers) > 0) {
+            log$info("Summarizing DEGs in subcases (by each: {each}) ...")
             # handle the overlaps / allmarkers analysis here
             if (!is.data.frame(markers)) {
                 each_levels <- names(markers)
                 markers <- do_call(rbind, lapply(each_levels, function(x) {
                     markers_df <- markers[[x]]
+                    if (is.null(markers_df) || nrow(markers_df) == 0) {
+                        return(NULL)
+                    }
                     if (nrow(markers_df) > 0) {
                         markers_df[[each]] <- x
                     } else {
@@ -508,17 +566,17 @@ run_case <- function(name) {
             }
             # gene, p_val, avg_log2FC, pct.1, pct.2, p_val_adj, diff_pct, <each>
+            if (!is.data.frame(allexprs)) {
+                meta <- do_call(rbind, lapply(allexprs, attr, "meta"))
+                allexprs <- do_call(cbind, allexprs)
+            } else {
+                meta <- attr(allexprs, "meta")
+            }
             if (length(allmarker_plots) > 0) {
-                log$info("- Visualizing all DEGs together ...")
-                exprs <- AggregateExpressionPseudobulk(
-                    srtobj, aggregate_by = aggregate_by, layer = layer, assay = assay,
-                    subset = original_subset, log = log
-                )
-                attr(markers, "object") <- AggregateExpressionPseudobulk(
-                    srtobj, aggregate_by = aggregate_by, layer = layer, assay = assay,
-                    subset = original_subset, log = log
-                )
-                attr(markers, "meta") <- attr(exprs, "meta")
+                log$info("Visualizing all DEGs together ...")
+                attr(markers, "object") <- allexprs
+                attr(markers, "meta") <- meta
                 attr(markers, "group_by") <- each
                 attr(markers, "paired_by") <- paired_by
                 attr(markers, "ident_1") <- NULL
@@ -527,18 +585,21 @@ run_case <- function(name) {
             }
             if (length(overlaps) > 0) {
-                log$info("- Visualizing overlaps between subcases ...")
+                log$info("Visualizing overlaps between subcases ...")
                 process_overlaps(markers, overlaps, name, each)
             }
         }
-        if (!is.null(enriches)) {
-            log$info("- Summarizing enrichments in subcases (by each: {each}) ...")
+        if (!is.null(enriches) && length(enriches) > 0) {
+            log$info("Summarizing enrichments in subcases (by each: {each}) ...")
             if (!is.data.frame(enriches)) {
                 each_levels <- names(enriches)
                 enriches <- do_call(rbind, lapply(each_levels, function(x) {
                     enrich_df <- enriches[[x]]
+                    if (is.null(enrich_df) || nrow(enrich_df) == 0) {
+                        return(NULL)
+                    }
                     if (nrow(enrich_df) > 0) {
                         enrich_df[[each]] <- x
                     } else {
@@ -546,11 +607,13 @@ run_case <- function(name) {
                     }
                     enrich_df
                 }))
-                enriches[[each]] <- factor(enriches[[each]], levels = each_levels)
+                if (!is.null(enriches) && nrow(enriches) > 0) {
+                    enriches[[each]] <- factor(enriches[[each]], levels = each_levels)
+                }
             }
-            if (length(allenrich_plots) > 0) {
-                log$info("- Visualizing all enrichments together ...")
+            if (length(allenrich_plots) > 0 && !is.null(enriches) && nrow(enriches) > 0) {
+                log$info("Visualizing all enrichments together ...")
                 process_allenriches(enriches, allenrich_plots, name, each)
             }
         }
@@ -558,16 +621,36 @@ run_case <- function(name) {
         return(invisible())
     }
+    info <- case_info(name, outdir, create = TRUE)
     exprs <- AggregateExpressionPseudobulk(
         srtobj, aggregate_by = aggregate_by, layer = layer, assay = assay,
         subset = subset, log = log
     )
-    markers <- RunDEGAnalysis(
-        exprs, group_by = group_by, ident_1 = ident_1, ident_2 = ident_2,
-        paired_by = paired_by, tool = tool, log = log
+    markers <- tryCatch(
+        {
+            RunDEGAnalysis(
+                exprs, group_by = group_by, ident_1 = ident_1, ident_2 = ident_2,
+                paired_by = paired_by, tool = tool, log = log, ncores = ncores,
+                cache = cache
+            )
+        }, error = function(e) {
+            if (error) {
+                stop("Error: ", e$message)
+            } else {
+                log$warn("! Error: {e$message}")
+                reporter$add2(
+                    list(
+                        name = "Warning",
+                        contents = list(list(kind = "error", content = e$message, kind_ = "warning"))),
+                    hs = c(info$section, info$name),
+                    hs2 = "DEG Analysis",
+                    ui = "tabs"
+                )
+                return(invisible())
+            }
+        }
     )
-    info <- case_info(name, outdir, create = TRUE)
     enrich <- process_markers(markers, info = info, case = list(
         dbs = dbs,
         sigmarkers = sigmarkers,
@@ -579,9 +662,12 @@ run_case <- function(name) {
     ))
     if (!is.null(original_case) && !is.null(cases[[original_case]])) {
-        markers[[each_name]] <- each
+        if (!is.null(markers)) {
+            markers[[each_name]] <- each
+        }
         cases[[original_case]]$markers[[each]] <<- markers
         cases[[original_case]]$enriches[[each]] <<- enrich
+        cases[[original_case]]$allexprs[[each]] <<- exprs
     }
     invisible()

biopipen/scripts/scrna/ScFGSEA.R CHANGED Viewed

@@ -10,6 +10,7 @@ mutaters <- {{envs.mutaters | r}}  # nolint
 group_by <- {{envs.group_by | default: envs["group-by"] | default: None | r}}  # nolint
 ident_1 <- {{envs.ident_1 | default: envs["ident-1"] | default: None | r}}  # nolint
 ident_2 <- {{envs.ident_2 | default: envs["ident-2"] | default: None | r}}  # nolint
+assay <- {{envs.assay | r}}  # nolint
 each <- {{envs.each | r}}  # nolint
 subset <- {{envs.subset | r}}  # nolint
 gmtfile <- {{envs.gmtfile | r}}  # nolint
@@ -33,9 +34,6 @@ alleach_plots <- lapply(alleach_plots, function(x) {
 log$info("Reading Seurat object ...")
 srtobj <- read_obj(srtfile)
-if (!"Identity" %in% colnames(srtobj@meta.data)) {
-    srtobj@meta.data$Identity <- Idents(srtobj)
-}
 if (!is.null(mutaters) && length(mutaters) > 0) {
     log$info("Mutating metadata columns ...")
@@ -46,6 +44,7 @@ defaults <- list(
     group_by = group_by,
     ident_1 = ident_1,
     ident_2 = ident_2,
+    assay = assay,
     each = each,
     subset = subset,
     gmtfile = gmtfile,
@@ -63,7 +62,7 @@ defaults <- list(
 expand_each <- function(name, case) {
     outcases <- list()
-    case$group_by <- case$group_by %||% "Identity"
+    case$group_by <- case$group_by %||% GetIdentityColumn(srtobj)
     if (is.null(case$each) || is.na(case$each) || nchar(case$each) == 0 || isFALSE(each)) {
         if (length(case$alleach_plots) > 0) {
@@ -82,11 +81,13 @@ expand_each <- function(name, case) {
         }
         if (length(cases) == 0 && name == "GSEA") {
-            name <- case$each
+            prefix <- case$each
+        } else {
+            prefix <- paste0(name, " (", case$each, ")")
         }
         for (each in eachs) {
-            newname <- paste0(case$each, "::", each)
+            newname <- paste0(prefix, "::", each)
             newcase <- case
             newcase$original_case <- paste0(name, " (all ", case$each,")")
@@ -142,6 +143,11 @@ do_case <- function(name) {
     if (!is.null(case$gseas)) {
+        if (length(case$gseas) == 0) {
+            log$warn("  No GSEA results found for case {name}. Skipping.")
+            return(invisible(NULL))
+        }
         each_levels <- names(case$gseas)
         gseas <- do_call(rbind, lapply(each_levels, function(x) {
             gsea_df <- case$gseas[[x]]
@@ -226,7 +232,7 @@ do_case <- function(name) {
         case$ident_2 <- "Other"
         allclasses[allclasses != case$ident_1] <- "Other"
     }
-    exprs <- GetAssayData(sobj, layer = "data")
+    exprs <- GetAssayData(sobj, layer = "data", assay = case$assay)
     # get preranks
     log$info("  Getting preranks...")
@@ -240,25 +246,16 @@ do_case <- function(name) {
         quote = FALSE
     )
     if (all(is.na(ranks))) {
-        if (length(allclasses) < 100) {
-            log$warn("  Ignoring this case because all gene ranks are NA and there are <100 cells.")
-            reporter$add2(
-                list(
-                    kind = "error",
-                    content = paste0("Not enough cells (n = ", length(allclasses), ") to run fgsea.")
-                ),
-                hs = c(info$section, info$name)
-            )
-            return(NULL)
-        } else {
-            stop(paste0(
-                "All gene ranks are NA (# cells = ",
-                length(allclasses),
-                "). ",
-                "It's probably due to high missing rate in the data. ",
-                "You may want to try a different `envs$method` for pre-ranking."
-            ))
-        }
+        log$warn("  All gene ranks are NA. It's probably due to high missing rate in the data.")
+        log$warn("  Case ignored, you may also try a different ranking method.")
+        reporter$add2(
+            list(
+                kind = "error",
+                content = "All gene ranks are NA. It's probably due to high missing rate in the data."
+            ),
+            hs = c(info$section, info$name)
+        )
+        return(invisible(NULL))
     }
     # run fgsea

biopipen/scripts/scrna/ScVelo.py CHANGED Viewed

@@ -7,13 +7,21 @@ from diot import Diot  # type: ignore[import]
 import scanpy as sc
 import scvelo as scv
 import numpy as np
+import matplotlib
+matplotlib.use('Agg')
 import matplotlib.pyplot as plt
-from biopipen.utils.misc import logger
+from biopipen.utils.misc import logger, require_package
 from biopipen.scripts.scrna.seurat_anndata_conversion import (
     convert_seurat_to_anndata,
     convert_anndata_to_seurat,
 )
+require_package("scvelo", ">=0.3.3")
+from biopipen.scripts.scrna import scvelo_paga  # noqa: F401
+warnings.simplefilter("ignore", category=UserWarning)
+warnings.simplefilter("ignore", category=FutureWarning)
+warnings.simplefilter("ignore", category=DeprecationWarning)
 def SCVELO(
@@ -45,10 +53,6 @@ def SCVELO(
     dpi=100,
     fileprefix="",
 ):
-    warnings.simplefilter("ignore", category=UserWarning)
-    warnings.simplefilter("ignore", category=FutureWarning)
-    warnings.simplefilter("ignore", category=DeprecationWarning)
     os.chdir(os.path.expanduser(dirpath))
     if linear_reduction is None:
         sc.pp.pca(adata, n_comps=n_pcs)
@@ -526,18 +530,26 @@ calculate_velocity_genes: bool = {{envs.calculate_velocity_genes | repr}}  # pyr
 top_n: int = {{envs.top_n | repr}}  # pyright: ignore  # noqa: E999
 rscript: str = {{envs.rscript | repr}}  # pyright: ignore  # noqa: E999
-if group_by is None:
-    raise ValueError("The 'envs.group_by' parameter must be specified.")
 if sobjfile.endswith(".h5ad"):
     h5ad_file = Path(sobjfile)
 else:
     h5ad_file = Path(outfile).with_suffix(".input.h5ad")
     logger.info("Converting Seurat object to AnnData (h5ad) format...")
-    convert_seurat_to_anndata(
+    seurat_ident_col = convert_seurat_to_anndata(
         input_file=sobjfile,
         output_file=h5ad_file,
         rscript=rscript,
+        return_ident_col=not group_by,
+    )
+    group_by = group_by or seurat_ident_col
+if group_by is None:
+    group_by = "seurat_clusters"
+    logger.warning(
+        "`envs.group_by` is not provided. "
+        "Using 'seurat_clusters' as the default groupby column. "
+        "It is recommended to provide the `envs.group_by` parameter."
     )
 logger.info(f"Reading AnnData (h5ad) file ...")

biopipen/scripts/scrna/SeuratClusterStats-clustree.R CHANGED Viewed

@@ -16,7 +16,7 @@ if (
             if (startsWith(key, "FindClusters") && length(srtobj@commands[[key]]$resolution) > 1) {
                 pref <- substring(key, 14)
                 if (pref == "") {
-                    pref <- "seurat_clusters"
+                    pref <- biopipen.utils::GetIdentityColumn(srtobj)
                 }
                 clustrees[[pref]] <- list(prefix = pref)

biopipen/scripts/scrna/SeuratClusterStats-features.R CHANGED Viewed

@@ -107,7 +107,12 @@ do_one_features <- function(name) {
         caching$restore()
     } else {
         case$features <- .get_features(features, case$object)
-        p <- do_call(gglogger::register(FeatureStatPlot), case)
+        p <- tryCatch({
+            do_call(gglogger::register(FeatureStatPlot), case)
+        }, error = function(e) {
+            if (save_code) { stop(e) }
+            do_call(FeatureStatPlot, case)
+        })
         save_plot(p, info$prefix, devpars, formats = c("png", more_formats))
         if (save_code) {
             save_plotcode(p, info$prefix,

biopipen/scripts/scrna/SeuratClustering.R CHANGED Viewed

@@ -1,4 +1,5 @@
+library(rlang)
 library(Seurat)
 library(biopipen.utils)
@@ -11,13 +12,16 @@ RunPCAArgs <- {{envs.RunPCA | r: todot="-"}}
 FindNeighborsArgs <- {{envs.FindNeighbors | r: todot="-"}}
 FindClustersArgs <- {{envs.FindClusters | r: todot="-"}}
 RunUMAPArgs <- {{envs.RunUMAP | r: todot="-"}}
+ident <- {{envs.ident | r }}
 cache <- {{envs.cache | r}}
 ncores <- {{envs.ncores | r}}
+FindClustersArgs$cluster.name <- FindClustersArgs$cluster.name %||% ident %||% "seurat_clusters"
 log <- get_logger()
 # options(str = strOptions(vec.len = 5, digits.d = 5))
-options(future.globals.maxSize = 80000 * 1024^2)
+options(future.globals.maxSize = Inf)
 plan(strategy = "multicore", workers = ncores)
 log$info("Reading Seurat object ...")

biopipen/scripts/scrna/SeuratMap2Ref.R CHANGED Viewed

@@ -25,7 +25,7 @@ plots = {{envs.plots | r}}
 log <- get_logger()
 reporter <- get_reporter()
-options(future.globals.maxSize = 8 * 1024 ^ 4)
+options(future.globals.maxSize = Inf)
 options(future.rng.onMisuse="ignore")
 options(Seurat.object.assay.version = "v5")
@@ -43,7 +43,6 @@ if (isTRUE(cache)) {
     cache = joboutdir
 }
 if (is.null(split_by)) {
-    options(future.globals.maxSize = 8 * 1024 ^ 4)
     future::plan(strategy = "multicore", workers = ncores)
 }

biopipen/scripts/scrna/SeuratPreparing.R CHANGED Viewed

@@ -17,7 +17,7 @@ reporter <- get_reporter()
 set.seed(8525)
 # 8TB
-options(future.globals.maxSize = 8 * 1024 ^ 4)
+options(future.globals.maxSize = Inf)
 options(future.rng.onMisuse="ignore")
 options(Seurat.object.assay.version = "v5")
 plan(strategy = "multicore", workers = envs$ncores)
@@ -38,19 +38,27 @@ reporter$add(
     h1 = "Filters and QC"
 )
-metadata <- read.table(
-    metafile,
-    header = TRUE,
-    row.names = NULL,
-    sep = "\t",
-    check.names = FALSE
-)
+metadata <- tryCatch({
+    log$debug("Trying to read Seurat object from metafile ...")
+    read_obj(metafile)
+}, error = function(e) {
+    log$debug("Failed to read Seurat object from metafile: {e$message}")
+    log$debug("Reading metafile as a table (sample info) ...")
+    read.table(
+        metafile,
+        header = TRUE,
+        row.names = NULL,
+        sep = "\t",
+        check.names = FALSE
+    )
+})
+is_seurat <- inherits(metadata, "Seurat")
-meta_cols = colnames(metadata)
+meta_cols <- if (is_seurat) colnames(metadata@meta.data) else colnames(metadata)
 if (!"Sample" %in% meta_cols) {
-    stop("Error: Column `Sample` is not found in metafile.")
+    stop("Error: Column `Sample` is not found in ", ifelse(is_seurat, "Seurat object's meta.data.", "metafile."))
 }
-if (!"RNAData" %in% meta_cols) {
+if (!"RNAData" %in% meta_cols && !is_seurat) {
     stop("Error: Column `RNAData` is not found in metafile.")
 }

biopipen/scripts/scrna/SeuratSubClustering.R CHANGED Viewed

@@ -17,7 +17,7 @@ FindNeighborsArgs <- {{envs.FindNeighbors | r: todot = "-"}}
 FindClustersArgs <- {{envs.FindClusters | r: todot = "-"}}
 cases <- {{envs.cases | r}}
-options(future.globals.maxSize = 80000 * 1024^2)
+options(future.globals.maxSize = Inf)
 plan(strategy = "multicore", workers = ncores)
 log <- get_logger()

biopipen/scripts/scrna/Slingshot.R CHANGED Viewed

@@ -16,16 +16,14 @@ align_start <- {{envs.align_start | r}}
 seed <- {{envs.seed | r}}
 set.seed(seed)
-if (is.null(group_by)) {
-    stop("envs.group_by is required")
-}
 log <- get_logger()
 log$info("Reading Seurat object ...")
 srt <- read_obj(sobjfile)
+group_by <- group_by %||% biopipen.utils::GetIdentityColumn(srt)
-if (!group_by %in% colnames(srt@meta.data)) {
+if (is.null(group_by) || !group_by %in% colnames(srt@meta.data)) {
     stop(paste("Grouping column", group_by, "not found in the Seurat object"))
 }

biopipen/scripts/scrna/TopExpressingGenes.R CHANGED Viewed

@@ -25,9 +25,6 @@ reporter <- get_reporter()
 log$info("Reading Seurat object ...")
 srtobj <- read_obj(srtfile)
-if (!"Identity" %in% colnames(srtobj@meta.data)) {
-    srtobj@meta.data$Identity <- Idents(srtobj)
-}
 assay <- DefaultAssay(srtobj)
 if (!is.null(mutaters) && length(mutaters) > 0) {
@@ -171,7 +168,7 @@ run_case <- function(name) {
     } else {
         subobj <- srtobj
     }
-    case$group_by <- case$group_by %||% "Identity"
+    case$group_by <- case$group_by %||% GetIdentityColumn(srtobj)
     if (is.null(case$ident)) {
         case$ident <- as.character(unique(subobj@meta.data[[case$group_by]]))
     }

biopipen 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl

biopipen 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl