PyPI - biopipen - Versions diffs - 0.34.7__py3-none-any.whl → 0.34.8__py3-none-any.whl - Mend

biopipen 0.34.7__py3-none-any.whl → 0.34.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (9) hide show

biopipen/__init__.py +1 -1
biopipen/ns/scrna.py +6 -0
biopipen/scripts/scrna/MarkersFinder.R +28 -18
biopipen/scripts/scrna/PseudoBulkDEG.R +39 -21
biopipen/scripts/scrna/ScFGSEA.R +18 -22
{biopipen-0.34.7.dist-info → biopipen-0.34.8.dist-info}/METADATA +1 -1
{biopipen-0.34.7.dist-info → biopipen-0.34.8.dist-info}/RECORD +9 -9
{biopipen-0.34.7.dist-info → biopipen-0.34.8.dist-info}/WHEEL +0 -0
{biopipen-0.34.7.dist-info → biopipen-0.34.8.dist-info}/entry_points.txt +0 -0

biopipen/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.34.7"
1	+ __version__ = "0.34.8"

biopipen/ns/scrna.py CHANGED Viewed

@@ -2706,6 +2706,7 @@ class PseudoBulkDEG(Proc):
             analysis.
     Envs:
+        ncores (type=int): Number of cores to use for parallelization.
         mutaters (type=json): Mutaters to mutate the metadata of the
             seurat object. Keys are the new column names and values are the
             expressions to mutate the columns. These new columns can be
@@ -2715,6 +2716,9 @@ class PseudoBulkDEG(Proc):
         each: The column name in metadata to separate the cells into different cases.
             When specified, the case will be expanded to multiple cases for
             each value in the column.
+        cache (type=auto): Where to cache the results.
+            If `True`, cache to `outdir` of the job. If `False`, don't cache.
+            Otherwise, specify the directory to cache to.
         subset: An expression in string to subset the cells.
         aggregate_by: The column names in metadata to aggregate the cells.
         layer: The layer to pull and aggregate the data.
@@ -2844,7 +2848,9 @@ class PseudoBulkDEG(Proc):
     lang = config.lang.rscript
     script = "file://../scripts/scrna/PseudoBulkDEG.R"
     envs = {
+        "ncores": config.misc.ncores,
         "mutaters": {},
+        "cache": config.path.tmpdir,
         "each": None,
         "subset": None,
         "aggregate_by": None,

biopipen/scripts/scrna/MarkersFinder.R CHANGED Viewed

@@ -268,20 +268,22 @@ process_markers <- function(markers, info, case) {
         ui = "tabs"
     )
-    for (plotname in names(case$marker_plots)) {
-        plotargs <- case$marker_plots[[plotname]]
-        plotargs$degs <- markers
-        rownames(plotargs$degs) <- make.unique(markers$gene)
-        plotargs$outprefix <- file.path(info$prefix, paste0("markers.", slugify(plotname)))
-        do_call(VizDEGs, plotargs)
-        reporter$add2(
-            list(
-                name = plotname,
-                contents = list(reporter$image(plotargs$outprefix, plotargs$more_formats, plotargs$save_code))),
-            hs = c(info$section, info$name),
-            hs2 = ifelse(is.null(case$ident), "Markers", paste0("Markers (", case$ident, ")")),
-            ui = "tabs"
-        )
+    if (nrow(markers) > 0) {
+        for (plotname in names(case$marker_plots)) {
+            plotargs <- case$marker_plots[[plotname]]
+            plotargs$degs <- markers
+            rownames(plotargs$degs) <- make.unique(markers$gene)
+            plotargs$outprefix <- file.path(info$prefix, paste0("markers.", slugify(plotname)))
+            do_call(VizDEGs, plotargs)
+            reporter$add2(
+                list(
+                    name = plotname,
+                    contents = list(reporter$image(plotargs$outprefix, plotargs$more_formats, plotargs$save_code))),
+                hs = c(info$section, info$name),
+                hs2 = ifelse(is.null(case$ident), "Markers", paste0("Markers (", case$ident, ")")),
+                ui = "tabs"
+            )
+        }
     }
     # Do enrichment analysis
@@ -399,6 +401,10 @@ process_allmarkers <- function(markers, plotcases, casename, groupname) {
         plotargs <- plotcases[[plotname]]
         plotargs$degs <- markers
         plotargs$outprefix <- file.path(info$prefix, slugify(plotname))
+        if (identical(plotargs$plot_type, "heatmap")) {
+            plotargs$show_row_names = plotargs$show_row_names %||% TRUE
+            plotargs$show_column_names = plotargs$show_column_names %||% TRUE
+        }
         do_call(VizDEGs, plotargs)
         reporter$add2(
             list(
@@ -547,7 +553,9 @@ run_case <- function(name) {
                 attr(markers, "group_by") <- each
                 attr(markers, "ident_1") <- NULL
                 attr(markers, "ident_2") <- NULL
-                process_allmarkers(markers, allmarker_plots, name, each)
+                if (!is.null(markers) && nrow(markers) > 0) {
+                    process_allmarkers(markers, allmarker_plots, name, each)
+                }
             }
             if (length(overlaps) > 0) {
@@ -557,7 +565,7 @@ run_case <- function(name) {
         }
-        if (!is.null(enriches)) {
+        if (!is.null(enriches) && length(enriches) > 0) {
             log$info("- Summarizing enrichments in subcases (by each: {each}) ...")
             if (!is.data.frame(enriches)) {
                 each_levels <- names(enriches)
@@ -573,7 +581,7 @@ run_case <- function(name) {
                 enriches[[each]] <- factor(enriches[[each]], levels = each_levels)
             }
-            if (length(allenrich_plots) > 0) {
+            if (length(allenrich_plots) > 0 && !is.null(enriches) && nrow(enriches) > 0) {
                 log$info("- Visualizing all enrichments together ...")
                 process_allenriches(enriches, allenrich_plots, name, each)
             }
@@ -636,7 +644,9 @@ run_case <- function(name) {
         ))
         if (!is.null(original_case) && !is.null(cases[[original_case]])) {
-            markers[[each_name]] <- each
+            if (nrow(markers) > 0) {
+                markers[[each_name]] <- each
+            }
             cases[[original_case]]$markers[[each]] <<- markers
             cases[[original_case]]$enriches[[each]] <<- enrich
         }

biopipen/scripts/scrna/PseudoBulkDEG.R CHANGED Viewed

@@ -8,7 +8,9 @@ outdir <- {{out.outdir | r}}
 joboutdir <- {{job.outdir | r}}
 each <- {{envs.each | r}}
 subset <- {{envs.subset | r}}
+ncores <- {{envs.ncores | r}}
 mutaters <- {{envs.mutaters | r}}
+cache <- {{ envs.cache | r }}
 aggregate_by <- {{envs.aggregate_by | r}}
 layer <- {{envs.layer | r}}
 assay <- {{envs.assay | r}}
@@ -35,6 +37,7 @@ overlaps <- {{ envs.overlaps | r }}
 cases <- {{envs.cases | r}}
 aggregate_by <- unique(c(aggregate_by, group_by, paired_by, each))
+if (isTRUE(cache)) { cache <- joboutdir }
 log <- get_logger()
 reporter <- get_reporter()
@@ -74,10 +77,12 @@ defaults <- list(
     ident_1 = ident_1,
     ident_2 = ident_2,
     dbs = dbs,
+    ncores = ncores,
     sigmarkers = sigmarkers,
     enrich_style = enrich_style,
     paired_by = paired_by,
     tool = tool,
+    cache = cache,
     allmarker_plots_defaults = allmarker_plots_defaults,
     allmarker_plots = allmarker_plots,
     allenrich_plots_defaults = allenrich_plots_defaults,
@@ -181,6 +186,7 @@ expand_each <- function(name, case) {
         if (length(case$overlaps) > 0 || length(case$allmarker_plots) > 0 || length(case$allenrich_plots) > 0) {
             ovcase <- case
+            ovcase$allexprs <- list()
             ovcase$markers <- list()
             ovcase$allmarker_plots <- lapply(
                 ovcase$allmarker_plots,
@@ -533,18 +539,21 @@ run_case <- function(name) {
         "dbs", "sigmarkers", "allmarker_plots", "allenrich_plots", "marker_plots", "enrich_plots",
         "overlaps", "original_case", "markers", "enriches", "each_name", "each", "enrich_style",
         "aggregate_by", "subset", "layer", "assay", "group_by", "ident_1", "ident_2", "original_subset",
-        "paired_by", "tool", "error",
+        "paired_by", "tool", "error", "ncores", "cache", "allexprs",
         allow_nonexisting = TRUE
     )
     if (!is.null(markers) || !is.null(enriches)) {
-        if (!is.null(markers)) {  # It is the overlap/allmarker case
-            log$info("- Summarizing DEGs in subcases (by each: {each}) ...")
+        if (!is.null(markers) && length(markers) > 0) {
+            log$info("Summarizing DEGs in subcases (by each: {each}) ...")
             # handle the overlaps / allmarkers analysis here
             if (!is.data.frame(markers)) {
                 each_levels <- names(markers)
                 markers <- do_call(rbind, lapply(each_levels, function(x) {
                     markers_df <- markers[[x]]
+                    if (is.null(markers_df) || nrow(markers_df) == 0) {
+                        return(NULL)
+                    }
                     if (nrow(markers_df) > 0) {
                         markers_df[[each]] <- x
                     } else {
@@ -556,17 +565,17 @@ run_case <- function(name) {
             }
             # gene, p_val, avg_log2FC, pct.1, pct.2, p_val_adj, diff_pct, <each>
+            if (!is.data.frame(allexprs)) {
+                meta <- do_call(rbind, lapply(allexprs, attr, "meta"))
+                allexprs <- do_call(cbind, allexprs)
+            } else {
+                meta <- attr(allexprs, "meta")
+            }
             if (length(allmarker_plots) > 0) {
-                log$info("- Visualizing all DEGs together ...")
-                exprs <- AggregateExpressionPseudobulk(
-                    srtobj, aggregate_by = aggregate_by, layer = layer, assay = assay,
-                    subset = original_subset, log = log
-                )
-                attr(markers, "object") <- AggregateExpressionPseudobulk(
-                    srtobj, aggregate_by = aggregate_by, layer = layer, assay = assay,
-                    subset = original_subset, log = log
-                )
-                attr(markers, "meta") <- attr(exprs, "meta")
+                log$info("Visualizing all DEGs together ...")
+                attr(markers, "object") <- allexprs
+                attr(markers, "meta") <- meta
                 attr(markers, "group_by") <- each
                 attr(markers, "paired_by") <- paired_by
                 attr(markers, "ident_1") <- NULL
@@ -575,18 +584,21 @@ run_case <- function(name) {
             }
             if (length(overlaps) > 0) {
-                log$info("- Visualizing overlaps between subcases ...")
+                log$info("Visualizing overlaps between subcases ...")
                 process_overlaps(markers, overlaps, name, each)
             }
         }
-        if (!is.null(enriches)) {
-            log$info("- Summarizing enrichments in subcases (by each: {each}) ...")
+        if (!is.null(enriches) && length(enriches) > 0) {
+            log$info("Summarizing enrichments in subcases (by each: {each}) ...")
             if (!is.data.frame(enriches)) {
                 each_levels <- names(enriches)
                 enriches <- do_call(rbind, lapply(each_levels, function(x) {
                     enrich_df <- enriches[[x]]
+                    if (is.null(enrich_df) || nrow(enrich_df) == 0) {
+                        return(NULL)
+                    }
                     if (nrow(enrich_df) > 0) {
                         enrich_df[[each]] <- x
                     } else {
@@ -594,11 +606,13 @@ run_case <- function(name) {
                     }
                     enrich_df
                 }))
-                enriches[[each]] <- factor(enriches[[each]], levels = each_levels)
+                if (!is.null(enriches) && nrow(enriches) > 0) {
+                    enriches[[each]] <- factor(enriches[[each]], levels = each_levels)
+                }
             }
-            if (length(allenrich_plots) > 0) {
-                log$info("- Visualizing all enrichments together ...")
+            if (length(allenrich_plots) > 0 && !is.null(enriches) && nrow(enriches) > 0) {
+                log$info("Visualizing all enrichments together ...")
                 process_allenriches(enriches, allenrich_plots, name, each)
             }
         }
@@ -615,7 +629,8 @@ run_case <- function(name) {
         {
             RunDEGAnalysis(
                 exprs, group_by = group_by, ident_1 = ident_1, ident_2 = ident_2,
-                paired_by = paired_by, tool = tool, log = log
+                paired_by = paired_by, tool = tool, log = log, ncores = ncores,
+                cache = cache
             )
         }, error = function(e) {
             if (error) {
@@ -646,9 +661,12 @@ run_case <- function(name) {
     ))
     if (!is.null(original_case) && !is.null(cases[[original_case]])) {
-        markers[[each_name]] <- each
+        if (!is.null(markers)) {
+            markers[[each_name]] <- each
+        }
         cases[[original_case]]$markers[[each]] <<- markers
         cases[[original_case]]$enriches[[each]] <<- enrich
+        cases[[original_case]]$allexprs[[each]] <<- exprs
     }
     invisible()

biopipen/scripts/scrna/ScFGSEA.R CHANGED Viewed

@@ -82,13 +82,13 @@ expand_each <- function(name, case) {
         }
         if (length(cases) == 0 && name == "GSEA") {
-            name <- case$each
+            prefix <- case$each
         } else {
-            name <- paste0(name, " (", case$each, ")")
+            prefix <- paste0(name, " (", case$each, ")")
         }
         for (each in eachs) {
-            newname <- paste0(name, "::", each)
+            newname <- paste0(prefix, "::", each)
             newcase <- case
             newcase$original_case <- paste0(name, " (all ", case$each,")")
@@ -144,6 +144,11 @@ do_case <- function(name) {
     if (!is.null(case$gseas)) {
+        if (length(case$gseas) == 0) {
+            log$warn("  No GSEA results found for case {name}. Skipping.")
+            return(invisible(NULL))
+        }
         each_levels <- names(case$gseas)
         gseas <- do_call(rbind, lapply(each_levels, function(x) {
             gsea_df <- case$gseas[[x]]
@@ -242,25 +247,16 @@ do_case <- function(name) {
         quote = FALSE
     )
     if (all(is.na(ranks))) {
-        if (length(allclasses) < 100) {
-            log$warn("  Ignoring this case because all gene ranks are NA and there are <100 cells.")
-            reporter$add2(
-                list(
-                    kind = "error",
-                    content = paste0("Not enough cells (n = ", length(allclasses), ") to run fgsea.")
-                ),
-                hs = c(info$section, info$name)
-            )
-            return(NULL)
-        } else {
-            stop(paste0(
-                "All gene ranks are NA (# cells = ",
-                length(allclasses),
-                "). ",
-                "It's probably due to high missing rate in the data. ",
-                "You may want to try a different `envs$method` for pre-ranking."
-            ))
-        }
+        log$warn("  All gene ranks are NA. It's probably due to high missing rate in the data.")
+        log$warn("  Case ignored, you may also try a different ranking method.")
+        reporter$add2(
+            list(
+                kind = "error",
+                content = "All gene ranks are NA. It's probably due to high missing rate in the data."
+            ),
+            hs = c(info$section, info$name)
+        )
+        return(invisible(NULL))
     }
     # run fgsea

{biopipen-0.34.7.dist-info → biopipen-0.34.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: biopipen
-Version: 0.34.7
+Version: 0.34.8
 Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
 License: MIT
 Author: pwwang

{biopipen-0.34.7.dist-info → biopipen-0.34.8.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-biopipen/__init__.py,sha256=vVRUKRt0zUNKxfGQQE5WrQiVWQ-bg4UrgyEGX7LclcA,23
+biopipen/__init__.py,sha256=R2kKZIbRrNhrmdllokG5_J0gtqktdwFV00CiImolksE,23
 biopipen/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 biopipen/core/config.py,sha256=edK5xnDhM8j27srDzsxubi934NMrglLoKrdcC8qsEPk,1069
 biopipen/core/config.toml,sha256=lZV_vbYWk6uqm19ZWJcsZCcSNqAdIfN2fOfamzxZpg4,2148
@@ -22,7 +22,7 @@ biopipen/ns/plot.py,sha256=N41_izb6zi-XArUly5WhLebapNXbTNSgGlOCCwtrDlY,18282
 biopipen/ns/protein.py,sha256=YJtlKoHI2p5yHdxKeQnNtm5QrbxDGOq1UXOdt_7tlTs,6391
 biopipen/ns/regulatory.py,sha256=gJjGVpJrdv-rg2t5UjK4AGuvtLNymaNYNvoD8PhlbvE,15929
 biopipen/ns/rnaseq.py,sha256=bKAa6friFWof4yDTWZQahm1MS-lrdetO1GqDKdfxXYc,7708
-biopipen/ns/scrna.py,sha256=ELhCbY2Vu8qHmDHlrI32gyaOxDO2ugFLz4WIV9kARfQ,144750
+biopipen/ns/scrna.py,sha256=4dqgsj1cQGFWsJbrbdVF6ElaELmLBg76RlqqGmsW1iA,145087
 biopipen/ns/scrna_metabolic_landscape.py,sha256=EwLMrsj_pTqvyAgtHLoishjQxCg_j8n5OofuTofUph0,22096
 biopipen/ns/snp.py,sha256=iXWrw7Lmhf4_ct57HGT7JGTClCXUD4sZ2FzOgsC2pTg,28123
 biopipen/ns/stats.py,sha256=DlPyK5Vsg6ZEkV9SDS3aAw21eXzvOHgqeZDkXPhg7go,20509
@@ -155,13 +155,13 @@ biopipen/scripts/scrna/ExprImputation-rmagic.R,sha256=ePgbMZ_3bKbeUrjsMdkdtBM_MS
 biopipen/scripts/scrna/ExprImputation-scimpute.R,sha256=MI_bYfvCDKJsuGntUxfx_-NdrssBoQgL95-DGwJVE5s,1191
 biopipen/scripts/scrna/ExprImputation.R,sha256=GcdZJpkDpq88hRQjtLZY5-byp8V43stEFm5T-pQbU6A,319
 biopipen/scripts/scrna/LoomTo10X.R,sha256=c6F0p1udsL5UOlb84-53K5BsjSDWkdFyYTt5NQmlIec,1059
-biopipen/scripts/scrna/MarkersFinder.R,sha256=A-YCJ2WogU2QR8PqVn71lXCP63Vq1sMyAAIhqZYYawg,24278
+biopipen/scripts/scrna/MarkersFinder.R,sha256=-W0rwS4IlR6DsY9gzGBgBN3aYD_7Tyseg2e8mxM6xA0,24796
 biopipen/scripts/scrna/MetaMarkers.R,sha256=BgYaWYEj6obwqaZaDWqNPtxb1IEEAnXAeBE0Ji9PvBA,12426
 biopipen/scripts/scrna/ModuleScoreCalculator.R,sha256=-tByCPk7i070LynAb0z2ANeRxr1QqiKP0dfrJm52jH4,4198
-biopipen/scripts/scrna/PseudoBulkDEG.R,sha256=Y5OuVCaIIppBqMxxXM3HpJQk5kA42wSgbBBIC1Rr1s0,24608
+biopipen/scripts/scrna/PseudoBulkDEG.R,sha256=IuM4hl-KHZ5aaaTqZeylw4b1ZenMZaY4qobD5qxAlHs,25199
 biopipen/scripts/scrna/RadarPlots.R,sha256=Kn1E-hpczuujpgNjR8MqeIIVN-S3PbpmfcKWGKcNCVY,14546
 biopipen/scripts/scrna/SCImpute.R,sha256=dSJOHhmJ3x_72LBRXT72dbCti5oiB85CJ-OjWtqONbk,2958
-biopipen/scripts/scrna/ScFGSEA.R,sha256=EyRbsH5d1daIxtOHjYz24Udmv1PhV0nUC9HqEtzRnpE,11584
+biopipen/scripts/scrna/ScFGSEA.R,sha256=Q3_fmVy1OGan_EHo6EHgoHa6Zgfl_i0wUv_KrwammCo,11440
 biopipen/scripts/scrna/ScSimulation.R,sha256=q0-dXD9px1cApc_TxGmR-OdNHE8W1VSVWfSI57B96bo,1697
 biopipen/scripts/scrna/ScVelo.py,sha256=SPUZFgZW1Zhw-bnjJo98RK0vpuNFODQ8Q3eTguNc84k,21359
 biopipen/scripts/scrna/Seurat2AnnData.R,sha256=F8g5n2CqX4-KBggxd8ittz8TejYuqqNLMudAHdFt1QM,184
@@ -284,7 +284,7 @@ biopipen/utils/misc.py,sha256=pDZ-INWVNqHuXYvcjmu8KqNAigkh2lsHy0BxX44CPvc,4048
 biopipen/utils/reference.py,sha256=Oc6IlA1giLxymAuI7DO-IQLHQ7-DbsWzOQE86oTDfMU,5955
 biopipen/utils/reporter.py,sha256=VwLl6xyVDWnGY7NEXyqBlkW8expKJoNQ5iTyZSELf5c,4922
 biopipen/utils/vcf.py,sha256=MmMbAtLUcKPp02jUdk9TzuET2gWSeoWn7xgoOXFysK0,9393
-biopipen-0.34.7.dist-info/METADATA,sha256=uR3Q2oygeFSoT96Lp0wQGCcigCJGaIyzbYzdJ2wlWVw,975
-biopipen-0.34.7.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-biopipen-0.34.7.dist-info/entry_points.txt,sha256=BYqHGBQJxyFDNLYqgH64ycI5PYwnlqwYcCFsMvJgzAU,653
-biopipen-0.34.7.dist-info/RECORD,,
+biopipen-0.34.8.dist-info/METADATA,sha256=uEU4Tv61Ui1QMIK5aUfQJIu4beg4mYyWwhfZ2IQ9M04,975
+biopipen-0.34.8.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+biopipen-0.34.8.dist-info/entry_points.txt,sha256=BYqHGBQJxyFDNLYqgH64ycI5PYwnlqwYcCFsMvJgzAU,653
+biopipen-0.34.8.dist-info/RECORD,,

{biopipen-0.34.7.dist-info → biopipen-0.34.8.dist-info}/WHEEL RENAMED Viewed

File without changes

{biopipen-0.34.7.dist-info → biopipen-0.34.8.dist-info}/entry_points.txt RENAMED Viewed

File without changes

biopipen 0.34.7__py3-none-any.whl → 0.34.8__py3-none-any.whl

Potentially problematic release.

biopipen 0.34.7__py3-none-any.whl → 0.34.8__py3-none-any.whl