PyPI - biopipen - Versions diffs - 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl - Mend

biopipen 0.32.1 (py3-none-any.whl) → 0.33.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (134) hide show

biopipen/__init__.py +1 -1
biopipen/core/config.toml +6 -0
biopipen/core/filters.py +77 -26
biopipen/core/testing.py +6 -1
biopipen/ns/bam.py +39 -0
biopipen/ns/cellranger.py +5 -0
biopipen/ns/cellranger_pipeline.py +2 -2
biopipen/ns/cnvkit_pipeline.py +4 -1
biopipen/ns/delim.py +33 -27
biopipen/ns/protein.py +99 -0
biopipen/ns/scrna.py +411 -250
biopipen/ns/snp.py +16 -3
biopipen/ns/tcr.py +125 -1
biopipen/ns/vcf.py +34 -0
biopipen/ns/web.py +5 -1
biopipen/reports/scrna/SeuratClusterStats.svelte +1 -1
biopipen/reports/scrna/SeuratMap2Ref.svelte +15 -2
biopipen/reports/tcr/ClonalStats.svelte +15 -0
biopipen/reports/utils/misc.liq +22 -7
biopipen/scripts/bam/BamMerge.py +2 -2
biopipen/scripts/bam/BamSampling.py +4 -4
biopipen/scripts/bam/BamSort.py +141 -0
biopipen/scripts/bam/BamSplitChroms.py +10 -10
biopipen/scripts/bam/BamSubsetByBed.py +3 -3
biopipen/scripts/bam/CNVpytor.py +10 -10
biopipen/scripts/bam/ControlFREEC.py +11 -11
biopipen/scripts/bed/Bed2Vcf.py +5 -5
biopipen/scripts/bed/BedConsensus.py +5 -5
biopipen/scripts/bed/BedLiftOver.sh +6 -4
biopipen/scripts/bed/BedtoolsIntersect.py +4 -4
biopipen/scripts/bed/BedtoolsMakeWindows.py +3 -3
biopipen/scripts/bed/BedtoolsMerge.py +4 -4
biopipen/scripts/cellranger/CellRangerCount.py +20 -9
biopipen/scripts/cellranger/CellRangerSummary.R +20 -29
biopipen/scripts/cellranger/CellRangerVdj.py +8 -8
biopipen/scripts/cnvkit/CNVkitAccess.py +6 -6
biopipen/scripts/cnvkit/CNVkitAutobin.py +25 -18
biopipen/scripts/cnvkit/CNVkitBatch.py +5 -5
biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -2
biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +9 -5
biopipen/scripts/cnvkit/CNVkitHeatmap.py +4 -4
biopipen/scripts/cnvkit/CNVkitReference.py +2 -2
biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
biopipen/scripts/cnvkit/guess_baits.py +166 -93
biopipen/scripts/delim/SampleInfo.R +85 -139
biopipen/scripts/misc/Config2File.py +2 -2
biopipen/scripts/misc/Str2File.py +2 -2
biopipen/scripts/protein/MMCIF2PDB.py +33 -0
biopipen/scripts/protein/PDB2Fasta.py +60 -0
biopipen/scripts/protein/Prodigy.py +4 -4
biopipen/scripts/protein/RMSD.py +178 -0
biopipen/scripts/regulatory/MotifScan.py +8 -8
biopipen/scripts/scrna/CellCellCommunication.py +59 -22
biopipen/scripts/scrna/CellsDistribution.R +31 -6
biopipen/scripts/scrna/MarkersFinder.R +272 -602
biopipen/scripts/scrna/MetaMarkers.R +16 -7
biopipen/scripts/scrna/RadarPlots.R +75 -35
biopipen/scripts/scrna/SCP-plot.R +15202 -0
biopipen/scripts/scrna/ScVelo.py +0 -0
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +23 -25
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -47
biopipen/scripts/scrna/SeuratClusterStats-features.R +85 -385
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +33 -13
biopipen/scripts/scrna/SeuratClusterStats-stats.R +45 -228
biopipen/scripts/scrna/SeuratClusterStats.R +13 -19
biopipen/scripts/scrna/SeuratMap2Ref.R +16 -6
biopipen/scripts/scrna/SeuratPreparing.R +138 -81
biopipen/scripts/scrna/SlingShot.R +71 -0
biopipen/scripts/scrna/TopExpressingGenes.R +9 -7
biopipen/scripts/scrna/celltypist-wrapper.py +7 -6
biopipen/scripts/snp/Plink2GTMat.py +26 -11
biopipen/scripts/snp/PlinkFilter.py +7 -7
biopipen/scripts/snp/PlinkFromVcf.py +8 -5
biopipen/scripts/snp/PlinkSimulation.py +4 -4
biopipen/scripts/snp/PlinkUpdateName.py +4 -4
biopipen/scripts/stats/ChowTest.R +48 -22
biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
biopipen/scripts/tcr/CDR3AAPhyschem.R +12 -2
biopipen/scripts/tcr/ClonalStats.R +484 -0
biopipen/scripts/tcr/CloneResidency.R +23 -5
biopipen/scripts/tcr/Immunarch-basic.R +8 -1
biopipen/scripts/tcr/Immunarch-clonality.R +5 -0
biopipen/scripts/tcr/Immunarch-diversity.R +25 -4
biopipen/scripts/tcr/Immunarch-geneusage.R +15 -1
biopipen/scripts/tcr/Immunarch-kmer.R +14 -1
biopipen/scripts/tcr/Immunarch-overlap.R +15 -1
biopipen/scripts/tcr/Immunarch-spectratyping.R +10 -1
biopipen/scripts/tcr/Immunarch-tracking.R +6 -0
biopipen/scripts/tcr/Immunarch-vjjunc.R +33 -0
biopipen/scripts/tcr/ScRepLoading.R +127 -0
biopipen/scripts/tcr/TCRClusterStats.R +24 -7
biopipen/scripts/tcr/TCRDock.py +10 -6
biopipen/scripts/tcr/TESSA.R +6 -1
biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
biopipen/scripts/vcf/BcftoolsAnnotate.py +8 -8
biopipen/scripts/vcf/BcftoolsFilter.py +3 -3
biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
biopipen/scripts/vcf/BcftoolsSort.py +4 -4
biopipen/scripts/vcf/BcftoolsView.py +5 -5
biopipen/scripts/vcf/Vcf2Bed.py +2 -2
biopipen/scripts/vcf/VcfAnno.py +11 -11
biopipen/scripts/vcf/VcfDownSample.sh +22 -10
biopipen/scripts/vcf/VcfFilter.py +5 -5
biopipen/scripts/vcf/VcfFix.py +7 -7
biopipen/scripts/vcf/VcfFix_utils.py +12 -3
biopipen/scripts/vcf/VcfIndex.py +3 -3
biopipen/scripts/vcf/VcfIntersect.py +3 -3
biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
biopipen/scripts/vcf/bcftools_utils.py +3 -3
biopipen/scripts/web/Download.py +8 -4
biopipen/scripts/web/DownloadList.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadBucket.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadFile.py +3 -3
biopipen/scripts/web/gcloud_common.py +1 -1
biopipen/utils/gsea.R +96 -42
biopipen/utils/misc.R +205 -7
biopipen/utils/misc.py +17 -8
biopipen/utils/plot.R +53 -17
biopipen/utils/reference.py +11 -11
biopipen/utils/repr.R +146 -0
biopipen/utils/vcf.py +1 -1
{biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/METADATA +9 -9
{biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/RECORD +131 -122
{biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/WHEEL +1 -1
biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -139
biopipen/scripts/scrna/SeuratPreparing-common.R +0 -452
biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +0 -201
{biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/entry_points.txt +0 -0

biopipen/scripts/scrna/MarkersFinder.R CHANGED Viewed

@@ -1,23 +1,11 @@
-{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
-{{ biopipen_dir | joinpaths: "utils", "caching.R" | source_r }}
-{{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
 library(rlang)
 library(dplyr)
-library(tidyr)
-library(tibble)
 library(Seurat)
-library(enrichR)
-library(ggplot2)
-library(ggprism)
-library(ggrepel)
-library(future)
-library(tidyseurat)
-library(ggVennDiagram)
-library(UpSetR)
-log_info("Setting up EnrichR ...")
-setEnrichrSite("Enrichr")
+library(plotthis)
+library(biopipen.utils)
+log <- get_logger()
+reporter <- get_reporter()
 srtfile <- {{ in.srtobj | quote }}
 outdir <- {{ out.outdir | quote }}
@@ -30,58 +18,37 @@ group.by <- {{ envs["group-by"] | r }}
 each <- {{ envs.each | r }}
 prefix_each <- {{ envs.prefix_each | r }}
 prefix_group <- {{ envs.prefix_group | r }}
-section <- {{ envs.section | r }}
-dbs <- {{ envs.dbs | r }}
 assay <- {{ envs.assay | r }}
-sigmarkers <- {{ envs.sigmarkers | r }}
-volcano_genes <- {{ envs.volcano_genes | r }}
 subset <- {{ envs.subset | r }}
+error <- {{ envs.error | r }}
+site <- {{ envs.site | r }}
 rest <- {{ envs.rest | r: todot="-" }}
-dotplot <- {{ envs.dotplot | r: todot="-" }}
-cases <- {{ envs.cases | r: todot="-", skip=1 }}
-overlapping_defaults <- {{ envs.overlap_defaults | r }}
-overlapping <- {{ envs.overlap | r }}
+dbs <- {{ envs.dbs | r }}
+sigmarkers <- {{ envs.sigmarkers | r }}
 cache <- {{ envs.cache | r }}
+allmarker_plots_defaults <- {{ envs.allmarker_plots_defaults | r }}
+allmarker_plots <- {{ envs.allmarker_plots | r }}
+marker_plots_defaults <- {{ envs.marker_plots_defaults | r }}
+marker_plots <- {{ envs.marker_plots | r }}
+enrich_plots_defaults <- {{ envs.enrich_plots_defaults | r }}
+enrich_plots <- {{ envs.enrich_plots | r }}
+cases <- {{ envs.cases | r: todot="-", skip=1 }}
+overlaps_defaults <- {{ envs.overlaps_defaults | r }}
+overlaps <- {{ envs.overlaps | r }}
 if (isTRUE(cache)) { cache <- joboutdir }
-# expand overlapping
-for (sec in names(overlapping)) {
-    overlapping[[sec]] <- list_update(overlapping_defaults, overlapping[[sec]])
-}
-overlapping_sections <- names(overlapping)
-overlaps <- list()
-if (is.character(volcano_genes) && length(volcano_genes) == 1) {
-    volcano_genes <- trimws(strsplit(volcano_genes, ",")[[1]])
-}
 set.seed(8525)
 if (ncores > 1) {
     options(future.globals.maxSize = 80000 * 1024^2)
     plan(strategy = "multicore", workers = ncores)
 }
-log_info("- Reading Seurat object ...")
+log$info("Reading Seurat object ...")
 srtobj <- readRDS(srtfile)
-defassay <- DefaultAssay(srtobj)
-if (defassay == "SCT" && !"PrepSCTFindMarkers" %in% names(srtobj@commands)) {
-    log_warn("  SCTransform used but PrepSCTFindMarkers not applied, running ...")
-    srtobj <- PrepSCTFindMarkers(srtobj)
-    # compose a new SeuratCommand to record it to srtobj@commands
-    commands <- names(pbmc_small@commands)
-    scommand <- pbmc_small@commands[[commands[length(commands)]]]
-    scommand@name <- "PrepSCTFindMarkers"
-    scommand@time.stamp <- Sys.time()
-    scommand@assay.used <- "SCT"
-    scommand@call.string <- "PrepSCTFindMarkers(object = srtobj)"
-    scommand@params <- list()
-    srtobj@commands$PrepSCTFindMarkers <- scommand
-}
 if (!is.null(mutaters) && length(mutaters) > 0) {
-    log_info("- Mutating meta data ...")
+    log$info("Mutating meta data ...")
     srtobj@meta.data <- srtobj@meta.data %>%
         mutate(!!!lapply(mutaters, parse_expr))
 }
@@ -93,614 +60,317 @@ defaults <- list(
     each = each,
     prefix_each = prefix_each,
     prefix_group = prefix_group,
-    section = section,
     dbs = dbs,
-    assay = assay %||% defassay,
+    assay = assay %||% DefaultAssay(srtobj),
     subset = subset,
+    error = error,
+    site = site,
     sigmarkers = sigmarkers,
-    volcano_genes = volcano_genes,
-    dotplot = dotplot,
+    allmarker_plots = allmarker_plots,
+    marker_plots = marker_plots,
+    enrich_plots = enrich_plots,
+    cache = cache,
     rest = rest
 )
-expand_each <- function(name, case) {
+log$info("Expanding cases ...")
+post_casing <- function(name, case) {
     outcases <- list()
-    no_each <- is.null(case$each) || nchar(case$each) == 0
-    if (no_each && !is.null(case$ident.1)) {
+    no_each <- is.null(case$each) || is.na(case$each) || nchar(case$each) == 0
+    if (no_each) {
         # single cases, no need to expand
-        if (is.null(case$section) || case$section == "DEFAULT") {
-            outcases[[name]] <- case
+        case$allmarker_plots <- lapply(
+            case$allmarker_plots,
+            function(x) { list_update(allmarker_plots_defaults, x) }
+        )
+        case$marker_plots <- lapply(
+            case$marker_plots,
+            function(x) { list_update(marker_plots_defaults, x) }
+        )
+        case$enrich_plots <- lapply(
+            case$enrich_plots,
+            function(x) { list_update(enrich_plots_defaults, x) }
+        )
+        outcases[[name]] <- case
+    } else {  # !no_each
+        if (!is.null(case$subset)) {
+            sobj <- srtobj %>% filter(!!parse_expr(case$subset))
         } else {
-            outcases[[paste0(case$section, "::", name)]] <- case
-        }
-    } else {  # !no_each || is.null(case$ident.1)
-        if (!is.null(case$section) && case$section != "DEFAULT") {
-            log_warn("  Ignoring `section` in case `{name}` that will be expanded (`each` is set or `ident-1` is not set).")
-            case$section <- NULL
+            sobj <- srtobj
         }
-        if (no_each) {  # is.null(ident.1)
-            # no each and no ident.1, use FindAllMarkers
-            key <- paste0(name, "::", name)
-            outcases[[key]] <- case
-            outcases[[key]]$section <- name
-            outcases[[key]]$findall <- TRUE
-        } else if (!no_each) {
-            # expand each
-            if (is.null(case$subset)) {
-                eachs <- srtobj@meta.data %>%
-                    pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
+        eachs <- sobj@meta.data %>% pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
+        case_1 <- case
+        for (each in eachs) {
+            each_name <- ifelse(case_1$prefix_each, paste0(case_1$each, " - ", each), each)
+            if (!is.null(case_1$ident.1)) {
+                # Make name a section
+                key <- paste0(name, "::", each_name)
             } else {
-                eachs <- srtobj@meta.data %>% dplyr::filter(!!parse_expr(case$subset)) %>%
-                    pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
+                key <- paste0(name, ": ", each_name)
             }
-            for (each in eachs) {
-                by <- make.names(paste0("..", name, "_", case$each,"_", each))
-                srtobj@meta.data <<- srtobj@meta.data %>% mutate(
-                    !!sym(by) := if_else(
-                        !!sym(case$each) == each,
-                        !!sym(case$group.by),
-                        NA
-                    )
-                )
-                if (isTRUE(case$prefix_each)) {
-                    key <- paste0(name, "::", case$each, " - ", each)
-                } else {
-                    key <- paste0(name, "::", each)
-                }
-                outcases[[key]] <- case
-                outcases[[key]]$section <- name
-                outcases[[key]]$group.by <- by
-                if (is.null(case$ident.1)) {
-                    outcases[[key]]$findall <- TRUE
-                }
+            if (!is.null(case$subset)) {
+                case_1$subset <- paste0(case$subset, " & `", case_1$each, "` == '", each, "'")
+            } else {
+                case_1$subset <- paste0("`", case_1$each, "` == '", each, "'")
             }
+            case_1$allmarker_plots <- lapply(
+                case_1$allmarker_plots,
+                function(x) { list_update(allmarker_plots_defaults, x) }
+            )
+            case_1$marker_plots <- lapply(
+                case_1$marker_plots,
+                function(x) { list_update(marker_plots_defaults, x) }
+            )
+            case_1$enrich_plots <- lapply(
+                case_1$enrich_plots,
+                function(x) { list_update(enrich_plots_defaults, x) }
+            )
+            outcases[[key]] <- case_1
         }
     }
     outcases
 }
-log_info("- Expanding cases ...")
-cases <- expand_cases(cases, defaults, expand_each)
-plot_volcano = function(markers, volfile, sig, volgenes) {
-    # markers
-    #                  gene        p_val avg_log2FC pct.1 pct.2    p_val_adj
-    # 1            CCL5 1.883596e-11 -4.8282535 0.359 0.927 4.332270e-09
-    # 2        HLA-DQB1 3.667713e-09  6.1543174 0.718 0.098 8.435740e-07
-    # 3        HLA-DRB5 1.242993e-07  3.9032231 0.744 0.195 2.858885e-05
-    # 4           CD79B 2.036731e-07  4.2748835 0.692 0.146 4.684482e-05
-    log_info("  Plotting volcano plot ...")
-    markers = markers %>%
-        mutate(
-            Significant = if_else(
-                !!parse_expr(sig),
-                if_else(avg_log2FC > 0, "Up", "Down"),
-                "No"
-            ),
-            Label = if_else(
-                Significant != "No" & (isTRUE(volgenes) | (gene %in% volgenes)),
-                gene,
-                ""
-            )
-        )
-    p_vol = ggplot(markers, aes(x = avg_log2FC, y = -log10(p_val_adj))) +
-        geom_point(aes(color = Significant), alpha = 0.75) +
-        scale_color_manual(
-            values = c(Up = "#FF3333", Down = "#3333FF", No = "#AAAAAA"),
-            labels = c(Up = "Up", Down = "Down", No = "Non-Significant")
-        ) +
-        geom_text_repel(
-            aes(label = Label),
-            size = 3,
-            color = "#000000",
-            box.padding = unit(0.35, "lines"),
-            point.padding = unit(0.5, "lines"),
-            segment.color = "#000000"
-        ) +
-        theme_prism() +
-        theme(legend.title=element_blank(), plot.margin=unit(c(1,1,1,1), "cm")) +
-        labs(
-            x = "log2 Fold Change",
-            y = "-log10 Adjusted P-value"
-        )
-    png(volfile, res = 100, height = 1200, width = 900)
-    print(p_vol)
-    dev.off()
-}
-# Do enrichment analysis for a case using Enrichr
-# Args:
-#   case: case name
-#   markers: markers dataframe
-#   sig: The expression to filter significant markers
-do_enrich <- function(info, markers, sig, volgenes) {
-    log_info("  Running enrichment for case: {info$casename}")
-    if (nrow(markers) == 0) {
-        log_warn("  No markers found for case: {info$casename}")
-        return(NULL)
-    }
-    plot_volcano(markers, file.path(info$casedir, "volcano.png"), sig, volgenes)
-    markers_sig <- markers %>% filter(!!parse_expr(sig)) %>% arrange(p_val_adj)
-    if (nrow(markers_sig) == 0) {
-        log_warn("  No significant markers found.")
-        return(NULL)
-    }
-    write.table(
-        markers_sig,
-        file.path(info$casedir, "markers.txt"),
-        sep = "\t",
-        row.names = FALSE,
-        col.names = TRUE,
-        quote = FALSE
-    )
-    if (nrow(markers_sig) < 5) {
-        log_warn("  Too few significant markers found for case: {info$casename}")
-    } else {
-        enriched <- enrichr(unique(markers_sig$gene), dbs)
-        for (db in dbs) {
-            write.table(
-                enriched[[db]],
-                file.path(info$casedir, paste0("Enrichr-", db, ".txt")),
-                sep = "\t",
-                row.names = FALSE,
-                col.names = TRUE,
-                quote = FALSE
-            )
-            if (nrow(enriched[[db]]) == 0) {
-                log_warn("  No enrichment found for case: {info$casename} - {db}")
-                next
+cases <- expand_cases(cases, defaults, post_casing)
+# Checking the overlapping cases
+case_markers <- list()
+if (length(overlaps) > 0) {
+    log$info("Checking overlapping cases ...")
+    overlaps <- expand_cases(overlaps, overlaps_defaults)
+    for (ovname in names(overlaps)) {
+        ov <- overlaps[[ovname]]
+        # check the existence of the cases
+        for (case in ov$cases) {
+            if (is.null(cases[[case]])) {
+                stop(paste0("Case '", case, "' not found in the cases for overlapping case '", ovname, "'"))
             }
-            png(
-                file.path(info$casedir, paste0("Enrichr-", db, ".png")),
-                res = 100, height = 1000, width = 1000
-            )
-            print(
-                plotEnrich(enriched[[db]], showTerms = 20, title = db) +
-                theme_prism()
-            )
-            dev.off()
+        }
+        if (length(ov$cases) < 2) {
+            stop("Overlapping cases must have at least 2 cases for overlapping case '", ovname, "'")
+        }
+        for (case in ov$cases) {
+            case_markers[[case]] <- TRUE
+        }
+        if (identical(ov$venn$enabled, "auto")) {
+            overlaps[[ovname]]$venn$enabled <- length(ov$cases) <= 5
         }
     }
-    unique(markers_sig$gene)
 }
-do_dotplot <- function(info, siggenes, dotplot, args) {
-    max_dotplot_features <- dotplot$maxgenes %||% 20
-    dotplot$maxgenes <- NULL
-    if (length(siggenes) > max_dotplot_features) {
-        log_debug("  Too many significant markers ({length(siggenes)}), using first {max_dotplot_features} for dotplot")
-        siggenes <- siggenes[1:max_dotplot_features]
-    }
-    dotplot_devpars <- dotplot$devpars
-    dotplot$devpars <- NULL
-    dotplot$object <- args$object
-    dotplot$features <- siggenes
-    dotplot$group.by <- args$group.by
-    dotplot_width <- dotplot_devpars$width %||%
-        ifelse(length(siggenes) <= 20, length(siggenes) * 60, min(1000, length(siggenes)) * 30)
-    dotplot_height <- dotplot_devpars$height %||% 600
-    dotplot_res <- dotplot_devpars$res %||% 100
-    dotplot_file <- file.path(info$casedir, "dotplot.png")
-    png(dotplot_file, res = dotplot_res, width = dotplot_height, height = dotplot_width)
-    # rotate x axis labels
-    print(
-        do_call(DotPlot, dotplot) +
-        theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
-        coord_flip()
+log$info("Running cases ...")
+process_markers <- function(markers, info, case) {
+    # Save markers
+    write.table(markers, file.path(info$prefix, "markers.tsv"), sep = "\t", quote = FALSE, row.names = FALSE)
+    reporter$add2(
+        list(
+            name = "Table",
+            contents = list(list(kind = "table", src = file.path(info$prefix, "markers.tsv"), data = list(nrows = 100)))
+        ),
+        hs = c(info$section, info$name),
+        hs2 = "Markers",
+        ui = "tabs"
     )
-    dev.off()
-}
-add_case_report <- function(info, sigmarkers, siggenes) {
-    h1 = info$h1
-    h2 = info$h2
-    if (is.null(siggenes) || length(siggenes) == 0) {
-        add_report(
+    for (plotname in names(case$marker_plots)) {
+        plotargs <- case$marker_plots[[plotname]]
+        plotargs$degs <- markers
+        plotargs$outprefix <- file.path(info$prefix, paste0("markers.", slugify(plotname)))
+        do_call(VizDEGs, plotargs)
+        reporter$add2(
             list(
-                kind = "error",
-                content = "No significant markers found."
-            ),
-            h1 = h1,
-            h2 = ifelse(h2 == "#", "Markers", h2),
-            h3 = ifelse(h2 == "#", "#", "Markers"),
-            ui = "flat"
-        )
-    } else {
-        add_report(
-            list(
-                title = "Significant Markers",
-                ui = "flat",
-                contents = list(
-                    list(
-                        kind = "descr",
-                        content = paste0(
-                            "The markers are found using Seurat's FindMarkers function, ",
-                            "and filtered by: ",
-                            html_escape(sigmarkers)
-                        )
-                    ),
-                    list(
-                        kind = "table",
-                        data = list(nrows = 100),
-                        src = file.path(info$casedir, "markers.txt")
-                    )
-                )
-            ),
-            list(
-                title = "Volcano Plot",
-                ui = "flat",
-                contents = list(
-                    list(
-                        kind = "img",
-                        src = file.path(info$casedir, "volcano.png")
-                    )
-                )
-            ),
-            list(
-                title = "Dot Plot",
-                ui = "flat",
-                contents = list(
-                    list(
-                        kind = "img",
-                        src = file.path(info$casedir, "dotplot.png")
-                    )
-                )
-            ),
-            h1 = h1,
-            h2 = ifelse(h2 == "#", "Markers", h2),
-            h3 = ifelse(h2 == "#", "#", "Markers"),
+                name = plotname,
+                contents = list(reporter$image(plotargs$outprefix, plotargs$more_formats, plotargs$save_code))),
+            hs = c(info$section, info$name),
+            hs2 = "Markers",
             ui = "tabs"
         )
+    }
-        add_report(
-            list(
-                kind = "descr",
-                content = paste0(
-                    "The enrichment analysis is done using Enrichr. ",
-                    "The significant markers are used as input. "
-                )
-            ),
+    # Do enrichment analysis
+    tryCatch({
+        enrich <- RunEnrichment(
+            markers, deg = case$sigmarkers, dbs = case$dbs, cache = case$cache,
+            error = TRUE, site = case$site)
+        write.table(enrich, file.path(info$prefix, "enrich.tsv"), sep = "\t", quote = FALSE, row.names = FALSE)
+        reporter$add2(
             list(
-                kind = "enrichr",
-                dir = info$casedir
+                name = "Table",
+                contents = list(list(kind = "table", src = file.path(info$prefix, "enrich.tsv"), data = list(nrows = 100)))
             ),
-            h1 = h1,
-            h2 = ifelse(h2 == "#", "Enrichment Analysis", h2),
-            h3 = ifelse(h2 == "#", "#", "Enrichment Analysis"),
-            ui = "flat"
+            hs = c(info$section, info$name),
+            hs2 = "Enrichment Analysis",
+            ui = "tabs"
         )
-    }
-}
-ensure_sobj <- function(expr, allow_empty) {
-    tryCatch({ expr }, error = function(e) {
-        if (allow_empty) {
-            log_warn("  Ignoring this case: {e$message}")
-            return(NULL)
-        } else {
-            stop(e)
-        }
-    })
-}
-do_case_findall <- function(casename) {
-    # casename
-    ## Cluster::Cluster
-    info <- casename_info(casename, cases, outdir, create = FALSE)
-    if (info$section %in% overlapping_sections) {
-        stop(paste0("  Can't do overlapping analysis for case without `ident-1` set: ", casename))
-    }
-    case <- cases[[casename]]
-    log_info("  Using FindAllMarkers for case: {casename}...")
-    args <- case$rest
-    args$assay <- case$assay
-    args$group.by <- case$group.by
-    # args$logfc.threshold <- args$logfc.threshold %||% 0
-    # args$min.cells.group <- args$min.cells.group %||% 1
-    # args$min.cells.feature <- args$min.cells.feature %||% 1
-    # args$min.pct <- args$min.pct %||% 0
-    allow_empty = startsWith(case$group.by, "..")
-    if (!is.null(case$subset)) {
-        args$object <- ensure_sobj({
-            srtobj %>% filter(!!parse_expr(case$subset) & !is.na(!!sym(case$group.by)))
-        }, allow_empty)
-        if (is.null(args$object)) { return() }
-    } else {
-        args$object <- ensure_sobj({
-            srtobj %>% filter(!is.na(!!sym(case$group.by)))
-        }, allow_empty)
-        if (is.null(args$object)) { return() }
-    }
-    Idents(args$object) <- case$group.by
-    cached <- get_cached(args, "FindAllMarkers", cache)
-    if (!is.null(cached$data)) {
-        log_info("  Using cached markers ...")
-        markers <- cached$data
-    } else {
-        markers <- find_markers(args, find_all = TRUE)
-        cached$data <- markers
-        save_to_cache(cached, "FindAllMarkers", cache)
-    }
-    if (is.null(case$dotplot$assay)) {
-        case$dotplot$assay <- case$assay
-    }
-    if (nrow(markers) == 0) {
-        idents <- unique(Idents(args$object))
-    } else {
-        idents <- unique(markers$cluster)
-    }
-    for (ident in idents) {
-        log_debug("  * Dealing with ident: {ident}...")
-        if (case$prefix_group) {
-            key <- paste0(info$section, "::", case$group.by, " - ", ident)
-        } else {
-            key <- paste0(info$section, "::", ident)
-        }
-        info_ident <- casename_info(key, cases, outdir, create = TRUE)
-        if (nrow(markers) > 0) {
-            markers_ident <- markers %>% filter(cluster == ident)
-        } else {
-            markers_ident <- markers
-        }
-        siggenes <- do_enrich(info_ident, markers_ident, case$sigmarkers, case$volcano_genes)
+        # Visualize enriched terms
+        if (length(case$enrich_plots) > 0) {
+            for (db in case$dbs) {
+                plots <- list()
+                for (plotname in names(case$enrich_plots)) {
+                    plotargs <- case$enrich_plots[[plotname]]
+                    plotargs$enrich <- enrich[enrich$db == db, , drop = FALSE]
+                    plotargs$outprefix <- file.path(info$prefix, paste0("enrich.", slugify(db), ".", slugify(plotname)))
-        if (length(siggenes) > 0) {
-            args$ident.1 <- as.character(ident)
-            do_dotplot(info_ident, siggenes, case$dotplot, args)
-        }
-        add_case_report(info_ident, case$sigmarkers, siggenes)
-    }
-}
-find_markers <- function(findmarkers_args, find_all = FALSE) {
-    if (find_all) {
-        fun <- FindAllMarkers
-        empty <- data.frame(
-            gene = character(),
-            p_val = numeric(),
-            avg_log2FC = numeric(),
-            pct.1 = numeric(),
-            pct.2 = numeric(),
-            p_val_adj = numeric(),
-            cluster = character()
-        )
-    } else {
-        fun <- FindMarkers
-        empty <- data.frame(
-            gene = character(),
-            p_val = numeric(),
-            avg_log2FC = numeric(),
-            pct.1 = numeric(),
-            pct.2 = numeric(),
-            p_val_adj = numeric()
-        )
-    }
+                    do_call(VizEnrich, plotargs)
-    call_findmarkers <- function(fn, args) {
-        if (find_all) {
-            do_call(fn, args)
-        } else {
-            do_call(fn, args) %>% rownames_to_column("gene")
+                    plots[[length(plots) + 1]] <- reporter$image(plotargs$outprefix, plotargs$more_formats, plotargs$save_code)
+                }
+                reporter$add2(
+                    list(name = db, contents = plots),
+                    hs = c(info$section, info$name),
+                    hs2 = "Enrichment Analysis",
+                    ui = "tabs"
+                )
+            }
         }
-    }
-    markers <- tryCatch({
-        call_findmarkers(fun, findmarkers_args)
     }, error = function(e) {
-        if (!grepl("PrepSCTFindMarkers", e$message) && defassay == "SCT") {
-            log_warn(paste0("  ! ", e$message))
+        if (case$error) {
+            stop("Error: ", e$message)
+        } else {
+            log$warn("  ! Error: {e$message}")
+            reporter$add2(
+                list(
+                    name = "Warning",
+                    contents = list(list(kind = "error", content = e$message, kind_ = "warning"))),
+                hs = c(info$section, info$name),
+                hs2 = "Enrichment Analysis",
+                ui = "tabs"
+            )
         }
-        empty
     })
-    if (nrow(markers) == 0 && defassay == "SCT") {
-        log_warn("  ! No markers found from SCT assay, trying recorrect_umi = FALSE")
-        findmarkers_args$recorrect_umi <- FALSE
-        markers <- tryCatch({
-            call_findmarkers(fun, findmarkers_args)
-        }, error = function(e) {
-            log_warn(paste0("  ! ", e$message))
-            empty
-        })
-    }
-    markers
 }
-sections <- c()
-do_case <- function(casename) {
-    if (isTRUE(cases[[casename]]$findall)) {
-        log_info("- Dealing with case: {casename} (all idents) ...")
-        do_case_findall(casename)
-        return()
-    }
-    log_info("- Dealing with case: {casename} ...")
-    info <- casename_info(casename, cases, outdir, create = TRUE)
-    case <- cases[[casename]]
-    # ident1
-    # ident2
-    # groupby
-    # each  # expanded
-    # prefix_each
-    # dbs
-    # sigmarkers
-    # rest
-    args <- case$rest
-    allow_empty = startsWith(case$group.by, "..")
-    if (!is.null(case$subset)) {
-        args$object <- ensure_sobj({
-            srtobj %>% filter(!!parse_expr(case$subset) & !is.na(!!sym(case$group.by)))
-        }, allow_empty)
-        if (is.null(args$object)) { return() }
-    } else {
-        args$object <- ensure_sobj({
-            srtobj %>% filter(!is.na(!!sym(case$group.by)))
-        }, allow_empty)
-        if (is.null(args$object)) { return() }
-    }
+run_case <- function(name) {
+    case <- cases[[name]]
+    log$info("- Case: {name} ...")
-    args$assay <- case$assay
+    args <- case$rest %||% list()
+    args$object <- srtobj
     args$group.by <- case$group.by
     args$ident.1 <- case$ident.1
     args$ident.2 <- case$ident.2
-    if (is.null(args$ident.2)) {
-        args$ident.2 <- ".rest"
-        args$object <- args$object %>% mutate(
-            !!sym(args$group.by) := if_else(
-                !!sym(args$group.by) == args$ident.1,
-                args$ident.1,
-                args$ident.2
-            )
-        )
-    } else {
-        args$object <- args$object %>%
-            filter(!!sym(args$group.by) %in% c(args$ident.1, args$ident.2))
-    }
-    # args$logfc.threshold <- args$logfc.threshold %||% 0
-    # args$min.cells.group <- args$min.cells.group %||% 1
-    # args$min.cells.feature <- args$min.cells.feature %||% 1
-    # args$min.pct <- args$min.pct %||% 0
-    markers <- find_markers(args)
-    siggenes <- do_enrich(info, markers, case$sigmarkers, case$volcano_genes)
+    args$cache <- case$cache
+    args$assay <- case$assay
+    args$error <- case$error
+    args$subset <- case$subset
-    if (length(siggenes) > 0) {
-        case$dotplot$assay <- case$dotplot$assay %||% args$assay
-        do_dotplot(info, siggenes, case$dotplot, args)
+    markers <- do_call(RunSeuratDEAnalysis, args)
+    if (isTRUE(case_markers[[name]])) {
+        case_markers[[name]] <<- markers
     }
+    if (is.null(case$ident.1)) {
+        if (!is.null(case_markers[[name]])) {
+            stop("Case '", name, "' for overlapping analysis must have 'ident.1' defined")
+        }
+        all_idents <- unique(markers[[case$group.by]])
+        # Visualize all markers
+        if (length(case$allmarker_plots) > 0) {
+            log$info("  Visualizing all markers ...")
+            casename <- paste0(name, "::", ifelse(case$prefix_group, paste0(case$group.by, " - All Markers"), "All Markers"))
+            info <- case_info(casename, outdir, create = TRUE)
+            for (plotname in names(case$allmarker_plots)) {
+                plotargs <- case$allmarker_plots[[plotname]]
+                plotargs$degs <- markers
+                plotargs$outprefix <- file.path(info$prefix, slugify(plotname))
+                do_call(VizDEGs, plotargs)
+                reporter$add2(
+                    list(
+                        name = plotname,
+                        contents = list(reporter$image(plotargs$outprefix, plotargs$more_formats, plotargs$save_code))
+                    ),
+                    hs = c(info$section, info$name),
+                    ui = "tabs"
+                )
+            }
+        }
+        for (ident in all_idents) {
+            log$info("  {case$group.by}: {ident} ...")
+            ident_markers <- markers[markers[[case$group.by]] == ident, , drop = TRUE]
+            casename <- paste0(name, "::", ifelse(case$prefix_group, paste0(case$group.by, " - ", ident), ident))
+            info <- case_info(casename, outdir, create = TRUE)
-    sections <<- union(sections, info$section)
-    if (info$section %in% overlapping_sections) {
-        overlaps[[info$section]] <<- overlaps[[info$section]] %||% list()
-        overlaps[[info$section]][[info$case]] <<- siggenes %||% character()
+            process_markers(ident_markers, info = info, case = case)
+        }
+    } else {
+        info <- case_info(name, outdir, create = TRUE)
+        process_markers(markers, info = info, case = case)
     }
-    add_case_report(info, case$sigmarkers, siggenes)
 }
-do_overlap <- function(section) {
-    log_info("- Dealing with overlapping: {section}...")
-    ov_args <- overlapping[[section]]
-    ov_dir <- file.path(outdir, "OVERLAPPING", section)
-    dir.create(ov_dir, showWarnings = FALSE, recursive = TRUE)
-    ov_cases <- overlaps[[section]]
-    if (length(ov_cases) < 2) {
-        stop(sprintf("  Not enough cases for overlap: %s", section))
-    }
+sapply(names(cases), run_case)
-    if (is.list(ov_args$venn) && length(ov_cases) > 4) {
-        stop(paste0("  Too many cases (", length(ov_cases)," > 4) for venn plot for section: ", section))
-    }
-    if (is.list(ov_args$venn)) {
-        venn_plot <- file.path(ov_dir, "venn.png")
-        venn_p <- ggVennDiagram(ov_cases, label_percent_digit = 1) +
-            scale_fill_distiller(palette = "Reds", direction = 1) +
-            scale_x_continuous(expand = expansion(mult = .2))
-        ov_args$venn$devpars$file <- venn_plot
-        do.call(png, ov_args$venn$devpars)
-        print(venn_p)
-        dev.off()
-    }
+if (length(overlaps) > 0) {
+    log$info("Running overlapping cases ...")
-    df_markers <- fromList(ov_cases)
-    #  A  B  MARKERS
-    #  1  0  G1
-    #  1  0  G2
-    #  0  1  G3
-    #  0  1  G4
-    #  1  1  G5
-    df_markers$MARKERS = Reduce(union, ov_cases)
-    df_markers = df_markers %>%
-        group_by(across(-MARKERS)) %>%
-        summarise(MARKERS = paste0(MARKERS, collapse = ","), .groups = "drop")
-    write.table(
-        df_markers,
-        file.path(ov_dir, "markers.txt"),
-        sep = "\t",
-        row.names = FALSE,
-        col.names = TRUE,
-        quote = FALSE
-    )
+    run_overlap <- function(ovname) {
+        ov <- overlaps[[ovname]]
+        ov$sigmarkers <- ov$sigmarkers %||% sigmarkers
+        log$info("- Overlapping case: {ovname} ...")
+        markers <- lapply(ov$cases, function(case) {
+            case_markers[[case]] %>% filter(!!parse_expr(ov$sigmarkers)) %>%
+                pull("gene") %>% unique()
+        })
+        names(markers) <- ov$cases
+        info <- case_info(paste0("OVERLAPPING::", ovname), outdir, create = TRUE)
+        if (ov$venn$enabled) {
+            venn <- extract_vars(ov$venn, "enabled", "more_formats", "save_code", "devpars")
+            venn$data <- markers
+            venn$in_form <- "list"
+            prefix <- file.path(info$prefix, "venn")
+            p <- do_call(gglogger::register(VennDiagram), venn)
+            save_plot(p, prefix, devpars, formats = c("png", more_formats))
+            if (save_code) {
+                save_plotcode(
+                    p, prefix,
+                    c("library(plotthis)", "load('data.RData')", "invisible(list2env(venn, .GlobalEnv))"),
+                    "venn",
+                    auto_data_setup = FALSE)
+            }
-    if (is.list(ov_args$upset)) {
-        upset_plot <- file.path(ov_dir, "upset.png")
-        if (nrow(df_markers) == 0) {
-            upset_p <- ggplot() +
-                theme_void() +
-                ggtitle("No overlapping markers found") +
-                # center the title, and make it red
-                theme(plot.title = element_text(hjust = 0.5, color = "red"))
-            ov_args$upset$devpars <- list(
-                res = 100, height = 42, width = 400
+            reporter$add2(
+                list(
+                    name = "Venn Diagram",
+                    contents = list(reporter$image(prefix, more_formats, save_code))
+                ),
+                hs = c(info$section, info$name),
+                ui = "tabs"
             )
-        } else {
-            upset_p <- upset(fromList(ov_cases))
         }
-        ov_args$upset$devpars$file <- upset_plot
-        do.call(png, ov_args$upset$devpars)
-        print(upset_p)
-        dev.off()
-    }
-    add_report(
-        list(
-            title = "Venn Diagram",
-            ui = "flat",
-            contents = list(
-                list(
-                    kind = "img",
-                    src = file.path(ov_dir, "venn.png")
-                )
-            )
-        ),
-        list(
-            title = "UpSet Plot",
-            ui = "flat",
-            contents = list(
-                list(
-                    kind = "img",
-                    src = file.path(ov_dir, "upset.png")
-                )
-            )
-        ),
-        list(
-            title = "Marker Table",
-            ui = "flat",
-            contents = list(
+        if (ov$upset$enabled) {
+            upset <- extract_vars(ov$upset, "enabled", "more_formats", "save_code", "devpars")
+            upset$data <- markers
+            upset$in_form <- "list"
+            prefix <- file.path(info$prefix, "upset")
+            p <- do_call(gglogger::register(UpsetPlot), upset)
+            save_plot(p, prefix, devpars, formats = c("png", more_formats))
+            if (save_code) {
+                save_plotcode(
+                    p, prefix,
+                    c("library(plotthis)", "load('data.RData')", "invisible(list2env(upset, .GlobalEnv))"),
+                    "upset",
+                    auto_data_setup = FALSE)
+            }
+            reporter$add2(
                 list(
-                    kind = "table",
-                    data = list(nrows = 100),
-                    src = file.path(ov_dir, "markers.txt")
-                )
+                    name = "UpSet Plot",
+                    contents = list(reporter$image(prefix, more_formats, save_code))
+                ),
+                hs = c(info$section, info$name),
+                ui = "tabs"
             )
-        ),
-        h1 = "Overlapping Markers",
-        h2 = section,
-        ui = "tabs"
-    )
-}
+        }
-sapply(sort(names(cases)), do_case)
+    }
-unhit_overlaps <- setdiff(overlapping_sections, names(overlaps))
-if (length(unhit_overlaps) > 0) {
-    log_warn(paste0("- No sections found for overlapping analysis: ", paste(unhit_overlaps, collapse = ", ")))
-    log_warn("  Available sections: ", paste(sections, collapse = ", "))
+    sapply(names(overlaps), run_overlap)
 }
-sapply(sort(names(overlaps)), do_overlap)
-save_report(joboutdir)
+reporter$save(joboutdir)

biopipen 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl

Potentially problematic release.

biopipen 0.32.1py3-none-any.whl → 0.33.0py3-none-any.whl