PyPI - biopipen - Versions diffs - 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl - Mend

biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (149) hide show

biopipen/__init__.py +1 -1
biopipen/core/filters.py +10 -183
biopipen/core/proc.py +5 -3
biopipen/core/testing.py +8 -1
biopipen/ns/bam.py +40 -4
biopipen/ns/cnv.py +1 -1
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/delim.py +1 -1
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +38 -0
biopipen/ns/plot.py +8 -0
biopipen/ns/scrna.py +290 -288
biopipen/ns/scrna_metabolic_landscape.py +207 -366
biopipen/ns/tcr.py +165 -97
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
biopipen/reports/snp/PlinkCallRate.svelte +2 -2
biopipen/reports/snp/PlinkFreq.svelte +1 -1
biopipen/reports/snp/PlinkHWE.svelte +1 -1
biopipen/reports/snp/PlinkHet.svelte +1 -1
biopipen/reports/snp/PlinkIBD.svelte +1 -1
biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
biopipen/scripts/bam/CNAClinic.R +41 -6
biopipen/scripts/bam/CNVpytor.py +2 -1
biopipen/scripts/bam/ControlFREEC.py +2 -3
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/cnv/AneuploidyScore.R +25 -13
biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
biopipen/scripts/cnv/TMADScore.R +4 -4
biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +4 -1
biopipen/scripts/gene/GeneNameConversion.R +14 -12
biopipen/scripts/gsea/Enrichr.R +2 -2
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/PreRank.R +3 -3
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/plot/VennDiagram.R +2 -2
biopipen/scripts/protein/ProdigySummary.R +34 -27
biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
biopipen/scripts/regulatory/motifs-common.R +10 -9
biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
biopipen/scripts/rnaseq/Simulation.R +0 -2
biopipen/scripts/rnaseq/UnitConversion.R +6 -5
biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
biopipen/scripts/scrna/CellCellCommunication.py +1 -1
biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
biopipen/scripts/scrna/CellsDistribution.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
biopipen/scripts/scrna/MarkersFinder.R +348 -217
biopipen/scripts/scrna/MetaMarkers.R +3 -3
biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
biopipen/scripts/scrna/RadarPlots.R +1 -1
biopipen/scripts/scrna/ScFGSEA.R +157 -75
biopipen/scripts/scrna/ScSimulation.R +11 -10
biopipen/scripts/scrna/ScVelo.py +605 -0
biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
biopipen/scripts/scrna/SeuratClustering.R +31 -48
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
biopipen/scripts/scrna/SeuratPreparing.R +76 -24
biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
biopipen/scripts/snp/MatrixEQTL.R +39 -20
biopipen/scripts/snp/PlinkCallRate.R +43 -34
biopipen/scripts/snp/PlinkFreq.R +34 -41
biopipen/scripts/snp/PlinkHWE.R +23 -18
biopipen/scripts/snp/PlinkHet.R +26 -22
biopipen/scripts/snp/PlinkIBD.R +30 -34
biopipen/scripts/stats/ChowTest.R +9 -8
biopipen/scripts/stats/DiffCoexpr.R +13 -11
biopipen/scripts/stats/LiquidAssoc.R +7 -8
biopipen/scripts/stats/Mediation.R +8 -8
biopipen/scripts/stats/MetaPvalue.R +11 -13
biopipen/scripts/stats/MetaPvalue1.R +6 -5
biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
biopipen/scripts/tcr/ClonalStats.R +5 -4
biopipen/scripts/tcr/CloneResidency.R +3 -3
biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
biopipen/scripts/tcr/ScRepLoading.R +114 -92
biopipen/scripts/tcr/TCRClusterStats.R +2 -2
biopipen/scripts/tcr/TCRClustering.R +86 -97
biopipen/scripts/tcr/TESSA.R +65 -115
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
biopipen/utils/common_docstrs.py +66 -63
biopipen/utils/reporter.py +177 -0
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
biopipen/utils/caching.R +0 -44
biopipen/utils/gene.R +0 -95
biopipen/utils/gsea.R +0 -329
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -602
biopipen/utils/mutate_helpers.R +0 -581
biopipen/utils/plot.R +0 -209
biopipen/utils/repr.R +0 -146
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -207
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0

biopipen/scripts/scrna/TopExpressingGenes.R CHANGED Viewed

@@ -1,13 +1,8 @@
-{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
 library(Seurat)
-library(tibble)
-library(enrichR)
 library(rlang)
 library(dplyr)
-library(ggprism)
-setEnrichrSite("Enrichr")
+library(tidyselect)
+library(biopipen.utils)
 srtfile <- {{in.srtobj | r}}
 outdir <- {{out.outdir | r}}
@@ -16,238 +11,200 @@ mutaters <- {{ envs.mutaters | r }}
 ident <- {{ envs.ident | r }}
 group.by <- {{ envs["group-by"] | r }}  # nolint
 each <- {{ envs.each | r }}
-prefix_each <- {{ envs.prefix_each | r }}
-section <- {{ envs.section | r }}
 dbs <- {{ envs.dbs | r }}
 n <- {{ envs.n | r }}
+enrich_style <- {{ envs.enrich_style | r }}
 sset <- {{ envs.subset | r }}
+enrich_plots_defaults <- {{ envs.enrich_plots_defaults | r }}
+enrich_plots <- {{ envs.enrich_plots | r }}
 cases <- {{ envs.cases | r: todot = "-" }}  # nolint
 set.seed(8525)
+log <- get_logger()
+reporter <- get_reporter()
-log_info("- Loading Seurat object ...")
-srtobj <- readRDS(srtfile)
+log$info("Reading Seurat object ...")
+srtobj <- read_obj(srtfile)
+if (!"Identity" %in% colnames(srtobj@meta.data)) {
+    srtobj@meta.data$Identity <- Idents(srtobj)
+}
 assay <- DefaultAssay(srtobj)
-log_info("- Mutate meta data if needed ...")
-if (!is.null(mutaters) && length(mutaters)) {
+if (!is.null(mutaters) && length(mutaters) > 0) {
+    log$info("Mutating meta data ...")
     srtobj@meta.data <- srtobj@meta.data %>%
         mutate(!!!lapply(mutaters, parse_expr))
 }
+enrich_plots <- lapply(enrich_plots, function(x) {
+    list_update(enrich_plots_defaults, x)
+})
 defaults <- list(
     ident = ident,
     group.by = group.by,
     each = each,
-    prefix_each = prefix_each,
-    section = section,
     dbs = dbs,
     n = n,
+    enrich_style = enrich_style,
+    enrich_plots = enrich_plots,
+    enrich_plots_defaults = enrich_plots_defaults,
     subset = sset
 )
-expand_each <- function(name, case) {
+cases <- expand_cases(cases, defaults, default_case = "Top Expressing Genes", post = function(name, case) {
     outcases <- list()
-    no_each <- is.null(case$each) || nchar(case$each) == 0
-    no_ident <- is.null(case$ident)
-    has_section <- !is.null(case$section) && case$section != "DEFAULT"
-    if (no_each && !no_ident) {
-        # single cases
-        if (is.null(case$section) || case$section == "DEFAULT") {
-            outcases[[name]] <- case
-        } else {
-            outcases[[paste0(case$section, "::", name)]] <- case
-        }
-    } else if (no_each) {  # no_ident
-        # expanding idents
-        if (has_section) {
-            log_warn("  Ignoring `section` in case `{name}` when no `ident` is set.")
-            case$section <- NULL
-        }
-        if (!is.null(case$subset)) {
-            idents <- srtobj@meta.data %>% filter(!!parse_expr(case$subset)) %>%
-                pull(case$group.by) %>% unique() %>% na.omit() %>% as.vector()
-        } else {
-            idents <- srtobj@meta.data %>%
-                pull(case$group.by) %>% unique() %>% na.omit() %>% as.vector()
-        }
+    if (is.null(case$each) || is.na(case$each) || nchar(case$each) == 0 || isFALSE(each)) {
+        case$enrich_plots <- lapply(
+            case$enrich_plots,
+            function(x) { list_update(case$enrich_plots_defaults, x) }
+        )
+        case$enrich_plots_defaults <- NULL
-        for (ident in idents) {
-            key <- paste0(name, "::", ident)
-            outcases[[key]] <- case
-            outcases[[key]]$ident <- ident
-            outcases[[key]]$section <- name
-        }
-    } else {  # has_each
-        if (no_ident) {
-            stop("  `ident` must be set when `each` is set for case `{name}`.")
-        }
-        # expanding eachs
-        if (has_section) {
-            log_warn("  Ignoring `section` in case `{name}` when `each` is set.")
-            case$section <- NULL
+        outcases[[name]] <- case
+    } else {
+        eachs <- if (!is.null(case$subset)) {
+            srtobj@meta.data %>%
+                filter(!!parse_expr(case$subset)) %>%
+                pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
+        } else {
+            srtobj@meta.data %>%
+                pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
         }
-        if (!is.null(case$subset)) {
-            eachs <- srtobj@meta.data %>% filter(!!parse_expr(case$subset)) %>%
-                pull(case$each) %>% unique() %>% na.omit() %>% as.vector()
-        } else {
-            eachs <- srtobj@meta.data %>%
-                pull(case$each) %>% unique() %>% na.omit() %>% as.vector()
+        if (length(cases) == 0 && name == "Top Expressing Genes") {
+            name <- case$each
         }
         for (each in eachs) {
-            by <- make.names(paste0(".", name, "_", case$each,"_", each))
-            srtobj@meta.data <<- srtobj@meta.data %>% mutate(
-                !!sym(by) := if_else(
-                    !!sym(case$each) == each,
-                    !!sym(case$group.by),
-                    NA
-                )
-            )
+            newname <- paste0(name, " - ", each)
+            newcase <- case
+            newcase$each_name <- case$each
+            newcase$each <- each
-            if (isTRUE(case$prefix_each)) {
-                key <- paste0(name, "::", case$each, " - ", each)
+            if (!is.null(case$subset)) {
+                newcase$subset <- paste0(case$subset, " & ", bQuote(case$each), " == '", each, "'")
             } else {
-                key <- paste0(name, "::", each)
+                newcase$subset <- paste0(bQuote(case$each), " == '", each, "'")
             }
-            outcases[[key]] <- case
-            outcases[[key]]$section <- name
-            outcases[[key]]$group.by <- by
+            newcase$enrich_plots <- lapply(
+                case$enrich_plots,
+                function(x) { list_update(case$enrich_plots_defaults, x) }
+            )
+            newcase$enrich_plots_defaults <- NULL
+            outcases[[newname]] <- newcase
         }
     }
-    outcases
-}
-log_info("- Expanding cases ...")
-cases <- expand_cases(cases, defaults, expand_each)
-do_enrich <- function(expr, odir) {
-    log_debug("  Saving expressions ...")
-    expr <- expr %>% as.data.frame()
-    colnames(expr) <- c("Expression")
-    expr <- expr %>% rownames_to_column("Gene") %>% select(Gene, Expression)
-    write.table(
-        expr,
-        file.path(odir, "expr.txt"),
-        sep = "\t",
-        row.names = TRUE,
-        col.names = TRUE,
-        quote = FALSE
+    outcases
+})
+log$info("Running cases ...")
+process_markers <- function(markers, info, case) {
+    # Save markers
+    write.table(markers, file.path(info$prefix, "top_genes.tsv"), sep = "\t", quote = FALSE, row.names = FALSE)
+    reporter$add2(
+        list(
+            name = "Table",
+            contents = list(
+                list(kind = "descr", content = "Showing top expressing genes ordered by their expression descendingly."),
+                list(kind = "table", src = file.path(info$prefix, "top_genes.tsv"), data = list(nrows = 100))
+            )
+        ),
+        hs = c(info$section, info$name),
+        hs2 = paste0("Top Genes"),
+        ui = "tabs"
     )
-    write.table(
-        expr %>% head(n),
-        file.path(odir, "exprn.txt"),
-        sep = "\t",
-        row.names = TRUE,
-        col.names = TRUE,
-        quote = FALSE
+    enrich <- RunEnrichment(
+        markers$gene,
+        dbs = case$dbs, style = case$enrich_style)
+    write.table(enrich, file.path(info$prefix, "enrich.tsv"), sep = "\t", quote = FALSE, row.names = FALSE)
+    reporter$add2(
+        list(
+            name = "Table",
+            contents = list(list(kind = "table", src = file.path(info$prefix, "enrich.tsv"), data = list(nrows = 100)))
+        ),
+        hs = c(info$section, info$name),
+        hs2 = "Enrichment Analysis",
+        ui = "tabs"
     )
-    log_debug("  Running enrichment ...")
-    enriched <- enrichr(head(expr$Gene, n), dbs)  # nolint
-    for (db in dbs) {
-        write.table(
-            enriched[[db]],
-            file.path(odir, paste0("Enrichr-", db, ".txt")),
-            sep = "\t",
-            row.names = FALSE,
-            col.names = TRUE,
-            quote = FALSE
-        )
+    # Visualize enriched terms
+    if (length(case$enrich_plots) > 0) {
+        for (db in case$dbs) {
+            plots <- list()
+            for (plotname in names(case$enrich_plots)) {
+                plotargs <- case$enrich_plots[[plotname]]
+                plotargs$data <- enrich[enrich$Database == db, , drop = FALSE]
-        if (nrow(enriched[[db]]) == 0) {
-            log_warn(paste0("  No enriched terms for ", db))
-            next
-        }
+                p <- do_call(VizEnrichment, plotargs)
-        enrich_p <- plotEnrich(enriched[[db]], showTerms = 20, title = db) +
-            theme_prism()
-        enrich_plot <- file.path(odir, paste0("Enrichr-", db, ".png"))
-        png(enrich_plot, res = 100, height = 1000, width = 1000)
-        print(enrich_p)
-        dev.off()
-        enrich_plot_pdf <- file.path(odir, paste0("Enrichr-", db, ".pdf"))
-        pdf(enrich_plot_pdf, height = 10, width = 10)
-        print(enrich_p)
-        dev.off()
+                outprefix <- file.path(info$prefix, paste0("enrich.", slugify(db), ".", slugify(plotname)))
+                attr(p, "height") <- attr(p, "height") / 1.5
+                save_plot(p, outprefix, plotargs$devpars, formats = "png")
+                plots[[length(plots) + 1]] <- reporter$image(outprefix, c(), FALSE)
+            }
+            reporter$add2(
+                list(name = db, contents = plots),
+                hs = c(info$section, info$name),
+                hs2 = "Enrichment Analysis",
+                ui = "tabs"
+            )
+        }
     }
 }
-do_case <- function(casename) {
-    log_info("- Running for case: {casename} ...")
-    case <- cases[[casename]]
-    info <- casename_info(casename, cases, outdir, create = TRUE)
-    log_debug("  Calculating average expression ...")
+run_case <- function(name) {
+    log$info("Case: {name} ...")
+    case <- cases[[name]]
+    log$info("- Subsetting cells and calculating average expression ...")
     if (!is.null(case$subset)) {
-        tryCatch({
-            sobj <- subset(srtobj, !!parse_expr(case$subset))
-        }, error = function(e) {
-            log_warn("  No cells found for the subset, skipping ...")
-        })
+        subobj <- filter(srtobj, !!parse_expr(case$subset))
     } else {
-        sobj <- srtobj
+        subobj <- srtobj
+    }
+    case$group.by <- case$group.by %||% "Identity"
+    if (is.null(case$ident)) {
+        case$ident <- as.character(unique(subobj@meta.data[[case$group.by]]))
     }
     avgexpr <- AverageExpression(
-        sobj,
+        subobj,
         group.by = case$group.by,
         assays = assay
     )[[assay]]
     # https://github.com/satijalab/seurat/issues/7893
-    colnames(avgexpr) <- as.character(unique(sobj@meta.data[[case$group.by]]))
+    colnames(avgexpr) <- as.character(unique(subobj@meta.data[[case$group.by]]))
     avgexpr <- avgexpr[, case$ident, drop = FALSE]
-    avgexpr <- avgexpr[order(-avgexpr), , drop = FALSE]
-    do_enrich(avgexpr, info$casedir)
+    for (idt in case$ident) {
+        log$info("- Processing {idt} ...")
+        info <- case_info(paste0(name, "::", idt), outdir, create = TRUE)
+        expr <- avgexpr[, idt, drop = FALSE]
+        expr <- expr[order(expr, decreasing = TRUE), , drop = FALSE]
+        expr <- expr[1:min(case$n, nrow(expr)), , drop = FALSE]
+        expr <- as.data.frame(expr)
+        expr$gene <- rownames(expr)
+        colnames(expr) <- c("avg_expr", "gene")
+        expr <- expr[, c("gene", "avg_expr"), drop = FALSE]
+        log$info("  Performing enrichment analysis ...")
+        process_markers(expr, info, case = list(
+            ident = idt,
+            dbs = case$dbs,
+            enrich_style = case$enrich_style,
+            enrich_plots = case$enrich_plots
+        ))
+    }
-    add_case_report(info)
+    invisible()
 }
-add_case_report <- function(info) {
-    log_debug("  Adding case report ...")
-    h1 = info$h1
-    h2 = info$h2
-    if (!is.null(info$error)) {
-        add_report(
-            list(
-                kind = "descr",
-                content = paste0("Top ", n, " expressing genes")
-            ),
-            list(kind = "error", content = info$error),
-            h1 = h1,
-            h2 = ifelse(h2 == "#", "Top Expressing Genes", h2),
-            h3 = ifelse(h2 == "#", "#", "Top Expressing Genes")
-        )
-    } else {
-        add_report(
-            list(
-                kind = "descr",
-                content = paste0("Top ", n, " expressing genes")
-            ),
-            list(
-                kind = "table",
-                src = file.path(info$casedir, "exprn.txt")
-            ),
-            h1 = h1,
-            h2 = ifelse(h2 == "#", "Top Expressing Genes", h2),
-            h3 = ifelse(h2 == "#", "#", "Top Expressing Genes")
-        )
-        add_report(
-            list(
-                kind = "descr",
-                content = paste0("Enrichment analysis for the top ", n, " expressing genes")
-            ),
-            list(kind = "enrichr", dir = info$casedir),
-            h1 = h1,
-            h2 = ifelse(h2 == "#", "Enrichment Analysis", h2),
-            h3 = ifelse(h2 == "#", "#", "Enrichment Analysis")
-        )
-    }
-}
+sapply(names(cases), run_case)
-sapply(sort(names(cases)), do_case)
-save_report(joboutdir)
+reporter$save(joboutdir)

biopipen/scripts/scrna/celltypist-wrapper.py CHANGED Viewed

@@ -12,8 +12,9 @@ parser.add_argument(
 parser.add_argument(
     "-c",
     "--over_clustering",
-    default="seurat_clusters",
-    help="Over clustering. Ignored if the column does not exist.",
+    required=False,
+    default=None,
+    help="Over clustering. Error if the column does not exist.",
 )
@@ -25,8 +26,9 @@ if __name__ == "__main__":
     adata = sc.read_h5ad(args.input)
     over_clustering = args.over_clustering
     if over_clustering and over_clustering not in adata.obs.columns:
-        print("WARNING: Over clustering column not found. Ignoring over clustering.")
-        over_clustering = None
+        raise ValueError(
+            f"Over clustering column '{over_clustering}' not found in AnnData object."
+        )
     annotated = celltypist.annotate(
         adata,

biopipen/scripts/scrna/seurat_anndata_conversion.py ADDED Viewed

@@ -0,0 +1,81 @@
+"""Convert Seurat objects to AnnData format back and forth.
+Need R and R packages Seurat, SeuratDisk and biopipen.utils.R installed.
+"""
+def convert_seurat_to_anndata(
+    input_file,
+    output_file,
+    assay=None,
+    subset=None,
+    rscript="Rscript",
+):
+    """Convert Seurat object to AnnData format.
+    Args:
+        input_file (str): Path to the input Seurat RDS or qs/qs2 file.
+        output_file (str): Path to the output AnnData H5AD file.
+        assay (str): Name of the assay to use in the Seurat object.
+        subset (str): An R expression to subset the Seurat object to convert.
+        rscript (RScript): R script executor.
+    """
+    from biopipen.utils.misc import run_command
+    script = f"""
+        library(biopipen.utils)
+        assay <- {repr(assay) if assay else 'NULL'}
+        subset <- {repr(subset) if subset else 'NULL'}
+        ConvertSeuratToAnnData(
+            "{input_file}", "{output_file}", assay = assay, subset = subset
+        )
+    """
+    # Save the script to a temporary file
+    from tempfile import NamedTemporaryFile
+    with NamedTemporaryFile(suffix=".R", delete=False) as temp_script:
+        temp_script.write(script.encode('utf-8'))
+        temp_script_path = temp_script.name
+    # Run the R script using the provided Rscript command
+    cmd = [rscript, temp_script_path]
+    run_command(cmd, fg=True)
+def convert_anndata_to_seurat(
+    input_file,
+    output_file,
+    assay=None,
+    rscript="Rscript",
+):
+    """Convert AnnData object to Seurat format.
+    Args:
+        input_file (str): Path to the input AnnData H5AD file.
+        output_file (str): Path to the output Seurat RDS or qs/qs2 file.
+        assay (str): Name of the assay to use in the Seurat object.
+        rscript (RScript): R script executor.
+    """
+    from biopipen.utils.misc import run_command
+    script = f"""
+        library(biopipen.utils)
+        assay <- {repr(assay) if assay else 'NULL'}
+        ConvertAnnDataToSeurat(
+            "{input_file}", "{output_file}", assay = assay
+        )
+    """
+    # Save the script to a temporary file
+    from tempfile import NamedTemporaryFile
+    with NamedTemporaryFile(suffix=".R", delete=False) as temp_script:
+        temp_script.write(script.encode('utf-8'))
+        temp_script_path = temp_script.name
+    # Run the R script using the provided Rscript command
+    cmd = [rscript, temp_script_path]
+    run_command(cmd, fg=True)

biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

Potentially problematic release.

biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl