PyPI - biopipen - Versions diffs - 0.21.2__py3-none-any.whl → 0.22.1__py3-none-any.whl - Mend

biopipen 0.21.2__py3-none-any.whl → 0.22.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (62) hide show

biopipen/__init__.py +1 -1
biopipen/core/filters.py +142 -0
biopipen/ns/scrna.py +19 -1
biopipen/ns/tcr.py +30 -10
biopipen/reports/delim/SampleInfo.svelte +2 -22
biopipen/reports/scrna/CellsDistribution.svelte +4 -39
biopipen/reports/scrna/MarkersFinder.svelte +6 -126
biopipen/reports/scrna/MetaMarkers.svelte +3 -75
biopipen/reports/scrna/RadarPlots.svelte +4 -20
biopipen/reports/scrna/ScFGSEA.svelte +4 -23
biopipen/reports/scrna/SeuratClusterStats.svelte +3 -69
biopipen/reports/scrna/SeuratPreparing.svelte +3 -26
biopipen/reports/scrna/TopExpressingGenes.svelte +3 -41
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +17 -16
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +13 -16
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +44 -52
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +2 -8
biopipen/reports/tcr/CDR3AAPhyschem.svelte +18 -65
biopipen/reports/tcr/CloneResidency.svelte +3 -93
biopipen/reports/tcr/Immunarch.svelte +4 -168
biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
biopipen/reports/tcr/TESSA.svelte +11 -28
biopipen/scripts/delim/SampleInfo.R +41 -7
biopipen/scripts/scrna/CellsDistribution.R +127 -16
biopipen/scripts/scrna/MarkersFinder.R +245 -100
biopipen/scripts/scrna/MetaMarkers.R +163 -82
biopipen/scripts/scrna/RadarPlots.R +163 -110
biopipen/scripts/scrna/ScFGSEA.R +51 -11
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -13
biopipen/scripts/scrna/SeuratClusterStats-features.R +58 -53
biopipen/scripts/scrna/SeuratClusterStats-stats.R +39 -21
biopipen/scripts/scrna/SeuratClusterStats.R +4 -2
biopipen/scripts/scrna/SeuratClustering.R +73 -26
biopipen/scripts/scrna/SeuratMetadataMutater.R +13 -1
biopipen/scripts/scrna/SeuratPreparing.R +93 -19
biopipen/scripts/scrna/TopExpressingGenes.R +100 -18
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +21 -8
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +25 -3
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +1 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +14 -3
biopipen/scripts/tcr/Attach2Seurat.R +2 -1
biopipen/scripts/tcr/CDR3AAPhyschem.R +123 -10
biopipen/scripts/tcr/CloneResidency.R +114 -34
biopipen/scripts/tcr/Immunarch-basic.R +18 -4
biopipen/scripts/tcr/Immunarch-clonality.R +14 -0
biopipen/scripts/tcr/Immunarch-diversity.R +123 -18
biopipen/scripts/tcr/Immunarch-geneusage.R +23 -1
biopipen/scripts/tcr/Immunarch-kmer.R +45 -3
biopipen/scripts/tcr/Immunarch-overlap.R +62 -0
biopipen/scripts/tcr/Immunarch-spectratyping.R +18 -2
biopipen/scripts/tcr/Immunarch-tracking.R +24 -1
biopipen/scripts/tcr/Immunarch-vjjunc.R +17 -2
biopipen/scripts/tcr/Immunarch.R +7 -0
biopipen/scripts/tcr/ImmunarchLoading.R +22 -23
biopipen/scripts/tcr/TCRClusterStats.R +124 -11
biopipen/scripts/tcr/TCRClustering.R +8 -9
biopipen/scripts/tcr/TESSA.R +66 -41
biopipen/utils/misc.R +96 -1
{biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/METADATA +1 -1
{biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/RECORD +62 -62
{biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/WHEEL +0 -0
{biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/entry_points.txt +0 -0

biopipen/scripts/scrna/SeuratClusterStats-features.R CHANGED Viewed

@@ -5,20 +5,6 @@ features = {{envs.features | r: todot="-", skip=1}}
 odir = file.path(outdir, "features")
 dir.create(odir, recursive=TRUE, showWarnings=FALSE)
-report_toc_file = file.path(odir, "report_toc.json")
-# Section => list(
-#   list(name?, kind, file),
-#   ...
-# )
-report_toc = list()
-.add_toc = function(section, toc) {
-    if (section %in% names(report_toc)) {
-        report_toc[[section]][[length(report_toc[[section]]) + 1]] <<- toc
-    } else {
-        report_toc[[section]] <<- list(toc)
-    }
-}
 .get_features = function(features) {
     if (is.null(features)) { features = 20 }
@@ -50,11 +36,9 @@ report_toc = list()
 }
 do_one_features = function(name) {
-    print(paste0("Doing features for: ", name))
+    log_info("Doing features for: {name}")
     case = list_update(features_defaults, features[[name]])
-    toc = list()
-    if (!is.null(case$section)) { toc$name = name }
     case$devpars = list_update(features_defaults$devpars, features[[name]]$devpars)
     excluded_args = c(
         "section",
@@ -65,30 +49,40 @@ do_one_features = function(name) {
         "kind"
     )
+    if (is.character(case$subset)) {
+        case$object = srtobj %>% filter(!!rlang::parse_expr(case$subset))
+    } else {
+        case$object = srtobj
+    }
+    if (!is.null(case$ident)) {
+        Idents(case$object) = case$ident
+    }
+    n_uidents = length(unique(Idents(case$object)))
     fn = NULL
     default_devpars = NULL
     if ("ridge" %in% case$kind) {
         case$kind = "ridge"
         if (is.null(case$cols)) {
-            case$cols = pal_ucscgb(alpha = .8)(26)
+            case$cols = pal_biopipen()(32)
         }
         excluded_args = c(excluded_args, "split.by")
         fn = RidgePlot
-        default_devpars = function(features, ncol, uidents) {
+        default_devpars = function(features, ncol) {
             if (is.null(ncol)) { ncol = 1 }
             list(
                 width = 400 * ncol,
-                height = ceiling(length(features) / ncol) * ifelse(length(uidents) < 10, 300, 400),
+                height = ceiling(length(features) / ncol) * ifelse(n_uidents < 10, 300, 400),
                 res = 100
             )
         }
     } else if ("vln" %in% case$kind || "violin" %in% case$kind) {
         case$kind = "violin"
         if (is.null(case$cols)) {
-            case$cols = pal_ucscgb(alpha = .8)(26)
+            case$cols = pal_biopipen()(n_uidents)
         }
         fn = VlnPlot
-        default_devpars = function(features, ncol, uidents) {
+        default_devpars = function(features, ncol) {
             if (is.null(ncol)) { ncol = 1 }
             list(
                 width = 400 * ncol,
@@ -99,12 +93,12 @@ do_one_features = function(name) {
     } else if ("feature" %in% case$kind) {
         case$kind = "feature"
         if (is.null(case$cols)) {
-            case$cols = c("lightgrey", pal_ucscgb()(1))
+            case$cols = c("lightgrey", pal_biopipen()(1))
         }
         excluded_args = c(excluded_args, "group.by", "assay")
         case$shape.by = case$group.by
         fn = FeaturePlot
-        default_devpars = function(features, ncol, uidents) {
+        default_devpars = function(features, ncol) {
             if (is.null(ncol)) { ncol = 1 }
             list(
                 width = 400 * ncol,
@@ -115,16 +109,16 @@ do_one_features = function(name) {
     } else if ("dot" %in% case$kind) {
         case$kind = "dot"
         if (is.null(case$cols)) {
-            case$cols = c("lightgrey", pal_ucscgb()(1))
+            case$cols = c("lightgrey", pal_biopipen()(1))
         }
         if (is.null(case$plus)) {
             case$plus = 'theme_prism(axis_text_angle=90)'
         }
         excluded_args = c(excluded_args, "slot", "ncol")
         fn = DotPlot
-        default_devpars = function(features, ncol, uidents) {
+        default_devpars = function(features, ncol) {
             list(
-                height = max(length(uidents) * 80 + 150, 420),
+                height = max(n_uidents * 80 + 150, 420),
                 width = length(features) * 50 + 150,
                 res = 100
             )
@@ -133,20 +127,20 @@ do_one_features = function(name) {
         case$kind = "heatmap"
         case = list_update(
             list(
-                group.colors = pal_ucscgb(alpha = .8)(26),
+                group.colors = pal_biopipen()(n_uidents),
                 size = 3.5,
                 group.bar.height = 0.01
             ),
             case
         )
         if (is.null(case$plus)) {
-            case$plus = 'scale_fill_gradientn(colors = c("lightgrey", pal_ucscgb()(1)), na.value = "white")'
+            case$plus = 'scale_fill_gradientn(colors = c("lightgrey", pal_biopipen()(1)), na.value = "white")'
         }
         excluded_args = c(excluded_args, "group.by", "split.by", "downsample", "ncol")
         fn = DoHeatmap
-        default_devpars = function(features, ncol, uidents) {
+        default_devpars = function(features, ncol) {
             list(
-                width = length(uidents) * 60 + 150,
+                width = n_uidents * 60 + 150,
                 height = length(features) * 40 + 150,
                 res = 100
             )
@@ -160,7 +154,7 @@ do_one_features = function(name) {
             case$slot = "data"
         }
     } else {
-        stop("Unknown kind of plot")
+        stop(paste0("Unknown kind of plot: ", case$kind))
     }
     for (arg in excluded_args) {
@@ -168,33 +162,34 @@ do_one_features = function(name) {
         case[[arg]] = NULL
     }
-    if (is.character(subset)) {
-        case$object = srtobj %>% filter(!!rlang::parse_expr(subset))
-    } else {
-        case$object = srtobj
-    }
-    if (!is.null(ident)) {
-        Idents(case$object) = ident
-    }
     case$features = .get_features(case$features)
     if (!is.null(case$ncol)) {
         case$ncol = min(case$ncol, length(case$features))
     }
-    toc$kind = kind
     if (kind == "table") {
         expr = do_call(fn, case)$RNA %>%
             as.data.frame() %>%
             rownames_to_column("Feature") %>%
             select(Feature, everything())
-        toc$file = paste0(slugify(name), ".txt")
-        write.table(expr, file.path(odir, toc$file), sep="\t", quote=FALSE, row.names=FALSE)
-    } else {
-        devpars = list_update(
-            default_devpars(case$features, case$ncol, unique(Idents(case$object))),
-            devpars
+        exprfile = paste0(slugify(name), ".txt")
+        write.table(expr, file.path(odir, exprfile), sep="\t", quote=FALSE, row.names=FALSE)
+        add_report(
+            list(
+                kind = "descr",
+                content = paste0("Table of expression value for selected features, by ", ident)
+            ),
+            list(
+                kind = "table",
+                src = exprfile
+            ),
+            h1 = ifelse(is.null(case$section), name, case$section),
+            h2 = ifelse(is.null(case$section), "#", name)
         )
+    } else {
+        devpars = list_update(default_devpars(case$features, case$ncol), devpars)
         if (kind == "heatmap") {
             if (!exists("downsample") || is.null(downsample)) {
                 downsample = "average"
@@ -202,9 +197,9 @@ do_one_features = function(name) {
             if (downsample %in% c("average", "mean")) {
                 case$object = AverageExpression(case$object, return.seurat = TRUE)
             } else if (is.integer(downsample)) {
-                case$object = subset(case$object, downsample = downsample)
+                case$object = base::subset(case$object, downsample = downsample)
             } else {
-                stop("Unknown downsample method.")
+                stop(paste0("Unknown downsample method: ", downsample))
             }
         }
         p = do_call(fn, case)
@@ -213,8 +208,7 @@ do_one_features = function(name) {
                 p = p + eval(parse(text = pls))
             }
         }
-        figfile = file.path(odir, paste0(slugify(name), ".", kind, ".png"))
-        toc$file = basename(figfile)
+        figfile = file.path(odir, paste0(slugify(name), ".", slugify(case$kind), ".png"))
         png(figfile, width=devpars$width, height=devpars$height, res=devpars$res)
         tryCatch({
             print(p)
@@ -229,9 +223,20 @@ do_one_features = function(name) {
             )
         })
         dev.off()
+        add_report(
+            list(
+                kind = "descr",
+                content = paste0(kind, "plots for selected features, by ", ident)
+            ),
+            list(
+                kind = "image",
+                src = figfile
+            ),
+            h1 = ifelse(is.null(section), name, section),
+            h2 = ifelse(is.null(section), "#", name)
+        )
     }
-    .add_toc(if (is.null(section)) name else section, toc)
 }
 sapply(names(features), do_one_features)
-.save_toc()

biopipen/scripts/scrna/SeuratClusterStats-stats.R CHANGED Viewed

@@ -5,22 +5,9 @@ stats = {{envs.stats | r: todot="-", skip=1}}
 odir = file.path(outdir, "stats")
 dir.create(odir, recursive=TRUE, showWarnings=FALSE)
-report_toc_file = file.path(odir, "report_toc.json")
-# Realname => {bar: ..., pie: ..., table: ...}
-report_toc = list()
-.add_toc = function(name, toc) {
-    report_toc[[name]] <<- toc
-}
-.save_toc = function() {
-    writeLines(toJSON(report_toc, pretty = TRUE, auto_unbox = TRUE), report_toc_file)
-}
 do_one_stats = function(name) {
-    print(paste0("Doing stats for: ", name))
-    toc = list()
+    log_info("Doing stats for: {name}")
     case = list_update(stats_defaults, stats[[name]])
     case$devpars = list_update(stats_defaults$devpars, case$devpars)
@@ -45,7 +32,6 @@ do_one_stats = function(name) {
         mutate(.frac = .n / sum(.n))
     if (isTRUE(case$table)) {
-        toc$table = basename(tablefile)
         write.table(df_cells, tablefile, sep="\t", quote=FALSE, row.names=FALSE)
     }
     if (isTRUE(case$pie)) {
@@ -54,7 +40,7 @@ do_one_stats = function(name) {
             ggplot(aes(x="", y=.n, fill=!!sym(case$ident))) +
             geom_bar(stat="identity", width=1, alpha=.8, position = position_stack(reverse = TRUE)) +
             coord_polar("y", start=0) +
-            scale_fill_ucscgb(alpha=.8) +
+            scale_fill_biopipen() +
             guides(fill = guide_legend(title = case$ident)) +
             theme_void() +
             geom_label(
@@ -72,7 +58,6 @@ do_one_stats = function(name) {
             p_pie = p_pie + facet_wrap(case$split.by)
         }
-        toc$pie = basename(piefile)
         png(piefile, width=case$devpars$width, height=case$devpars$height, res=case$devpars$res)
         print(p_pie)
         dev.off()
@@ -89,20 +74,53 @@ do_one_stats = function(name) {
         )) +
         geom_bar(stat="identity", position=bar_position, alpha=.8) +
         theme_prism(axis_text_angle = 90) +
-        scale_fill_manual(values=rep(pal_ucscgb(alpha=.8)(26), 10)[1:max(ngroups, nidents)]) +
+        scale_fill_biopipen() +
         ylab(ifelse(isTRUE(case$frac), "Fraction of cells", "Number of cells"))
     if (!is.null(case$split.by)) {
         p = p + facet_wrap(case$split.by)
     }
-    toc$bar = basename(figfile)
     png(figfile, width=case$devpars$width, height=case$devpars$height, res=case$devpars$res)
     print(p)
     dev.off()
-    .add_toc(name, toc)
+    add_report(
+        list(
+            kind = "descr",
+            content = paste0(
+                "Plots showing the ",
+                ifelse(isTRUE(case$frac), "number/faction", "number"),
+                " of cells per cluster",
+                ifelse(
+                    is.null(case$group.by),
+                    "",
+                    paste0(", by ", paste0(case$group.by, collapse = ", "))
+                )
+            )
+        ),
+        h1 = name
+    )
+    add_report(
+        list(
+            name = "Bar Plot",
+            contents = list(list(kind = "image", src = figfile))
+        ),
+        h1 = name,
+        ui = "tabs"
+    )
+    if (isTRUE(case$pie)) {
+        add_report(
+            list(
+                name = "Pie Chart",
+                contents = list(list(kind = "image", src = piefile))
+            ),
+            h1 = name,
+            ui = "tabs"
+        )
+    }
 }
 sapply(names(stats), do_one_stats)
-.save_toc()

biopipen/scripts/scrna/SeuratClusterStats.R CHANGED Viewed

@@ -1,21 +1,23 @@
 source("{{biopipen_dir}}/utils/misc.R")
 source("{{biopipen_dir}}/utils/plot.R")
-library(jsonlite)
 library(slugify)
 library(Seurat)
 library(rlang)
 library(dplyr)
 library(tibble)
 library(ggprism)
-library(ggsci)
 library(ggrepel)
 library(tidyseurat)
 srtfile = {{in.srtobj | r}}
 outdir = {{out.outdir | r}}
+joboutdir = {{job.outdir | r}}
+log_info("Loading Seurat object ...")
 srtobj = readRDS(srtfile)
 {% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-stats.R" %}
 {% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-features.R" %}
 {% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-dimplots.R" %}
+save_report(joboutdir)

biopipen/scripts/scrna/SeuratClustering.R CHANGED Viewed

@@ -4,11 +4,13 @@ library(Seurat)
 library(future)
 library(tidyr)
 library(dplyr)
+library(digest)
 set.seed(8525)
 srtfile = {{in.srtobj | quote}}
 rdsfile = {{out.rdsfile | quote}}
+joboutdir = {{job.outdir | quote}}
 envs = {{envs | r: todot="-"}}
 options(future.globals.maxSize = 80000 * 1024^2)
@@ -26,7 +28,46 @@ envs$IntegrateData = .expand_dims(envs$IntegrateData)
 envs$RunUMAP = .expand_dims(envs$RunUMAP)
 envs$FindNeighbors = .expand_dims(envs$FindNeighbors)
+log_info("Reading Seurat object ...")
 sobj = readRDS(srtfile)
+if (isTRUE(envs$cache)) {
+    envs$cache = joboutdir
+}
+if (is.character(envs$cache) && nchar(envs$cache) > 0) {
+    log_info("Obtainning the signature ...")
+    envs2 = envs
+    envs2$ncores <- NULL
+    sig = c(
+        capture.output(str(sobj)),
+        "\n\n-------------------\n\n",
+        capture.output(str(envs2)),
+        "\n"
+    )
+    digested_sig = digest::digest(sig, algo = "md5")
+    cached_file = file.path(envs$cache, paste0(digested_sig, ".cached.RDS"))
+    if (file.exists(cached_file)) {
+        log_info("Using cached results {cached_file}")
+        # copy cached file to rdsfile
+        file.copy(cached_file, rdsfile, copy.date = TRUE)
+        quit()
+    } else {
+        log_info("Cached results not found, logging the current and cached signatures.")
+        log_info("- Current signature:")
+        print(sig)
+        sigfiles = Sys.glob(file.path(envs$cache, "*.signature.txt"))
+        for (sigfile in sigfiles) {
+            log_info("- Found cached signature file: {sigfile}")
+            cached_sig = readLines(sigfile)
+            log_info("- Cached signature:")
+            print(cached_sig)
+        }
+        writeLines(sig, file.path(envs$cache, paste0(digested_sig, ".signature.txt")))
+    }
+}
 obj_list = SplitObject(sobj, split.by = "Sample")
 rm(sobj)
@@ -51,27 +92,28 @@ if (!is.null(envs$FindIntegrationAnchors$reference)) {
 # ############################
 # Using SCT
 # https://satijalab.org/seurat/articles/integration_rpca.html#performing-integration-on-datasets-normalized-with-sctransform-1
-print("- Performing SCTransform on each sample ...")
+log_info("########## Using SCT route ##########")
+log_info("Performing SCTransform on each sample ...")
 obj_list <- lapply(X = obj_list, FUN = function(x) {
-    print(paste("  Performing SCTransform on sample:", x@meta.data$Sample[1], "..."))
+    log_info("- On sample: {x@meta.data$Sample[1]} ...")
     # # Needed?
     # DefaultAssay(x) <- "RNA"
     args = list_update(envs$SCTransform, list(object = x))
     do_call(SCTransform, args)
 })
-print("- Running SelectIntegrationFeatures ...")
+log_info("Running SelectIntegrationFeatures ...")
 envs$SelectIntegrationFeatures$object.list = obj_list
 features = do_call(SelectIntegrationFeatures, envs$SelectIntegrationFeatures)
-print("- Running PrepSCTIntegration ...")
+log_info("Running PrepSCTIntegration ...")
 envs$PrepSCTIntegration$object.list = obj_list
 envs$PrepSCTIntegration$anchor.features = features
 obj_list = do_call(PrepSCTIntegration, envs$PrepSCTIntegration)
-print("- Running PCA on each sample ...")
+log_info("Running PCA on each sample ...")
 obj_list = lapply(X = obj_list, FUN = function(x) {
-    print(paste("  On sample:", x@meta.data$Sample[1], "..."))
+    log_info("- On sample: {x@meta.data$Sample[1]} ...")
     npcs = if (is.null(envs$RunPCA1$npcs)) 50 else envs$RunPCA1$npcs
     args = list_setdefault(
         envs$RunPCA1,
@@ -83,11 +125,11 @@ obj_list = lapply(X = obj_list, FUN = function(x) {
     do_call(RunPCA, args)
 })
-print("- Running FindIntegrationAnchors ...")
+log_info("Running FindIntegrationAnchors ...")
 if (!is.null(envs$FindIntegrationAnchors$reference)) {
-    print(
+    log_info(
         paste(
-            "  Using samples as reference:",
+            "- Using samples as reference:",
             paste(envs$FindIntegrationAnchors$reference, collapse = ", ")
         )
     )
@@ -106,7 +148,7 @@ fia_args$dims = 1:min(min_dim, max(fia_args$dims))
 fia_args$k.score = min(30, min_dim - 1)
 anchors = do_call(FindIntegrationAnchors, fia_args)
-print("- Running IntegrateData ...")
+log_info("Running IntegrateData ...")
 envs$IntegrateData$anchorset = anchors
 id_args = list_setdefault(
     envs$IntegrateData,
@@ -139,9 +181,10 @@ tryCatch({
 # ############################
 # Using rpca
 # https://satijalab.org/seurat/articles/integration_rpca.html
-print("- Performing NormalizeData + FindVariableFeatures on each sample ...")
+log_info("########## Using rpca route ##########")
+log_info("Performing NormalizeData + FindVariableFeatures on each sample ...")
 obj_list <- lapply(X = obj_list, FUN = function(x) {
-    print(paste("  On sample:", x@meta.data$Sample[1], "..."))
+    log_info("- On sample: {x@meta.data$Sample[1]} ...")
     DefaultAssay(x) <- "RNA"
     args = list_update(envs$NormalizeData, list(object = x))
     x <- do_call(NormalizeData, args)
@@ -150,14 +193,13 @@ obj_list <- lapply(X = obj_list, FUN = function(x) {
     do_call(FindVariableFeatures, args)
 })
-print("- Running SelectIntegrationFeatures ...")
+log_info("Running SelectIntegrationFeatures ...")
 envs$SelectIntegrationFeatures$object.list = obj_list
 features = do_call(SelectIntegrationFeatures, envs$SelectIntegrationFeatures)
-print("- Running ScaleData + RunPCA on each sample ...")
+log_info("Running ScaleData + RunPCA on each sample ...")
 obj_list <- lapply(X = obj_list, FUN = function(x) {
-    print(paste("  On sample:", x@meta.data$Sample[1], "..."))
+    log_info("- On sample: {x@meta.data$Sample[1]} ...")
     args = list_setdefault(envs$ScaleData1, object = x, features = features)
     x <- do_call(ScaleData, args)
@@ -172,11 +214,11 @@ obj_list <- lapply(X = obj_list, FUN = function(x) {
     do_call(RunPCA, args)
 })
-print("- Running FindIntegrationAnchors ...")
+log_info("Running FindIntegrationAnchors ...")
 if (!is.null(envs$FindIntegrationAnchors$reference)) {
-    print(
+    log_info(
         paste(
-            "  Using samples as reference:",
+            "- Using samples as reference:",
             paste(envs$FindIntegrationAnchors$reference, collapse = ", ")
         )
     )
@@ -194,7 +236,7 @@ fia_args$dims = 1:min(min_dim, max(fia_args$dims))
 fia_args$k.score = min(30, min_dim - 1)
 anchors = do_call(FindIntegrationAnchors, fia_args)
-print("- Running IntegrateData ...")
+log_info("Running IntegrateData ...")
 envs$IntegrateData$anchorset = anchors
 id_args = list_setdefault(envs$IntegrateData, dims = 1:30)
 id_args$dims = 1:min(min_dim, max(id_args$dims))
@@ -207,7 +249,7 @@ obj_list = do_call(ScaleData, envs$ScaleData)
 {%- endif %}
-print("- Running RunPCA ...")
+log_info("Running RunPCA ...")
 pca_args = list_setdefault(
     envs$RunPCA,
     object = obj_list,
@@ -216,7 +258,7 @@ pca_args = list_setdefault(
 pca_args$npcs = min(pca_args$npcs, ncol(obj_list) - 1)
 obj_list = do_call(RunPCA, pca_args)
-print("- Running RunUMAP ...")
+log_info("Running RunUMAP ...")
 umap_args = list_setdefault(
     envs$RunUMAP,
     object = obj_list,
@@ -225,16 +267,21 @@ umap_args = list_setdefault(
 umap_args$dims = 1:min(max(umap_args$dims), ncol(obj_list) - 1)
 obj_list = do_call(RunUMAP, umap_args)
-print("- Running FindNeighbors ...")
+log_info("Running FindNeighbors ...")
 envs$FindNeighbors$object = obj_list
 obj_list = do_call(FindNeighbors, envs$FindNeighbors)
-print("- Running FindClusters ...")
+log_info("Running FindClusters ...")
 envs$FindClusters$object = obj_list
 obj_list = do_call(FindClusters, envs$FindClusters)
 nclusters = length(unique(Idents(obj_list)))
-print(paste0("- Identified ", nclusters, " clusters."))
+log_info("Identified {nclusters} clusters.")
-print("- Saving results ...")
+log_info("Saving results ...")
 saveRDS(obj_list, file = rdsfile)
+if (is.character(envs$cache) && nchar(envs$cache) > 0) {
+    log_info("Caching results ...")
+    file.copy(rdsfile, cached_file, overwrite = TRUE)
+}

biopipen/scripts/scrna/SeuratMetadataMutater.R CHANGED Viewed

@@ -1,4 +1,6 @@
+source("{{biopipen_dir}}/utils/misc.R")
 source("{{biopipen_dir}}/utils/mutate_helpers.R")
 library(rlang)
 library(tibble)
 library(dplyr)
@@ -14,7 +16,17 @@ metadata = srt@meta.data
 if (!is.null(metafile)) {
     mdata = read.table(metafile, header=TRUE, row.names=1, sep="\t", check.names=FALSE)
-    metadata = cbind(metadata, mdata[rownames(metadata),,drop=FALSE])
+    ov_cols = intersect(colnames(metadata), colnames(mdata))
+    if (length(ov_cols) > 0) {
+        log_warn(paste0(
+            "The following columns are already present in Seurat object and will be ignored: ",
+            paste(ov_cols, collapse=', ')
+        ))
+    }
+    metadata = cbind(
+        metadata,
+        mdata[rownames(metadata), setdiff(colnames(mdata), ov_cols), drop=FALSE]
+    )
 }
 expr = list()

biopipen 0.21.2__py3-none-any.whl → 0.22.1__py3-none-any.whl

Potentially problematic release.

biopipen 0.21.2__py3-none-any.whl → 0.22.1__py3-none-any.whl