PyPI - biopipen - Versions diffs - 0.23.7__py3-none-any.whl → 0.24.0__py3-none-any.whl - Mend

biopipen 0.23.7__py3-none-any.whl → 0.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (30)

biopipen/__init__.py +1 -1
biopipen/core/proc.py +7 -0
biopipen/ns/cellranger.py +2 -2
biopipen/ns/scrna.py +15 -20
biopipen/ns/tcr.py +8 -6
biopipen/scripts/scrna/ScFGSEA.R +6 -0
biopipen/scripts/scrna/SeuratClustering.R +102 -85
biopipen/scripts/scrna/SeuratPreparing.R +21 -10
biopipen/scripts/scrna/SeuratSubClustering.R +81 -97
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +5 -5
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +3 -3
biopipen/scripts/tcr/Immunarch-basic.R +2 -3
biopipen/scripts/tcr/Immunarch-clonality.R +2 -3
biopipen/scripts/tcr/Immunarch-diversity.R +70 -49
biopipen/scripts/tcr/Immunarch-geneusage.R +2 -3
biopipen/scripts/tcr/Immunarch-kmer.R +2 -3
biopipen/scripts/tcr/Immunarch-overlap.R +2 -3
biopipen/scripts/tcr/Immunarch-spectratyping.R +2 -3
biopipen/scripts/tcr/Immunarch-tracking.R +2 -3
biopipen/scripts/tcr/Immunarch-vjjunc.R +2 -3
biopipen/scripts/tcr/Immunarch.R +1 -1
biopipen/scripts/tcr/ImmunarchLoading.R +2 -0
biopipen/scripts/tcr/TCRClustering.R +6 -2
biopipen/scripts/tcr/TESSA.R +3 -1
biopipen/scripts/tcr/immunarch-patched.R +142 -0
biopipen/utils/caching.R +44 -0
{biopipen-0.23.7.dist-info → biopipen-0.24.0.dist-info}/METADATA +8 -7
{biopipen-0.23.7.dist-info → biopipen-0.24.0.dist-info}/RECORD +30 -28
{biopipen-0.23.7.dist-info → biopipen-0.24.0.dist-info}/WHEEL +0 -0
{biopipen-0.23.7.dist-info → biopipen-0.24.0.dist-info}/entry_points.txt +0 -0

biopipen/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.23.7"
1	+ __version__ = "0.24.0"

biopipen/core/proc.py CHANGED Viewed

@@ -25,3 +25,10 @@ class Proc(PipenProc):
         "filters": {**FILTERS, **filtermanager.filters},
         "search_paths": SEARCH_PATHS + [str(REPORT_DIR)],
     }
+    plugin_opts = {
+        "poplog_pattern": (
+            r"^(?P<level>INFO|WARN|WARNING|CRITICAL|ERROR|DEBUG?)\s*"
+            r"\[\d+-\d+-\d+ \d+:\d+:\d+\] (?P<message>.*)$"
+        )
+    }

biopipen/ns/cellranger.py CHANGED Viewed

@@ -35,7 +35,7 @@ class CellRangerCount(Proc):
             {%- set fastqs = fastqs[0] | glob: "*.fastq.gz" -%}
         {%- endif -%}
         {%- set sample = commonprefix(*fastqs) |
-            regex_replace: "_L\\d+_$", "" |
+            regex_replace: "_L\\d+_?$", "" |
             regex_replace: "_S\\d+$", "" -%}
         {{- sample -}}
     """
@@ -84,7 +84,7 @@ class CellRangerVdj(Proc):
             {%- set fastqs = fastqs[0] | glob: "*.fastq.gz" -%}
         {%- endif -%}
         {%- set sample = commonprefix(*fastqs) |
-            regex_replace: "_L\\d+_$", "" |
+            regex_replace: "_L\\d+_?$", "" |
             regex_replace: "_S\\d+$", "" -%}
         {{- sample -}}
     """

biopipen/ns/scrna.py CHANGED Viewed

@@ -278,18 +278,14 @@ class SeuratClustering(Proc):
                 The results will be saved in `seurat_clusters_<resolution>`.
                 The final resolution will be used to define the clusters at `seurat_clusters`.
             - <more>: See <https://satijalab.org/seurat/reference/findclusters>
-        cache (type=auto): Whether to cache the seurat object with cluster information.
+        cache (type=auto): Whether to cache the information at different steps.
             If `True`, the seurat object will be cached in the job output directory, which will be not cleaned up when job is rerunning.
-            The cached seurat object will be saved as `<signature>.cached.RDS` file, where `<signature>` is the signature determined by
+            The cached seurat object will be saved as `<signature>.<kind>.RDS` file, where `<signature>` is the signature determined by
             the input and envs of the process.
-            See -
-            * <https://github.com/satijalab/seurat/issues/7849>
-            * <https://github.com/satijalab/seurat/issues/5358> and
-            * <https://github.com/satijalab/seurat/issues/6748> for more details.
+            See <https://github.com/satijalab/seurat/issues/7849>, <https://github.com/satijalab/seurat/issues/5358> and
+            <https://github.com/satijalab/seurat/issues/6748> for more details also about reproducibility issues.
             To not use the cached seurat object, you can either set `cache` to `False` or delete the cached file at
-            `<signature>.cached.RDS` in the cache directory.
-            If `True`, the cache directory is `.pipen/<Pipeline>/SeuratClustering/0/output/`
-            You can also specify customized directory to save the cached seurat object by setting `cache` to the directory path.
+            `<signature>.RDS` in the cache directory.
     Requires:
         r-seurat:
@@ -309,7 +305,7 @@ class SeuratClustering(Proc):
         "RunUMAP": {"dims": 30},
         "FindNeighbors": {},
         "FindClusters": {"resolution": 0.8},
-        "cache": False,
+        "cache": config.path.tmpdir,
     }
     script = "file://../scripts/scrna/SeuratClustering.R"
@@ -361,18 +357,14 @@ class SeuratSubClustering(Proc):
                 The results will be saved in `<casename>_<resolution>`.
                 The final resolution will be used to define the clusters at `<casename>`.
             - <more>: See <https://satijalab.org/seurat/reference/findclusters>
-        cache (type=auto): Whether to cache the seurat object with cluster information.
+        cache (type=auto): Whether to cache the information at different steps.
             If `True`, the seurat object will be cached in the job output directory, which will be not cleaned up when job is rerunning.
-            The cached seurat object will be saved as `<signature>.cached.RDS` file, where `<signature>` is the signature determined by
+            The cached seurat object will be saved as `<signature>.<kind>.RDS` file, where `<signature>` is the signature determined by
             the input and envs of the process.
-            See -
-            * <https://github.com/satijalab/seurat/issues/7849>
-            * <https://github.com/satijalab/seurat/issues/5358> and
-            * <https://github.com/satijalab/seurat/issues/6748> for more details.
+            See <https://github.com/satijalab/seurat/issues/7849>, <https://github.com/satijalab/seurat/issues/5358> and
+            <https://github.com/satijalab/seurat/issues/6748> for more details also about reproducibility issues.
             To not use the cached seurat object, you can either set `cache` to `False` or delete the cached file at
-            `<signature>.cached.RDS` in the cache directory.
-            If `True`, the cache directory is `.pipen/<Pipeline>/SeuratClustering/0/output/`
-            You can also specify customized directory to save the cached seurat object by setting `cache` to the directory path.
+            `<signature>.RDS` in the cache directory.
         cases (type=json): The cases to perform subclustering.
             Keys are the names of the cases and values are the dicts inherited from `envs` except `mutaters` and `cache`.
             If empty, a case with name `subcluster` will be created with default parameters.
@@ -387,7 +379,7 @@ class SeuratSubClustering(Proc):
         "RunUMAP": {"dims": 30},
         "FindNeighbors": {},
         "FindClusters": {"resolution": 0.8},
-        "cache": False,
+        "cache": config.path.tmpdir,
         "cases": {"subcluster": {}},
     }
     script = "file://../scripts/scrna/SeuratSubClustering.R"
@@ -1463,6 +1455,7 @@ class ScFGSEA(Proc):
         ident-1: The first group of cells to compare
         ident-2: The second group of cells to compare, if not provided, the rest of the cells that are not `NA`s in `group-by` column are used for `ident-2`.
         each: The column name in metadata to separate the cells into different subsets to do the analysis.
+        subset: An expression to subset the cells.
         section: The section name for the report. Worked only when `each` is not specified. Otherwise, the section name will be constructed from `each` and its value.
             This allows different cases to be put into the same section in the report.
         gmtfile: The pathways in GMT format, with the gene names/ids in the same format as the seurat object.
@@ -1513,6 +1506,7 @@ class ScFGSEA(Proc):
         "ident-1": None,
         "ident-2": None,
         "each": None,
+        "subset": None,
         "section": "DEFAULT",
         "gmtfile": "",
         "method": "s2n",
@@ -2000,4 +1994,5 @@ class MetaMarkers(Proc):
     plugin_opts = {
         "report": "file://../reports/scrna/MetaMarkers.svelte",
         "report_paging": 8,
+        "poplog_max": 15,
     }

biopipen/ns/tcr.py CHANGED Viewed

@@ -563,12 +563,13 @@ class Immunarch(Proc):
                     A Gini coefficient of one (or 100 percents) expresses maximal inequality among values (for example where only one person has all the income).
                 - d50: The D50 index.
                     It is the number of types that are needed to cover 50%% of the total abundance.
-                - dxx: The Dxx index.
-                    It is the number of types that are needed to cover xx%% of the total abundance.
-                    The percentage should be specified in the `args` argument using `perc` key.
                 - raref: Species richness from the results of sampling through extrapolation.
             - by: The variables (column names) to group samples.
                 Multiple columns should be separated by `,`.
+            - plot_type (choice): The type of the plot, works when `by` is specified.
+                Not working for `raref`.
+                - box: Boxplot
+                - bar: Barplot with error bars
             - subset: Subset the data before calculating the clonotype volumes.
                 The whole data will be expanded to cell level, and then subsetted.
                 Clone sizes will be re-calculated based on the subsetted data.
@@ -789,9 +790,9 @@ class Immunarch(Proc):
         },
         # Diversity
         "divs": {
-            "filter": None,
             "method": "gini",
             "by": None,
+            "plot_type": "bar",
             "args": {},
             "order": [],
             "test": {
@@ -805,8 +806,8 @@ class Immunarch(Proc):
             "align_y": False,
             "log": False,
             "devpars": {
-                "width": 1000,
-                "height": 1000,
+                "width": 800,
+                "height": 800,
                 "res": 100,
             },
             "subset": None,
@@ -851,6 +852,7 @@ class Immunarch(Proc):
     plugin_opts = {
         "report": "file://../reports/tcr/Immunarch.svelte",
         "report_paging": 3,
+        "poplog_max": 999,
     }

biopipen/scripts/scrna/ScFGSEA.R CHANGED Viewed

@@ -14,6 +14,7 @@ group.by <- {{envs["group-by"] | r}}  # nolint
 ident.1 <- {{envs["ident-1"] | r}}  # nolint
 ident.2 <- {{envs["ident-2"] | r}}  # nolint
 each <- {{envs.each | r}}  # nolint
+subset <- {{envs.subset | r}}  # nolint
 section <- {{envs.section | r}}  # nolint
 gmtfile <- {{envs.gmtfile | r}}  # nolint
 method <- {{envs.method | r}}  # nolint
@@ -43,6 +44,7 @@ expand_cases <- function() {
                 ident.1 = ident.1,
                 ident.2 = ident.2,
                 each = each,
+                subset = subset,
                 section = section,
                 gmtfile = gmtfile,
                 method = method,
@@ -63,6 +65,7 @@ expand_cases <- function() {
                 ident.1 = ident.1,
                 ident.2 = ident.2,
                 each = each,
+                subset = subset,
                 section = section,
                 gmtfile = gmtfile,
                 method = method,
@@ -136,6 +139,9 @@ do_case <- function(name, case) {
     # prepare expression matrix
     log_info("  Preparing expression matrix...")
     sobj <- srtobj %>% filter(!is.na(!!sym(case$group.by)))
+    if (!is.null(case$subset)) {
+        sobj <- sobj %>% filter(!!!parse_exprs(case$subset))
+    }
     if (!is.null(case$ident.2)) {
         sobj <- sobj %>% filter(!!sym(case$group.by) %in% c(case$ident.1, case$ident.2))
     }

biopipen/scripts/scrna/SeuratClustering.R CHANGED Viewed

@@ -1,4 +1,5 @@
 source("{{biopipen_dir}}/utils/misc.R")
+source("{{biopipen_dir}}/utils/caching.R")
 library(Seurat)
 library(future)
@@ -35,80 +36,100 @@ envs$FindNeighbors <- .expand_dims(envs$FindNeighbors)
 log_info("Reading Seurat object ...")
 sobj <- readRDS(srtfile)
-if (isTRUE(envs$cache)) {
-    envs$cache <- joboutdir
+if (isTRUE(envs$cache)) { envs$cache <- joboutdir }
+if (length(envs$cache) > 1) {
+    log_warn("Multiple cache directories (envs.cache) detected, using the first one.")
+    envs$cache <- envs$cache[1]
 }
-if (is.character(envs$cache) && nchar(envs$cache) > 0) {
-    log_info("Obtainning the signature ...")
-    envs2 <- envs
-    envs2$ncores <- NULL
-    sig <- c(
-        capture.output(str(sobj)),
-        "\n\n-------------------\n\n",
-        capture.output(str(envs2)),
-        "\n"
-    )
-    digested_sig <- digest::digest(sig, algo = "md5")
-    cached_file <- file.path(envs$cache, paste0(digested_sig, ".cached.RDS"))
-    if (file.exists(cached_file)) {
-        log_info("Using cached results {cached_file}")
-        # copy cached file to rdsfile
-        file.copy(cached_file, rdsfile, copy.date = TRUE)
-        quit()
-    } else {
-        log_info("Cached results not found, logging the current and cached signatures.")
-        log_info("- Current signature: {digested_sig}")
-        # print(sig)
-        # sigfiles <- Sys.glob(file.path(envs$cache, "*.signature.txt"))
-        # for (sigfile in sigfiles) {
-        #     log_info("- Found cached signature file: {sigfile}")
-        #     cached_sig <- readLines(sigfile)
-        #     log_info("- Cached signature:")
-        #     print(cached_sig)
-        # }
-        writeLines(sig, file.path(envs$cache, paste0(digested_sig, ".signature.txt")))
-    }
+sobj_sig <- capture.output(str(sobj))
+dig_sig <- digest::digest(sobj_sig, algo = "md5")
+dig_sig <- substr(dig_sig, 1, 8)
+cache_dir <- NULL
+if (is.character(envs$cache)) {
+    cache_dir <- file.path(envs$cache, paste0(dig_sig, ".seurat_cache"))
+    dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)
+    writeLines(sobj_sig, file.path(cache_dir, "signature.txt"))
 }
 if (length(envs$ScaleData) > 0) {
     if (DefaultAssay(sobj) == "SCT") {
         stop("SCT assay detected, but ScaleData is specified. Use SCTransform instead.")
     }
-    log_info("Running ScaleData ...")
-    envs$ScaleData$object <- sobj
-    sobj <- do_call(ScaleData, envs$ScaleData)
+    cached <- get_cached(envs$ScaleData, "ScaleData", cache_dir)
+    if (is.null(cached$data)) {
+        log_info("Running ScaleData ...")
+        envs$ScaleData$object <- sobj
+        sobj <- do_call(ScaleData, envs$ScaleData)
+        cached$data <- list(assay = sobj@assays$RNA, commands = sobj@commands)
+        save_to_cache(cached, "ScaleData", cache_dir)
+    } else {
+        log_info("Loading cached ScaleData ...")
+        sobj@assays$RNA <- cached$data$assay
+        sobj@commands <- cached$data$commands
+        DefaultAssay(sobj) <- "RNA"
+    }
 } else if (length(envs$SCTransform) > 0) {
     if (DefaultAssay(sobj) != "SCT") {
         stop("SCT assay not detected, but SCTransform is specified. Use ScaleData instead.")
     }
-    log_info("Running SCTransform ...")
-    envs$SCTransform$object <- sobj
-    sobj <- do_call(SCTransform, envs$SCTransform)
+    cached <- get_cached(envs$SCTransform, "SCTransform", cache_dir)
+    asssay <- envs$SCTransform$new.assay.name %||% "SCT"
+    if (is.null(cached$data)) {
+        log_info("Running SCTransform ...")
+        envs$SCTransform$object <- sobj
+        sobj <- do_call(SCTransform, envs$SCTransform)
+        cached$data <- list(assay = sobj@assays$SCT, commands = sobj@commands)
+        save_to_cache(cached, "SCTransform", cache_dir)
+    } else {
+        log_info("Loading cached SCTransform ...")
+        sobj@assays[[assay]] <- cached$data$assay
+        sobj@commands <- cached$data$commands
+        DefaultAssay(sobj) <- assay
+    }
 }
-log_info("Running RunUMAP ...")
-umap_args <- list_setdefault(
-    envs$RunUMAP,
-    object = sobj,
-    dims = 1:30,
-    reduction = sobj@misc$integrated_new_reduction %||% "pca"
-)
-umap_args$dims <- 1:min(max(umap_args$dims), ncol(sobj) - 1)
-sobj <- do_call(RunUMAP, umap_args)
-log_info("Running FindNeighbors ...")
-envs$FindNeighbors$object <- sobj
-if (is.null(envs$FindNeighbors$reduction)) {
-    envs$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
+cached <- get_cached(envs$RunUMAP, "RunUMAP", cache_dir)
+reduc_name <- envs$RunUMAP$reduction.name %||% "umap"
+if (is.null(cached$data)) {
+    log_info("Running RunUMAP ...")
+    umap_args <- list_setdefault(
+        envs$RunUMAP,
+        object = sobj,
+        dims = 1:30,
+        reduction = sobj@misc$integrated_new_reduction %||% "pca"
+    )
+    ncells <- ncol(sobj)
+    umap_args$dims <- 1:min(max(umap_args$dims), ncells - 1)
+    umap_method <- envs$RunUMAP$umap.method %||% "uwot"
+    if (umap_method == "uwot" && is.null(envs$RunUMAP$n.neighbors)) {
+        # https://github.com/satijalab/seurat/issues/4312
+        umap_args$n.neighbors <- min(ncells - 1, 30)
+    }
+    sobj <- do_call(RunUMAP, umap_args)
+    cached$data <- list(reduc = sobj@reductions[[reduc_name]], commands = sobj@commands)
+    save_to_cache(cached, "RunUMAP", cache_dir)
+} else {
+    log_info("Loading cached RunUMAP ...")
+    sobj@reductions[[reduc_name]] <- cached$data$reduc
+    sobj@commands <- cached$data$commands
 }
-sobj <- do_call(FindNeighbors, envs$FindNeighbors)
-log_info("Running FindClusters ...")
-if (is.null(envs$FindClusters$random.seed)) {
-    envs$FindClusters$random.seed <- 8525
+cached <- get_cached(envs$FindNeighbors, "FindNeighbors", cache_dir)
+if (is.null(cached$data)) {
+    log_info("Running FindNeighbors ...")
+    envs$FindNeighbors$object <- sobj
+    envs$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
+    sobj <- do_call(FindNeighbors, envs$FindNeighbors)
+    cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
+    save_to_cache(cached, "FindNeighbors", cache_dir)
+} else {
+    log_info("Loading cached FindNeighbors ...")
+    sobj@graphs <- cached$data$graphs
+    sobj@commands <- cached$data$commands
 }
-resolution <- envs$FindClusters$resolution
+envs$FindClusters$random.seed <- envs$FindClusters$random.seed %||% 8525
+resolution <- envs$FindClusters$resolution %||% 0.8
 if (is.character(resolution)) {
     if (grepl(",", resolution)) {
         resolution <- as.numeric(trimws(unlist(strsplit(resolution, ","))))
@@ -116,42 +137,38 @@ if (is.character(resolution)) {
         resolution <- as.numeric(resolution)
     }
 }
-if (is.null(resolution) || length(resolution) == 1) {
-    envs$FindClusters$resolution <- resolution
-    envs$FindClusters$object <- sobj
-    sobj <- do_call(FindClusters, envs$FindClusters)
-    levels(sobj$seurat_clusters) <- paste0("c", as.numeric(levels(sobj$seurat_clusters)) + 1)
-    Idents(sobj) <- "seurat_clusters"
-    ident_table <- table(sobj$seurat_clusters)
-    log_info("- Found {length(ident_table)} clusters:")
-    print(ident_table)
-} else {
-    log_info("- Multiple resolutions detected ...")
-    res_key <- NULL
-    for (res in resolution) {
-        findclusters_args <- envs$FindClusters
-        findclusters_args$resolution <- res
-        findclusters_args$object <- sobj
-        sobj <- do_call(FindClusters, findclusters_args)
+for (res in resolution) {
+    envs$FindClusters$resolution <- res
+    cached <- get_cached(envs$FindClusters, paste0("FindClusters_", res), cache_dir)
+    res_key <- paste0("seurat_clusters_", res)
+    if (is.null(cached$data)) {
+        log_info("Running FindClusters at resolution: {res} ...")
+        envs$FindClusters$object <- sobj
+        sobj <- do_call(FindClusters, envs$FindClusters)
         levels(sobj$seurat_clusters) <- paste0("c", as.numeric(levels(sobj$seurat_clusters)) + 1)
-        res_key <- paste0("seurat_clusters_", res)
         sobj[[res_key]] <- sobj$seurat_clusters
-        ident_table <- table(sobj[[res_key]])
-        log_info("- Found {length(ident_table)} at resolution: {res}:")
-        print(ident_table)
+        Idents(sobj) <- "seurat_clusters"
+        cached$data <- list(clusters = sobj$seurat_clusters, commands = sobj@commands)
+        save_to_cache(cached, paste0("FindClusters_", res), cache_dir)
+    } else {
+        log_info("Loading cached FindClusters at resolution: {res} ...")
+        sobj@commands <- cached$data$commands
+        sobj[[res_key]] <- cached$data$clusters
+        sobj$seurat_clusters <- cached$data$clusters
+        Idents(sobj) <- "seurat_clusters"
     }
+    ident_table <- table(Idents(sobj))
+    log_info("- Found {length(ident_table)} clusters")
+    print(ident_table)
+    cat("\n")
 }
 if (DefaultAssay(sobj) == "SCT") {
-    # https://github.com/satijalab/seurat/issues/6968
+        # https://github.com/satijalab/seurat/issues/6968
     log_info("Running PrepSCTFindMarkers ...")
     sobj <- PrepSCTFindMarkers(sobj)
 }
 log_info("Saving results ...")
 saveRDS(sobj, file = rdsfile)
-if (is.character(envs$cache) && nchar(envs$cache) > 0) {
-    log_info("Caching results ...")
-    file.copy(rdsfile, cached_file, overwrite = TRUE)
-}

biopipen/scripts/scrna/SeuratPreparing.R CHANGED Viewed

@@ -99,8 +99,8 @@ load_sample = function(sample) {
     }
     obj <- CreateSeuratObject(exprs, project=sample)
     # filter the cells that don't have any gene expressions
-    cell_exprs = colSums(obj@assays$RNA)
-    obj = subset(obj, cells = names(cell_exprs[cell_exprs > 0]))
+    # cell_exprs = colSums(obj@assays$RNA)
+    # obj = subset(obj, cells = names(cell_exprs[cell_exprs > 0]))
     obj = RenameCells(obj, add.cell.id = sample)
     # Attach meta data
     for (mname in names(mdata)) {
@@ -128,13 +128,7 @@ log_info("Reading samples individually ...")
 obj_list = lapply(samples, load_sample)
 log_info("Merging samples ...")
-if (length(obj_list) >= 2) {
-    y = c()
-    for (i in 2:length(obj_list)) y = c(y, obj_list[[i]])
-    sobj = merge(obj_list[[1]], y)
-} else {
-    sobj = obj_list[[1]]
-}
+sobj = Reduce(merge, obj_list)
 log_info("Adding metadata for QC ...")
 sobj$percent.mt = PercentageFeatureSet(sobj, pattern = "^MT-")
@@ -297,28 +291,41 @@ add_report(
     h1 = "Filters and QC"
 )
+.formatArgs <- function(args) {
+    paste(capture.output(str(args)), collapse = ", ")
+}
 log_info("Performing transformation/scaling ...")
 # Not joined yet
 # sobj[["RNA"]] <- split(sobj[["RNA"]], f = sobj$Sample)
 if (envs$use_sct) {
     log_info("- Running SCTransform ...")
     SCTransformArgs <- envs$SCTransform
+    # log to stdout but don't populate it to running log
+    print("  SCTransform: {.formatArgs(SCTransformArgs)}")
+    log_debug("  SCTransform: {.formatArgs(SCTransformArgs)}")
     SCTransformArgs$object <- sobj
     sobj <- do_call(SCTransform, SCTransformArgs)
     # Default is to use the SCT assay
 } else {
     log_info("- Running NormalizeData ...")
     NormalizeDataArgs <- envs$NormalizeData
+    print("  NormalizeData: {.formatArgs(NormalizeDataArgs)}")
+    log_debug("  NormalizeData: {.formatArgs(NormalizeDataArgs)}")
     NormalizeDataArgs$object <- sobj
     sobj <- do_call(NormalizeData, NormalizeDataArgs)
     log_info("- Running FindVariableFeatures ...")
     FindVariableFeaturesArgs <- envs$FindVariableFeatures
+    print("  FindVariableFeatures: {.formatArgs(FindVariableFeaturesArgs)}")
+    log_debug("  FindVariableFeatures: {.formatArgs(FindVariableFeaturesArgs)}")
     FindVariableFeaturesArgs$object <- sobj
     sobj <- do_call(FindVariableFeatures, FindVariableFeaturesArgs)
     log_info("- Running ScaleData ...")
     ScaleDataArgs <- envs$ScaleData
+    print("  ScaleData: {.formatArgs(ScaleDataArgs)}")
+    log_debug("  ScaleData: {.formatArgs(ScaleDataArgs)}")
     ScaleDataArgs$object <- sobj
     sobj <- do_call(ScaleData, ScaleDataArgs)
 }
@@ -326,13 +333,14 @@ if (envs$use_sct) {
 log_info("- Running RunPCA ...")
 RunPCAArgs <- envs$RunPCA
 RunPCAArgs$npcs <- if (is.null(RunPCAArgs$npcs)) { 50 } else { min(RunPCAArgs$npcs, ncol(sobj) - 1) }
+print("  RunPCA: {.formatArgs(RunPCAArgs)}")
+log_debug("  RunPCA: {.formatArgs(RunPCAArgs)}")
 RunPCAArgs$object <- sobj
 sobj <- do_call(RunPCA, RunPCAArgs)
 if (!envs$no_integration) {
     log_info("- Running IntegrateLayers ...")
     IntegrateLayersArgs <- envs$IntegrateLayers
-    IntegrateLayersArgs$object <- sobj
     method <- IntegrateLayersArgs$method
     if (!is.null(IntegrateLayersArgs$reference) && is.character(IntegrateLayersArgs$reference)) {
         log_info("  Using reference samples: {paste(IntegrateLayersArgs$reference, collapse = ', ')}")
@@ -359,6 +367,9 @@ if (!envs$no_integration) {
     if (is.null(IntegrateLayersArgs$new.reduction)) {
         IntegrateLayersArgs$new.reduction <- new_reductions[[method]]
     }
+    print("  IntegrateLayers: {.formatArgs(IntegrateLayersArgs)}")
+    log_debug("  IntegrateLayers: {.formatArgs(IntegrateLayersArgs)}")
+    IntegrateLayersArgs$object <- sobj
     sobj <- do_call(IntegrateLayers, IntegrateLayersArgs)
     # Save it for dimension reduction plots
     sobj@misc$integrated_new_reduction <- IntegrateLayersArgs$new.reduction

biopipen 0.23.7__py3-none-any.whl → 0.24.0__py3-none-any.whl

Potentially problematic release.

biopipen 0.23.7__py3-none-any.whl → 0.24.0__py3-none-any.whl