PyPI - biopipen - Versions diffs - 0.23.7__py3-none-any.whl → 0.24.0__py3-none-any.whl - Mend

biopipen 0.23.7__py3-none-any.whl → 0.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (30)

biopipen/__init__.py +1 -1
biopipen/core/proc.py +7 -0
biopipen/ns/cellranger.py +2 -2
biopipen/ns/scrna.py +15 -20
biopipen/ns/tcr.py +8 -6
biopipen/scripts/scrna/ScFGSEA.R +6 -0
biopipen/scripts/scrna/SeuratClustering.R +102 -85
biopipen/scripts/scrna/SeuratPreparing.R +21 -10
biopipen/scripts/scrna/SeuratSubClustering.R +81 -97
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +5 -5
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +3 -3
biopipen/scripts/tcr/Immunarch-basic.R +2 -3
biopipen/scripts/tcr/Immunarch-clonality.R +2 -3
biopipen/scripts/tcr/Immunarch-diversity.R +70 -49
biopipen/scripts/tcr/Immunarch-geneusage.R +2 -3
biopipen/scripts/tcr/Immunarch-kmer.R +2 -3
biopipen/scripts/tcr/Immunarch-overlap.R +2 -3
biopipen/scripts/tcr/Immunarch-spectratyping.R +2 -3
biopipen/scripts/tcr/Immunarch-tracking.R +2 -3
biopipen/scripts/tcr/Immunarch-vjjunc.R +2 -3
biopipen/scripts/tcr/Immunarch.R +1 -1
biopipen/scripts/tcr/ImmunarchLoading.R +2 -0
biopipen/scripts/tcr/TCRClustering.R +6 -2
biopipen/scripts/tcr/TESSA.R +3 -1
biopipen/scripts/tcr/immunarch-patched.R +142 -0
biopipen/utils/caching.R +44 -0
{biopipen-0.23.7.dist-info → biopipen-0.24.0.dist-info}/METADATA +8 -7
{biopipen-0.23.7.dist-info → biopipen-0.24.0.dist-info}/RECORD +30 -28
{biopipen-0.23.7.dist-info → biopipen-0.24.0.dist-info}/WHEEL +0 -0
{biopipen-0.23.7.dist-info → biopipen-0.24.0.dist-info}/entry_points.txt +0 -0

biopipen/scripts/scrna/SeuratSubClustering.R CHANGED Viewed

@@ -1,4 +1,5 @@
 source("{{biopipen_dir}}/utils/misc.R")
+source("{{biopipen_dir}}/utils/caching.R")
 library(Seurat)
 library(future)
@@ -33,40 +34,10 @@ envs$FindNeighbors <- .expand_dims(envs$FindNeighbors)
 log_info("Reading Seurat object ...")
 srtobj <- readRDS(srtfile)
-if (isTRUE(envs$cache)) {
-    envs$cache <- joboutdir
-}
-if (is.character(envs$cache) && nchar(envs$cache) > 0) {
-    log_info("Obtainning the signature ...")
-    envs2 <- envs
-    envs2$ncores <- NULL
-    sig <- c(
-        capture.output(str(srtobj)),
-        "\n\n-------------------\n\n",
-        capture.output(str(envs2)),
-        "\n"
-    )
-    digested_sig <- digest::digest(sig, algo = "md5")
-    cached_file <- file.path(envs$cache, paste0(digested_sig, ".cached.RDS"))
-    if (file.exists(cached_file)) {
-        log_info("Using cached results {cached_file}")
-        # copy cached file to rdsfile
-        file.copy(cached_file, rdsfile, copy.date = TRUE)
-        quit()
-    } else {
-        log_info("Cached results not found.")
-        log_info("- Current signature: {digested_sig}")
-        # print(sig)
-        # sigfiles <- Sys.glob(file.path(envs$cache, "*.signature.txt"))
-        # for (sigfile in sigfiles) {
-        #     log_info("- Found cached signature file: {sigfile}")
-        #     cached_sig <- readLines(sigfile)
-        #     log_info("- Cached signature:")
-        #     print(cached_sig)
-        # }
-        writeLines(sig, file.path(envs$cache, paste0(digested_sig, ".signature.txt")))
-    }
+if (isTRUE(envs$cache)) { envs$cache <- joboutdir }
+if (length(envs$cache) > 1) {
+    log_warn("Multiple cache directories (envs.cache) detected, using the first one.")
+    envs$cache <- envs$cache[1]
 }
 if (!is.null(envs$mutaters) && length(envs$mutaters) > 0) {
@@ -102,30 +73,66 @@ for (key in names(envs$cases)) {
     }
     log_info("- Subsetting ...")
-    sobj <- srtobj %>% filter(!!parse_expr(case$subset))
-    log_info("- Running RunUMAP ...")
-    umap_args <- list_setdefault(
-        case$RunUMAP,
-        object = sobj,
-        dims = 1:30,
-        reduction = sobj@misc$integrated_new_reduction %||% "pca"
-    )
-    umap_args$dims <- 1:min(max(umap_args$dims), ncol(sobj) - 1)
-    sobj <- do_call(RunUMAP, umap_args)
-    log_info("- Running FindNeighbors ...")
-    case$FindNeighbors$object <- sobj
-    if (is.null(case$FindNeighbors$reduction)) {
-        case$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
+    sobj <- tryCatch({
+        srtobj %>% filter(!!parse_expr(case$subset))
+    }, error = function(e) {
+        stop(paste0("  Error in subset: ", e$message))
+    })
+    sobj_sig <- capture.output(str(sobj))
+    dig_sig <- digest::digest(sobj_sig, algo = "md5")
+    dig_sig <- substr(dig_sig, 1, 8)
+    cache_dir <- NULL
+    if (is.character(envs$cache)) {
+        cache_dir <- file.path(envs$cache, paste0(dig_sig, ".seurat_cache"))
+        dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)
+        writeLines(sobj_sig, file.path(cache_dir, "signature.txt"))
     }
-    sobj <- do_call(FindNeighbors, case$FindNeighbors)
-    log_info("- Running FindClusters ...")
-    if (is.null(case$FindClusters$random.seed)) {
-        case$FindClusters$random.seed <- 8525
+    cached <- get_cached(case$RunUMAP, "RunUMAP", cache_dir)
+    reduc_name <- case$RunUMAP$reduction.name %||% "umap"
+    if (is.null(cached$data)) {
+        log_info("- Running RunUMAP ...")
+        umap_args <- list_setdefault(
+            case$RunUMAP,
+            object = sobj,
+            dims = 1:30,
+            reduction = sobj@misc$integrated_new_reduction %||% "pca"
+        )
+        ncells <- ncol(sobj)
+        umap_args$dims <- 1:min(max(umap_args$dims), ncells - 1)
+        umap_method <- case$RunUMAP$umap.method %||% "uwot"
+        if (umap_method == "uwot" && is.null(case$RunUMAP$n.neighbors)) {
+            # https://github.com/satijalab/seurat/issues/4312
+            umap_args$n.neighbors <- min(ncells - 1, 30)
+        }
+        sobj <- do_call(RunUMAP, umap_args)
+        cached$data <- list(reduc = sobj@reductions[[reduc_name]], commands = sobj@commands)
+        save_to_cache(cached, "RunUMAP", cache_dir)
+    } else {
+        log_info("- Loading cached RunUMAP ...")
+        sobj@reductions[[reduc_name]] <- cached$data$reduc
+        sobj@commands <- cached$data$commands
+    }
+    reduc <- cached$data$reduc
+    cached <- get_cached(case$FindNeighbors, "FindNeighbors", cache_dir)
+    if (is.null(cached$data)) {
+        log_info("- Running FindNeighbors ...")
+        case$FindNeighbors$object <- sobj
+        if (is.null(case$FindNeighbors$reduction)) {
+            case$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
+        }
+        sobj <- do_call(FindNeighbors, case$FindNeighbors)
+        cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
+        save_to_cache(cached, "FindNeighbors", cache_dir)
+    } else {
+        log_info("- Loading cached FindNeighbors ...")
+        sobj@graphs <- cached$data$graphs
+        sobj@commands <- cached$data$commands
     }
-    resolution <- case$FindClusters$resolution
+    case$FindClusters$random.seed <- case$FindClusters$random.seed %||% 8525
+    resolution <- case$FindClusters$resolution %||% 0.8
     if (is.character(resolution)) {
         if (grepl(",", resolution)) {
             resolution <- as.numeric(trimws(unlist(strsplit(resolution, ","))))
@@ -133,53 +140,30 @@ for (key in names(envs$cases)) {
             resolution <- as.numeric(resolution)
         }
     }
-    if (is.null(resolution) || length(resolution) == 1) {
-        case$FindClusters$resolution <- resolution
-        case$FindClusters$object <- sobj
-        sobj <- do_call(FindClusters, case$FindClusters)
-        levels(sobj$seurat_clusters) <- paste0("s", as.numeric(levels(sobj$seurat_clusters)) + 1)
-        Idents(sobj) <- "seurat_clusters"
-        sobj[[key]] <- sobj$seurat_clusters
-        ident_table <- table(sobj[[key]])
-        log_info("- Found {length(ident_table)} clusters:")
-        print(ident_table)
-        cat("\n")
-        log_info("- Updating meta.data with subclusters...")
-        srtobj <- AddMetaData(srtobj, metadata = sobj@meta.data[, key, drop = FALSE])
-        srtobj[[paste0("sub_umap_", key)]] <- sobj@reductions$umap
-    } else {
-        log_info("- Multiple resolutions detected ...")
-        log_info("")
-        metadata <- NULL
-        for (res in resolution) {
-            findclusters_args <- case$FindClusters
-            findclusters_args$resolution <- res
-            findclusters_args$object <- sobj
-            sobj1 <- do_call(FindClusters, findclusters_args)
-            res_key <- paste0(key, "_", res)
+    for (res in resolution) {
+        case$FindClusters$resolution <- res
+        cached <- get_cached(case$FindClusters, paste0("FindClusters_", res), cache_dir)
+        res_key <- paste0("seurat_clusters_", res)
+        if (is.null(cached$data)) {
+            log_info("- Running FindClusters at resolution: {res} ...")
+            case$FindClusters$object <- sobj
+            sobj1 <- do_call(FindClusters, case$FindClusters)
             levels(sobj1$seurat_clusters) <- paste0("s", as.numeric(levels(sobj1$seurat_clusters)) + 1)
-            Idents(sobj1) <- "seurat_clusters"
             sobj1[[res_key]] <- sobj1$seurat_clusters
-            ident_table <- table(sobj1[[res_key]])
-            log_info("- Found {length(ident_table)} at resolution: {res}:")
-            print(ident_table)
-            cat("\n")
-            log_info("- Updating meta.data with subclusters...")
-            metadata <- sobj1@meta.data[, res_key, drop = FALSE]
-            srtobj <- AddMetaData(srtobj, metadata = metadata)
-            srtobj[[paste0("sub_umap_", res_key)]] <- sobj1@reductions$umap
+            cached$data <- sobj1@meta.data[, res_key, drop = FALSE]
+            save_to_cache(cached, paste0("FindClusters_", res), cache_dir)
+        } else {
+            log_info("- Using cached FindClusters at resolution: {res} ...")
         }
-        srtobj <- AddMetaData(srtobj, metadata = metadata, col.name = key)
-        srtobj[[paste0("sub_umap_", key)]] <- sobj1@reductions$umap
+        ident_table <- table(cached$data[[res_key]])
+        log_info("  Found {length(ident_table)} clusters")
+        print(ident_table)
+        cat("\n")
     }
+    log_info("- Updating meta.data with subclusters...")
+    srtobj <- AddMetaData(srtobj, metadata = cached$data, col.name = key)
+    srtobj[[paste0("sub_umap_", key)]] <- reduc
 }
 log_info("Saving results ...")
 saveRDS(srtobj, file = rdsfile)
-if (is.character(envs$cache) && nchar(envs$cache) > 0) {
-    log_info("Caching results to {cached_file} ...")
-    invisible(file.copy(rdsfile, cached_file, overwrite = TRUE))
-}

biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R CHANGED Viewed

@@ -54,7 +54,7 @@ do_one_comparison <- function(
     subset_prefix,
     groupname
 ) {
-    print(paste("  Design:", compname, "(", case, ",", control, ")"))
+    log_info(paste("  Design: {compname} ({case}, {control})"))
     case_code = paste0("subset(obj, subset = ", subset_col, " == '", case, "')")
     case_obj = tryCatch({
         eval(parse(text = case_code))
@@ -62,7 +62,7 @@ do_one_comparison <- function(
         NULL
     })
     if (is.null(case_obj)) {
-        print("          Skip (not enough cells in case)")
+        log_warn("          Skip (not enough cells in case)")
         return (NULL)
     }
     control_code = paste0("subset(obj, subset = ", subset_col, " == '", control, "')")
@@ -72,7 +72,7 @@ do_one_comparison <- function(
         NULL
     })
     if (is.null(control_obj)) {
-        print("          Skip (not enough cells in control)")
+        log_warn("          Skip (not enough cells in control)")
         add_report(
             list(kind = "error", content = "Not enough cells in control"),
             h1 = groupname,
@@ -86,7 +86,7 @@ do_one_comparison <- function(
     odir = file.path(groupdir, paste0(subset_prefix, compname))
     dir.create(odir, showWarnings = FALSE)
     if (ncol(exprs_case) < 3 || ncol(exprs_control) < 3) {
-        print("          Skip (not enough cells)")
+        log_warn("          Skip (not enough cells)")
         add_report(
             list(kind = "error", content = "Not enough cells"),
             h1 = groupname,
@@ -131,7 +131,7 @@ do_one_comparison <- function(
 }
 do_one_group <- function(group) {
-    print(paste("- Group:", group, "..."))
+    log_info("- Group: {group} ...")
     genes = intersect(metabolics, rownames(sobj))
     group_code = paste0(

biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R CHANGED Viewed

@@ -71,7 +71,7 @@ num_of_pathways <- function(gmtfile, overlapgenes) {
 }
 do_one_subset <- function(s, subset_col, subset_prefix) {
-    print(paste0("  Processing subset: ", s, "..."))
+    log_info("  Processing subset: {s} ...")
     if (is.null(s)) {
         subset_dir <- file.path(outdir, "ALL")
         dir.create(subset_dir, showWarnings = FALSE)
@@ -118,7 +118,7 @@ do_one_subset <- function(s, subset_col, subset_prefix) {
     for (pi in seq_along(pathway_names)) {
         p <- pathway_names[pi]
-        print(paste0("  * Pathway (", pi, "): ", p, "..."))
+        log_info("  * Pathway ({pi}): {p} ...")
         genes <- pathways[[p]]
         genes_comm <- intersect(genes, rownames(subset_obj))
         genes_expressed <- names(rowSums(subset_obj)[rowSums(subset_obj) > 0])
@@ -312,7 +312,7 @@ do_one_subset <- function(s, subset_col, subset_prefix) {
 }
 do_one_subset_col <- function(subset_col, subset_prefix) {
-    print(paste0("- Handling subset column: ", subset_col, " ..."))
+    log_info("- Handling subset column: {subset_col} ...")
     if (is.null(subset_col)) {
         do_one_subset(NULL, subset_col = NULL, subset_prefix = NULL)
     } else {

biopipen/scripts/tcr/Immunarch-basic.R CHANGED Viewed

@@ -2,9 +2,8 @@
 # immfile, outdir, mutaters, immdata, n_samples
 log_info("")
-log_info("#####################################")
-log_info("# Basic analysis                    #")
-log_info("#####################################")
+log_info("# Basic analysis")
+log_info("-----------------------------------")
 volumes = {{envs.volumes | r}}
 lens = {{envs.lens | r}}

biopipen/scripts/tcr/Immunarch-clonality.R CHANGED Viewed

@@ -2,9 +2,8 @@
 # immfile, outdir, mutaters, immdata, n_samples
 log_info("")
-log_info("#####################################")
-log_info("# Clonality analysis                #")
-log_info("#####################################")
+log_info("# Clonality analysis")
+log_info("-----------------------------------")
 top_clones = {{envs.top_clones | r}}
 rare_clones = {{envs.rare_clones | r}}

biopipen/scripts/tcr/Immunarch-diversity.R CHANGED Viewed

@@ -1,30 +1,34 @@
 # Diversity estimation
+source("{{biopipen_dir}}/scripts/tcr/immunarch-patched.R")
 # https://immunarch.com/articles/web_only/v6_diversity.html
 log_info("")
-log_info("#####################################")
-log_info("# Diversity estimation              #")
-log_info("#####################################")
+log_info("# Diversity estimation")
+log_info("-----------------------------------")
 # Other variables are loaded in the parent template
 # immdata is already loaded, meta is mutated
-div_method = {{envs.divs.method | r}}
-div_by = {{envs.divs.by | r}}
-div_order = {{envs.divs.order | r}}
-div_args = {{envs.divs.args | r: todot="-"}}
-div_test = {{envs.divs.test | r}}
-div_cases = {{envs.divs.cases | r: todot="-"}}
-div_devpars = {{envs.divs.devpars | r}}
-div_separate_by = {{envs.divs.separate_by | r}}
-div_split_by = {{envs.divs.split_by | r}}
-div_split_order = {{envs.divs.split_order | r}}
-div_align_x = {{envs.divs.align_x | r}}
-div_align_y = {{envs.divs.align_y | r}}
-div_subset = {{envs.divs.subset | r}}
-div_log = {{envs.divs.log | r}}
-div_ncol = {{envs.divs.ncol | r}}
-div_ymin = {{envs.divs.ymin | r}}
-div_ymax = {{envs.divs.ymax | r}}
+div_method = {{envs.divs.method | default: "gini" | r}}
+div_by = {{envs.divs.by | default: None | r}}
+div_plot_type = {{envs.divs.plot_type | default: "bar" | r}}
+div_order = {{envs.divs.order | default: [] | r}}
+div_args = {{envs.divs.args | default: {} | r: todot="-"}}
+div_test = {{envs.divs.test | default: None | r}}
+div_cases = {{envs.divs.cases | default: {} | r: todot="-"}}
+div_devpars = {{envs.divs.devpars | default: None | r}}
+div_separate_by = {{envs.divs.separate_by | default: None | r}}
+div_split_by = {{envs.divs.split_by | default: None | r}}
+div_split_order = {{envs.divs.split_order | default: None | r}}
+div_align_x = {{envs.divs.align_x | default: False | r}}
+div_align_y = {{envs.divs.align_y | default: False | r}}
+div_subset = {{envs.divs.subset | default: None | r}}
+div_log = {{envs.divs.log | default: False | r}}
+div_ncol = {{envs.divs.ncol | default: 2 | r}}
+div_ymin = {{envs.divs.ymin | default: None | r}}
+div_ymax = {{envs.divs.ymax | default: None | r}}
+div_test = div_test %||% list(method = "none", padjust = "none")
+div_devpars = div_devpars %||% list(res = 100, width = 800, height = 800)
 div_dir = file.path(outdir, "diversity")
 dir.create(div_dir, showWarnings = FALSE)
@@ -38,6 +42,7 @@ update_case = function(case, name) {
     if (!is.null(case$by) && nchar(case$by) > 0) {
         case$by = unlist(strsplit(case$by, ",")) %>% trimws()
     }
+    case$plot_type <- case$plot_type %||% div_plot_type
     case$order <- case$order %||% div_order
     case$args <- case$args %||% div_args
     for (name in names(case$args)) {
@@ -85,23 +90,6 @@ update_case = function(case, name) {
     return (case)
 }
-# See https://github.com/immunomind/immunarch/pull/341
-vis.immunr_gini <- function(.data, .by = NA, .meta = NA,
-                            .errorbars = c(0.025, 0.975), .errorbars.off = FALSE,
-                            .points = TRUE, .test = TRUE, .signif.label.size = 3.5, ...) {
-  # repDiversity(..., .method = "gini") generates a matrix
-  .data = data.frame(Sample = rownames(.data), Value = .data[, 1])
-  vis_bar(
-    .data = .data, .by = .by, .meta = .meta,
-    .errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = FALSE,
-    .points = .points, .test = .test, .signif.label.size = .signif.label.size,
-    .defgroupby = "Sample", .grouping.var = "Group",
-    .labs = c(NA, "Gini coefficient"),
-    .title = "Gini coefficient", .subtitle = "Sample diversity estimation using the Gini coefficient",
-    .legend = NA, .leg.title = NA
-  )
-}
 if (is.null(div_cases) || length(div_cases) == 0) {
     if (is.null(div_method) || length(div_method) == 0 || nchar(div_method) == 0) {
         stop("No method is specified for diversity estimation")
@@ -176,6 +164,15 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
         col.names = TRUE
     )
+    .meta_vals <- function(meta, cols) {
+        if (length(cols) == 1) {
+            return (meta[[cols]])
+        }
+        vlist = lapply(cols, function(.x) meta[[.x]])
+        do.call(function(...) paste(..., sep = "; "), vlist)
+    }
     # plot
     #  by, order, separate_by, align_y
     n_seps = 1
@@ -189,11 +186,19 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
                 metas = metas[intersect(case$split_order, names(metas))]
             }
             ps = lapply(metas, function(meta) {
-                .test = length(unique(meta[[case$by]])) > 1
-                p = vis(filter_div(div, meta$Sample), .by = case$by, .meta = meta, .test = .test)
+                .test = length(unique(.meta_vals(meta, case$by))) > 1
+                p = vis(
+                    filter_div(div, meta$Sample),
+                    .by = case$by,
+                    .meta = meta,
+                    .test = .test,
+                    .plot.type = case$plot_type
+                )
                 p = p + xlab(paste0(case$separate_by, ": ", meta[[case$separate_by]][1], ")"))
                 if (!is.null(case$order) && length(case$order) > 0) {
-                    p = p + scale_x_discrete(limits = intersect(case$order, unique(meta[[case$by]])))
+                    p = p + scale_x_discrete(
+                        limits = intersect(case$order, unique(.meta_vals(meta, case$by)))
+                    )
                 }
                 if (!is.null(case$ymin) && !is.null(case$ymax)) {
                     p = p + ylim(c(case$ymin, case$ymax))
@@ -217,10 +222,18 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
             }
             .i = 0
             ps = lapply(metas, function(meta) {
-                nby = length(unique(meta[[case$by]]))
-                p = vis(filter_div(div, meta$Sample), .by = case$by, .meta = meta, .test = nby > 1)
+                nby = length(unique(.meta_vals(meta, case$by)))
+                p = vis(
+                    filter_div(div, meta$Sample),
+                    .by = case$by,
+                    .meta = meta,
+                    .test = nby > 1,
+                    .plot.type = case$plot_type
+                )
                 if (!is.null(case$order) && length(case$order) > 0) {
-                    p = p + scale_x_discrete(limits = intersect(case$order, unique(meta[[case$by]])))
+                    p = p + scale_x_discrete(
+                        limits = intersect(case$order, unique(.meta_vals(meta, case$by)))
+                    )
                 }
                 p = p + xlab(meta[[case$split_by]][1]) + theme(
                     axis.text.x = element_blank(),
@@ -253,10 +266,10 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
             plots = lapply(ps, function(x) x$p + ylim(c(ymin, ymax)))
             p = wrap_plots(plots, widths = widths, guides = "collect")
         } else {
-            .test = length(unique(d$meta[[case$by]])) > 1
-            p = vis(div, .by = case$by, .meta = d$meta, .test = .test)
+            .test = length(unique(.meta_vals(d$meta, case$by))) > 1
+            p = vis(div, .by = case$by, .meta = d$meta, .test = .test, .plot.type = case$plot_type)
             if (!is.null(case$order) && length(case$order) > 0) {
-                p = p + scale_x_discrete(limits = intersect(case$order, unique(d$meta[[case$by]])))
+                p = p + scale_x_discrete(limits = intersect(case$order, unique(.meta_vals(d$meta, case$by))))
             }
         }
     } else if (!is.null(case$separate_by)) {
@@ -333,7 +346,9 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
     } else {
         p = vis(div)
         if (!is.null(case$order) && length(case$order) > 0) {
-            p = p + scale_x_discrete(limits = intersect(case$order, unique(d$meta[[case$by]])))
+            p = p + scale_x_discrete(
+                limits = intersect(case$order, unique(.meta_vals(d$meta, case$by)))
+            )
         }
     }
@@ -351,7 +366,7 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
     }
     if (is.null(width)) {
         if (!is.null(case$by) && length(case$by) > 0) {
-            width = 200 * length(unique(d$meta[[case$by]])) + 120
+            width = 200 * length(unique(.meta_vals(d$meta, case$by))) + 120
         } else {
             width = 100 * length(unique(d$meta$Sample)) + 120
         }
@@ -400,7 +415,11 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
                         "where all values are the same (for example, where everyone has ",
                         "the same income). A Gini coefficient of one (or 100 percents ) ",
                         "expresses maximal inequality among values (for example where only ",
-                        "one person has all the income).")
+                        "one person has all the income)."),
+                    d50 = paste0(
+                        "the D50 index. ",
+                        "It is the number of types that are needed to cover 50% of the total
+                        abundance.")
                 )
             )
         ),
@@ -705,6 +724,8 @@ run_div_case = function(casename) {
             run_general(casename, d, case, ddir)
         } else if (case$method == "gini") {
             run_general(casename, d, case, ddir, "V1")
+        } else if (case$method == "d50") {
+            run_general(casename, d, case, ddir, "Clones")
         } else {
             stop(paste0("Unknown diversity method: ", case$method))
         }

biopipen/scripts/tcr/Immunarch-geneusage.R CHANGED Viewed

@@ -2,9 +2,8 @@
 # immfile, outdir, mutaters, immdata, n_samples
 log_info("")
-log_info("#####################################")
-log_info("# Gene usage analysis               #")
-log_info("#####################################")
+log_info("# Gene usage analysis")
+log_info("-----------------------------------")
 gene_usages = {{ envs.gene_usages | r: todot="-" }}

biopipen/scripts/tcr/Immunarch-kmer.R CHANGED Viewed

@@ -2,9 +2,8 @@
 # immfile, outdir, mutaters, immdata, n_samples
 log_info("")
-log_info("#####################################")
-log_info("# K-mer analysis                    #")
-log_info("#####################################")
+log_info("# K-mer analysis")
+log_info("-----------------------------------")
 kmers = {{ envs.kmers | r: todot="-" }}

biopipen/scripts/tcr/Immunarch-overlap.R CHANGED Viewed

@@ -2,9 +2,8 @@
 # immfile, outdir, mutaters, immdata, n_samples
 log_info("")
-log_info("#####################################")
-log_info("# Overlap analysis                  #")
-log_info("#####################################")
+log_info("# Overlap analysis")
+log_info("-----------------------------------")
 overlaps = {{ envs.overlaps | r: todot="-" }}

biopipen/scripts/tcr/Immunarch-spectratyping.R CHANGED Viewed

@@ -2,9 +2,8 @@
 # immfile, outdir, mutaters, immdata, n_samples
 log_info("")
-log_info("#####################################")
-log_info("# Spectratyping analysis            #")
-log_info("#####################################")
+log_info("# Spectratyping analysis")
+log_info("-----------------------------------")
 spects = {{ envs.spects | r }}

biopipen/scripts/tcr/Immunarch-tracking.R CHANGED Viewed

@@ -1,7 +1,6 @@
 log_info("")
-log_info("#####################################")
-log_info("# Clonotype tracking                #")
-log_info("#####################################")
+log_info("# Clonotype tracking")
+log_info("-----------------------------------")
 trackings = {{ envs.trackings | r }}

biopipen/scripts/tcr/Immunarch-vjjunc.R CHANGED Viewed

@@ -1,7 +1,6 @@
 log_info("")
-log_info("#####################################")
-log_info("# VJ Junction Circos Plots          #")
-log_info("#####################################")
+log_info("# VJ Junction Circos Plots")
+log_info("-----------------------------------")
 # Already required by immunarch
 library(circlize)

biopipen/scripts/tcr/Immunarch.R CHANGED Viewed

@@ -34,7 +34,7 @@ log_info("Expanding immdata ...")
 exdata = expand_immdata(immdata)
 log_info("Loading metadata if provided ...")
-if (endsWith(metafile, ".rds") || endsWith(metafile, ".RDS")) {
+if (!is.null(metafile) && (endsWith(metafile, ".rds") || endsWith(metafile, ".RDS"))) {
     meta = readRDS(metafile)@meta.data
 } else if (!is.null(metafile) && nchar(metafile) > 0) {
     meta = read.table(metafile, sep = "\t", header = TRUE, row.names = 1)

biopipen/scripts/tcr/ImmunarchLoading.R CHANGED Viewed

@@ -144,6 +144,7 @@ for (i in seq_len(nrow(metadata))) {
     # file.symlink(normalizePath(annofile), file.path(datadir, paste0(sample, ext)))
 }
+log_info("Loading TCR data ...")
 immdata = repLoad(datadir, .mode=mode)
 if (mode == "single") {
     data = immdata$data
@@ -178,6 +179,7 @@ immdata$prefix = prefix
 saveRDS(immdata, file=rdsfile)
+log_info("Saving cell-level data ...")
 exdata <- expand_immdata(immdata, cell_id = "Barcode") %>%
     distinct(Sample, Barcode, .keep_all = TRUE) %>%
     mutate(Barcode = glue(paste0(prefix, "{Barcode}"))) %>%

biopipen/scripts/tcr/TCRClustering.R CHANGED Viewed

@@ -3,6 +3,7 @@
 # python = Sys.which({{envs.python | r}})
 # Sys.setenv(RETICULATE_PYTHON = python)
 # library(reticulate)
+source("{{biopipen_dir}}/utils/misc.R")
 source("{{biopipen_dir}}/utils/single_cell.R")
 library(immunarch)
@@ -97,7 +98,7 @@ clean_clustcr_output = function(clustcr_outfile, clustcr_input) {
 }
 run_clustcr = function() {
-    print(paste("Using tool:", "ClusTCR"))
+    log_info("Running ClusTCR ...")
     clustcr_dir = file.path(outdir, "ClusTCR_Output")
     dir.create(clustcr_dir, showWarnings = FALSE)
     clustcr_file = prepare_clustcr(clustcr_dir)
@@ -110,6 +111,7 @@ run_clustcr = function() {
     )
     print("Running:")
     print(clustcr_cmd)
+    log_debug("- Running command: {clustcr_cmd}")
     rc = system(clustcr_cmd)
     if (rc != 0) {
         quit(status=rc)
@@ -196,7 +198,7 @@ clean_giana_output = function(giana_outfile, giana_infile) {
 }
 run_giana = function() {
-    print(paste("Using tool:", "GIANA"))
+    log_info("Running GIANA ...")
     giana_srcdir = prepare_giana()
     giana_input = prepare_input()
     giana_outdir = file.path(outdir, "GIANA_Output")
@@ -228,6 +230,7 @@ run_giana = function() {
     )
     print("Running:")
     print(giana_cmd)
+    log_debug("- Running command: {giana_cmd}")
     rc = system(giana_cmd)
     if (rc != 0) {
         quit(status=rc)
@@ -276,4 +279,5 @@ if (tolower(tool) == "clustcr") {
     stop(paste("Unknown tool:", tool))
 }
+log_info("Saving results ...")
 attach_to_immdata(out)

biopipen 0.23.7__py3-none-any.whl → 0.24.0__py3-none-any.whl

Potentially problematic release.

biopipen 0.23.7__py3-none-any.whl → 0.24.0__py3-none-any.whl