PyPI - biopipen - Versions diffs - 0.34.1__py3-none-any.whl → 0.34.3__py3-none-any.whl - Mend

biopipen 0.34.1__py3-none-any.whl → 0.34.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (32)

biopipen/__init__.py +1 -1
biopipen/ns/scrna.py +259 -34
biopipen/ns/scrna_metabolic_landscape.py +1 -1
biopipen/ns/tcr.py +9 -4
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +12 -3
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +12 -3
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +12 -3
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +3 -10
biopipen/scripts/scrna/MarkersFinder.R +34 -28
biopipen/scripts/scrna/PseudoBulkDEG.R +592 -0
biopipen/scripts/scrna/ScFGSEA.R +35 -35
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +16 -0
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +1 -1
biopipen/scripts/scrna/SeuratClusterStats-features.R +29 -6
biopipen/scripts/scrna/SeuratClusterStats-stats.R +29 -1
biopipen/scripts/scrna/SeuratClusterStats.R +1 -0
biopipen/scripts/scrna/TopExpressingGenes.R +6 -6
biopipen/scripts/scrna/celltypist-wrapper.py +2 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +9 -3
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +2 -2
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +1 -0
biopipen/scripts/tcr/GIANA/GIANA4.py +2 -4
biopipen/scripts/tcr/ScRepCombiningExpression.R +3 -2
biopipen/scripts/tcr/ScRepLoading.R +7 -2
biopipen/scripts/tcr/TCRClustering.R +9 -23
biopipen/scripts/tcr/TESSA.R +4 -2
{biopipen-0.34.1.dist-info → biopipen-0.34.3.dist-info}/METADATA +1 -1
{biopipen-0.34.1.dist-info → biopipen-0.34.3.dist-info}/RECORD +30 -31
biopipen/reports/scrna/TopExpressingGenes.svelte +0 -17
biopipen/scripts/scrna/SCP-plot.R +0 -15202
{biopipen-0.34.1.dist-info → biopipen-0.34.3.dist-info}/WHEEL +0 -0
{biopipen-0.34.1.dist-info → biopipen-0.34.3.dist-info}/entry_points.txt +0 -0

biopipen/scripts/scrna/ScFGSEA.R CHANGED Viewed

@@ -7,9 +7,9 @@ srtfile <- {{in.srtobj | r}}  # nolint
 outdir <- {{out.outdir | r}}  # nolint
 joboutdir <- {{job.outdir | r}}  # nolint
 mutaters <- {{envs.mutaters | r}}  # nolint
-group.by <- {{envs["group-by"] | r}}  # nolint
-ident.1 <- {{envs["ident-1"] | r}}  # nolint
-ident.2 <- {{envs["ident-2"] | r}}  # nolint
+group_by <- {{envs.group_by | default: envs["group-by"] | default: None | r}}  # nolint
+ident_1 <- {{envs.ident_1 | default: envs["ident-1"] | default: None | r}}  # nolint
+ident_2 <- {{envs.ident_2 | default: envs["ident-2"] | default: None | r}}  # nolint
 each <- {{envs.each | r}}  # nolint
 subset <- {{envs.subset | r}}  # nolint
 gmtfile <- {{envs.gmtfile | r}}  # nolint
@@ -18,8 +18,8 @@ top <- {{envs.top | r}}  # nolint
 minsize <- {{envs.minSize | default: envs.minsize | r}}  # nolint
 maxsize <- {{envs.maxSize | default: envs.maxsize | r}}  # nolint
 eps <- {{envs.eps | r}}  # nolint
-allpathway_plots_defaults <- {{envs.allpathway_plots_defaults | r}}  # nolint
-allpathway_plots <- {{envs.allpathway_plots | r}}  #
+alleach_plots_defaults <- {{envs.alleach_plots_defaults | r}}  # nolint
+alleach_plots <- {{envs.alleach_plots | r}}  #
 ncores <- {{envs.ncores | r}}  # nolint
 rest <- {{envs.rest | r: todot="-"}}  # nolint
 cases <- {{envs.cases | r: todot="-"}}  # nolint
@@ -27,8 +27,8 @@ cases <- {{envs.cases | r: todot="-"}}  # nolint
 log <- get_logger()
 reporter <- get_reporter()
-allpathway_plots <- lapply(allpathway_plots, function(x) {
-    list_update(allpathway_plots_defaults, x)
+alleach_plots <- lapply(alleach_plots, function(x) {
+    list_update(alleach_plots_defaults, x)
 })
 log$info("Reading Seurat object ...")
@@ -43,9 +43,9 @@ if (!is.null(mutaters) && length(mutaters) > 0) {
 }
 defaults <- list(
-    group.by = group.by,
-    ident.1 = ident.1,
-    ident.2 = ident.2,
+    group_by = group_by,
+    ident_1 = ident_1,
+    ident_2 = ident_2,
     each = each,
     subset = subset,
     gmtfile = gmtfile,
@@ -54,8 +54,8 @@ defaults <- list(
     minsize = minsize,
     maxsize = maxsize,
     eps = eps,
-    allpathway_plots_defaults = allpathway_plots_defaults,
-    allpathway_plots = allpathway_plots,
+    alleach_plots_defaults = alleach_plots_defaults,
+    alleach_plots = alleach_plots,
     ncores = ncores,
     rest = rest
 )
@@ -63,11 +63,11 @@ defaults <- list(
 expand_each <- function(name, case) {
     outcases <- list()
-    case$group.by <- case$group.by %||% "Identity"
+    case$group_by <- case$group_by %||% "Identity"
     if (is.null(case$each) || is.na(case$each) || nchar(case$each) == 0 || isFALSE(each)) {
-        if (length(case$allpathway_plots) > 0) {
-            stop("Cannot perform `allpathway_plots` without `each` defined.")
+        if (length(case$alleach_plots) > 0) {
+            stop("Cannot perform `alleach_plots` without `each` defined.")
         }
         outcases[[name]] <- case
@@ -93,8 +93,8 @@ expand_each <- function(name, case) {
             newcase$each_name <- case$each
             newcase$each <- each
-            newcase$allpathway_plots_defaults <- NULL
-            newcase$allpathway_plots <- NULL
+            newcase$alleach_plots_defaults <- NULL
+            newcase$alleach_plots <- NULL
             if (!is.null(case$subset)) {
                 newcase$subset <- paste0(case$subset, " & ", bQuote(case$each), " == '", each, "'")
@@ -105,13 +105,13 @@ expand_each <- function(name, case) {
             outcases[[newname]] <- newcase
         }
-        if (length(case$allpathway_plots) > 0) {
+        if (length(case$alleach_plots) > 0) {
             newcase <- case
             newcase$gseas <- list()
-            newcase$allpathway_plots <- lapply(
-                newcase$allpathway_plots,
-                function(x) { list_update(newcase$allpathway_plots_defaults, x) }
+            newcase$alleach_plots <- lapply(
+                newcase$alleach_plots,
+                function(x) { list_update(newcase$alleach_plots_defaults, x) }
             )
             outcases[[paste0(name, " (all ", case$each,")")]] <- newcase
@@ -154,8 +154,8 @@ do_case <- function(name) {
         }))
         gseas[[case$each]] <- factor(gseas[[case$each]], levels = each_levels)
-        for (plotname in names(case$allpathway_plots)) {
-            plotargs <- case$allpathway_plots[[plotname]]
+        for (plotname in names(case$alleach_plots)) {
+            plotargs <- case$alleach_plots[[plotname]]
             plotargs <- extract_vars(plotargs, "devpars")
             plotargs$gsea_results <- gseas
             plotargs$group_by <- case$each
@@ -182,12 +182,12 @@ do_case <- function(name) {
     allow_empty = !is.null(case$each)
     # prepare expression matrix
     log$info("  Preparing expression matrix...")
-    sobj <- ensure_sobj({ srtobj %>% filter(!is.na(!!sym(case$group.by))) }, allow_empty)
+    sobj <- ensure_sobj({ srtobj %>% filter(!is.na(!!sym(case$group_by))) }, allow_empty)
     if (is.null(sobj)) {
         reporter$add2(
             list(
                 kind = "error",
-                content = paste0("No cells with non-NA `", case$group.by, "` in the Seurat object.")
+                content = paste0("No cells with non-NA `", case$group_by, "` in the Seurat object.")
             ),
             hs = c(info$section, info$name)
         )
@@ -200,20 +200,20 @@ do_case <- function(name) {
             reporter$add2(
                 list(
                     kind = "error",
-                    content = paste0("No cells with non-NA `", case$group.by, "` in the Seurat object.")
+                    content = paste0("No cells with non-NA `", case$group_by, "` in the Seurat object.")
                 ),
                 hs = c(info$section, info$name)
             )
             return(NULL)
         }
     }
-    if (!is.null(case$ident.2)) {
-        sobj <- ensure_sobj({ sobj %>% filter(!!sym(case$group.by) %in% c(case$ident.1, case$ident.2)) }, allow_empty)
+    if (!is.null(case$ident_2)) {
+        sobj <- ensure_sobj({ sobj %>% filter(!!sym(case$group_by) %in% c(case$ident_1, case$ident_2)) }, allow_empty)
         if (is.null(sobj)) {
             reporter$add2(
                 list(
                     kind = "error",
-                    content = paste0("No cells with non-NA `", case$group.by, "` in the Seurat object.")
+                    content = paste0("No cells with non-NA `", case$group_by, "` in the Seurat object.")
                 ),
                 hs = c(info$section, info$name)
             )
@@ -221,16 +221,16 @@ do_case <- function(name) {
         }
     }
-    allclasses <- sobj@meta.data[, case$group.by, drop = TRUE]
-    if (is.null(case$ident.2)) {
-        case$ident.2 <- "Other"
-        allclasses[allclasses != case$ident.1] <- "Other"
+    allclasses <- sobj@meta.data[, case$group_by, drop = TRUE]
+    if (is.null(case$ident_2)) {
+        case$ident_2 <- "Other"
+        allclasses[allclasses != case$ident_1] <- "Other"
     }
     exprs <- GetAssayData(sobj, layer = "data")
     # get preranks
     log$info("  Getting preranks...")
-    ranks <- RunGSEAPreRank(exprs, allclasses, case$ident.1, case$ident.2, case$method)
+    ranks <- RunGSEAPreRank(exprs, allclasses, case$ident_1, case$ident_2, case$method)
     write.table(
         as.data.frame(ranks),
         file.path(info$prefix, "fgsea.rank.txt"),
@@ -310,7 +310,7 @@ do_case <- function(name) {
     reporter$add2(
         list(
-            name = paste0("Table (", case$ident.1, " vs ", case$ident.2, ")"),
+            name = paste0("Table (", case$ident_1, " vs ", case$ident_2, ")"),
             contents = list(
                 list(kind = "descr", content = paste0(
                     "Showing top 50 pathways by padj in descending order. ",

biopipen/scripts/scrna/SeuratClusterStats-clustree.R CHANGED Viewed

@@ -26,6 +26,22 @@ if (
     if (length(clustrees) == 0) {
         log$warn("- no case found, skipping ...")
     } else {
+        reporter$add(
+            list(
+                kind = "descr",
+                content = 'The clustree plots displays clustering results from the Seurat object across different
+                resolutions of the clustering algorithm
+                (<a target="_blank" href="https://satijalab.org/seurat/reference/findclusters">Seurat::FindClusters</a>).
+                Each node represents a cluster, with the resolution levels labeled along the vertical (y) axis.
+                The size of each node reflects the number of cells in that cluster. Edges connect clusters between
+                adjacent resolutions and indicate how cells transition between clusters as resolution increases.
+                The thickness of the edges corresponds to the proportion of shared cells (in_prop) between clusters,
+                where darker lines signify a higher overlap (up to 100%). The color of the edges indicates the actual
+                number of cells that transitioned between clusters.'
+            ),
+            h1 = "Clustree plots"
+        )
         reports <- list()
         for (name in names(clustrees)) {
             if (is.null(clustrees[[name]]$prefix)) {

biopipen/scripts/scrna/SeuratClusterStats-dimplots.R CHANGED Viewed

@@ -40,7 +40,7 @@ do_one_dimplot = function(name) {
     reporter$add(
         list(
             kind = "descr",
-            content = paste0("Dimensionality reduction plot for ", case$group.by)
+            content = paste0("Dimensionality reduction plot for ", case$group_by)
         ),
         reporter$image(prefix, "pdf", FALSE),
         h1 = name

biopipen/scripts/scrna/SeuratClusterStats-features.R CHANGED Viewed

@@ -64,11 +64,11 @@ do_one_features <- function(name) {
     log$info("- Case: {name}")
     case <- list_update(features_defaults, features[[name]])
-    case$descr <- case$descr %||% ""
     case <- extract_vars(
         case,
         "devpars", "more_formats", "save_code", "save_data", "order_by",
-        "subset", "features", "descr")
+        "subset", "features", "descr",
+        allow_nonexisting = TRUE)
     if (!is.null(subset)) {
         case$object <- srtobj %>% filter(!!parse_expr(subset))
@@ -77,6 +77,7 @@ do_one_features <- function(name) {
     }
     if (exists("order_by") && !is.null(order_by)) {
+        case$ident <- case$ident %||% GetIdentityColumn(case$object)
         if (length(order_by) < 2) {
             clusters <- case$object@meta.data %>%
                 group_by(!!sym(case$ident)) %>%
@@ -126,12 +127,34 @@ do_one_features <- function(name) {
         caching$save(info$prefix)
     }
     # add reports
-    if (!is.null(descr) && nchar(descr) > 0) {
-        reporter$add2(
-            list(kind = "descr", content = descr),
-            hs = c(info$section, info$name)
+    default_descr <- glue(
+        "The plot shows the distribution or pattern of the specified features ({paste(case$features %||% features, collapse = ', ')}) ",
+        "across cells",
+        "{if (!is.null(case$ident)) glue(', identified by \"{case$ident}\"') else ''}",
+        "{if (!is.null(case$group_by)) glue(', grouped by \"{case$group_by}\"') else ''}",
+        "{if (!is.null(case$split_by)) glue(', and split by \"{case$split_by}\"') else ''}. ",
+        "The plot type is '{case$plot_type}', ",
+        "{if (case$plot_type == 'dim') 'displaying the features on a dimensional reduction embedding' ",
+        " else if (case$plot_type == 'heatmap') 'arranged as a heatmap by rows_name and other grouping variables' ",
+        " else if (case$plot_type %in% c('violin', 'box', 'ridge')) 'showing the distribution of feature values by the grouping variables' ",
+        " else if (case$plot_type == 'cor') 'showing the correlation between features' ",
+        " else 'showing aggregated feature values by the grouping variables'}. ",
+        "{if (!is.null(case$facet_by)) glue('Plots are further faceted by \"{case$facet_by}\". ') else ''}",
+        "{if (case$plot_type == 'dim') glue('The reduction used is \"{if (!is.null(case$reduction)) case$reduction else DefaultDimReduc(case$object)}\"') else ''}",
+        "{if (case$plot_type == 'dim' && !is.null(case$graph)) glue(', with graph \"{case$graph}\" drawn to show cell neighbor edges') else ''}",
+        "{if (case$plot_type == 'dim' && !is.null(case$bg_cutoff) && case$bg_cutoff > 0) glue(', and a background cutoff of {case$bg_cutoff}') else ''}",
+        "{if (case$plot_type == 'dim') glue(', using dimensions {paste(case$dims %||% 1:2, collapse = \",\")}') else ''}"
+    )
+    if (!is.null(case$comparisons)) {
+        default_descr <- paste0(
+            default_descr,
+            "Statistical comparisons were performed between groups using '{case$pairwise_method %||% 'wilcox.test'}' method."
         )
     }
+    reporter$add2(
+        list(kind = "descr", content = descr %||% default_descr),
+        hs = c(info$section, info$name)
+    )
     if (save_data) {
         reporter$add2(

biopipen/scripts/scrna/SeuratClusterStats-stats.R CHANGED Viewed

@@ -5,17 +5,26 @@ log$info("stats:")
 odir <- file.path(outdir, "stats")
 dir.create(odir, recursive=TRUE, showWarnings=FALSE)
 do_one_stats <- function(name) {
     log$info("- Case: {name}")
     case <- list_update(stats_defaults, stats[[name]])
-    extract_vars(case, "devpars", "more_formats", "save_code", "save_data", "subset")
+    case <- extract_vars(case, "devpars", "more_formats", "save_code", "save_data", "subset", "descr")
     if (!is.null(subset)) {
         case$object <- srtobj %>% filter(!!parse_expr(subset))
     } else {
         case$object <- srtobj
     }
+    ident <- case$ident %||% GetIdentityColumn(case$object)
+    groupings <- unique(c(case$group_by, case$rows_by, case$columns_by, case$pie_group_by, ident))
+    if (length(groupings) > 0) {
+        for (g in groupings) {
+            case$object <- filter(case$object, !is.na(!!sym(g)))
+        }
+    }
     info <- case_info(name, odir, is_dir = FALSE, create = TRUE)
     p <- do_call(gglogger::register(CellStatPlot), case)
@@ -27,6 +36,20 @@ do_one_stats <- function(name) {
             auto_data_setup = FALSE)
     }
+    frac <- case$frac %||% "none"
+    default_descr <- glue(
+        "The {case$plot_type} plot shows the distribution of cells across categories defined by '{ident}'",
+        "{if (!is.null(case$group_by)) glue(', grouped by {case$group_by}') else ''}",
+        "{if (!is.null(case$split_by)) glue(', and split by {case$split_by}') else ''}. ",
+        "The values represent ",
+        "{if (frac == 'none') 'the number of cells' else glue('the fraction of cells calculated by \"{frac}\"')}. "
+    )
+    if (!is.null(case$comparisons)) {
+        default_descr <- paste0(
+            default_descr,
+            "Statistical comparisons were performed between groups using '{case$pairwise_method %||% 'wilcox.test'}' method."
+        )
+    }
     if (save_data) {
         pdata <- attr(p, "data") %||% p$data
         if (!inherits(pdata, "data.frame") && !inherits(pdata, "matrix")) {
@@ -37,6 +60,10 @@ do_one_stats <- function(name) {
             list(
                 name = "Plot",
                 contents = list(
+                    list(
+                        kind = "descr",
+                        content = case$descr %||% default_descr
+                    ),
                     reporter$image(
                         info$prefix, more_formats, save_code, kind = "image")
                 )
@@ -60,6 +87,7 @@ do_one_stats <- function(name) {
         )
     } else {
         reporter$add2(
+            list(kind = "descr", content = case$descr %||% default_descr),
             reporter$image(info$prefix, more_formats, save_code, kind = "image"),
             hs = c(info$section, info$name)
         )

biopipen/scripts/scrna/SeuratClusterStats.R CHANGED Viewed

@@ -3,6 +3,7 @@ library(rlang)
 library(dplyr)
 library(tidyr)
 library(tibble)
+library(glue)
 library(forcats)
 library(tidyseurat)
 library(gglogger)

biopipen/scripts/scrna/TopExpressingGenes.R CHANGED Viewed

@@ -9,7 +9,7 @@ outdir <- {{out.outdir | r}}
 joboutdir <- {{job.outdir | r}}
 mutaters <- {{ envs.mutaters | r }}
 ident <- {{ envs.ident | r }}
-group.by <- {{ envs["group-by"] | r }}  # nolint
+group_by <- {{ envs.group_by | default: envs["group-by"] | default: None | r }}  # nolint
 each <- {{ envs.each | r }}
 dbs <- {{ envs.dbs | r }}
 n <- {{ envs.n | r }}
@@ -41,7 +41,7 @@ enrich_plots <- lapply(enrich_plots, function(x) {
 })
 defaults <- list(
     ident = ident,
-    group.by = group.by,
+    group_by = group_by,
     each = each,
     dbs = dbs,
     n = n,
@@ -171,17 +171,17 @@ run_case <- function(name) {
     } else {
         subobj <- srtobj
     }
-    case$group.by <- case$group.by %||% "Identity"
+    case$group_by <- case$group_by %||% "Identity"
     if (is.null(case$ident)) {
-        case$ident <- as.character(unique(subobj@meta.data[[case$group.by]]))
+        case$ident <- as.character(unique(subobj@meta.data[[case$group_by]]))
     }
     avgexpr <- AverageExpression(
         subobj,
-        group.by = case$group.by,
+        group_by = case$group_by,
         assays = assay
     )[[assay]]
     # https://github.com/satijalab/seurat/issues/7893
-    colnames(avgexpr) <- as.character(unique(subobj@meta.data[[case$group.by]]))
+    colnames(avgexpr) <- as.character(unique(subobj@meta.data[[case$group_by]]))
     avgexpr <- avgexpr[, case$ident, drop = FALSE]
     for (idt in case$ident) {

biopipen/scripts/scrna/celltypist-wrapper.py CHANGED Viewed

@@ -29,6 +29,8 @@ if __name__ == "__main__":
         raise ValueError(
             f"Over clustering column '{over_clustering}' not found in AnnData object."
         )
+    if 'neighbors' in adata.uns and 'params' in adata.uns['neighbors']:
+        adata.uns['neighbors']['params'].setdefault('n_neighbors', 15)
     annotated = celltypist.annotate(
         adata,

biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R CHANGED Viewed

@@ -98,7 +98,13 @@ do_comparison <- function(object, caseinfo, subset_by, subset_val, group_by, gro
     }
     classes <- as.character(object@meta.data[[group_by]])
-    classes[classes != group1] <- "_REST"
+    if (!group1 %in% classes) {
+        stop("Group '", group1, "' not found in '", group_by, "' column of the Seurat object.")
+    }
+    if (!is.null(group2) && !group2 %in% classes) {
+        stop("Group '", group2, "' not found in '", group_by, "' column of the Seurat object.")
+    }
+    classes[classes != group1] <- "Other"
     if (any(table(classes) < 5)) {
         msg <- paste0(
             "  ! skipped. Group has less than 5 cells: ",
@@ -266,8 +272,8 @@ do_subset <- function(object, caseinfo, subset_by, subset_val, group_by, compari
             rbind, lapply(
                 as.character(comparisons),
                 function(comparison) {
-                    if (grepl(",", comparison)) {
-                        group1 <- trimws(unlist(strsplit(comparison, ",")))
+                    if (grepl(":", comparison)) {
+                        group1 <- trimws(unlist(strsplit(comparison, ":")))
                         group2 <- group1[2]
                         group1 <- group1[1]
                     } else {

biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R CHANGED Viewed

@@ -315,8 +315,8 @@ do_subset <- function(
             plotargs$keep_empty <- TRUE
             p <- do_call(plotfn, plotargs)
-            devpars$width <- devpars$width %||% (attr(p, "width") * devpars$res) %||% 1000
-            devpars$height <- devpars$height %||% (attr(p, "height") * devpars$res) %||% 1000
+            devpars$width <- devpars$width %||% (attr(p, "width") * 2 * devpars$res) %||% 1000
+            devpars$height <- devpars$height %||% (attr(p, "height") * 2 * devpars$res) %||% 1000
         } else {  # heatmap
             minval <- min(dat)
             maxval <- max(dat)

biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R CHANGED Viewed

@@ -195,6 +195,7 @@ do_subset <- function(object, caseinfo, subset_by, subset_val, group_by, plots,
         plotprefix <- file.path(odir, slugify(plot))
         plotargs$devpars$width <- plotargs$devpars$width %||% (attr(p, "width") * plotargs$devpars$res) %||% 800
         plotargs$devpars$height <- plotargs$devpars$height %||% (attr(p, "height") * plotargs$devpars$res) %||% 600
+        plotargs$devpars$height <- max(plotargs$devpars$height, plotargs$devpars$width / 1.5)
         png(
             filename = paste0(plotprefix, ".png"),
             width = plotargs$devpars$width,

biopipen/scripts/tcr/GIANA/GIANA4.py CHANGED Viewed

@@ -36,9 +36,6 @@ from sklearn.manifold import MDS
 import faiss
 from query import *
 try:
-    from Bio.SubsMat.MatrixInfo import blosum62
-    print(blosum62)
-except ModuleNotFoundError:
     from Bio.Align import substitution_matrices
     blosum62 = substitution_matrices.load("BLOSUM62")
     _tmp = {}
@@ -46,7 +43,8 @@ except ModuleNotFoundError:
         for ab2 in blosum62.alphabet:
             _tmp[(ab1, ab2)] = int(blosum62[(ab1, ab2)])
     blosum62 = _tmp
-    print(blosum62)
+except ModuleNotFoundError:
+    from Bio.SubsMat.MatrixInfo import blosum62
 AAstring = "ACDEFGHIKLMNPQRSTVWY"
 AAstringList = list(AAstring)

biopipen/scripts/tcr/ScRepCombiningExpression.R CHANGED Viewed

@@ -7,7 +7,7 @@ srtobjfile <- {{in.srtobj | r}}
 outfile <- {{out.outfile | r}}
 cloneCall <- {{envs.cloneCall | r}}
 chain <- {{envs.chain | r}}
-group.by <- {{envs["group-by"] | r}}
+group_by <- {{envs.group_by | default: envs["group-by"] | default: None | r}}
 proportion <- {{envs.proportion | r}}
 filterNA <- {{envs.filterNA | r}}
 cloneSize <- {{envs.cloneSize | r}}
@@ -28,12 +28,13 @@ obj <- combineExpression(
     sc.data = srtobj,
     cloneCall = cloneCall,
     chain = chain,
-    group.by = group.by,
+    group.by = group_by,
     proportion = proportion,
     filterNA = filterNA,
     cloneSize = unlist(cloneSize),
     addLabel = addLabel
 )
+obj$TCR_Presence <- !is.na(obj$CTaa)
 log$info("Saving combined object ...")
 save_obj(obj, outfile)

biopipen/scripts/tcr/ScRepLoading.R CHANGED Viewed

@@ -118,8 +118,13 @@ load_contig <- function(input, sample, fmt) {
     fmt <- dirfmt[[2]]
     if (is.null(dir)) { return(NULL) }
     x <- loadContigs(dir, format = fmt %||% "10X")
-    x[[1]]$sample <- NULL
-    x[[1]]
+    x <- x[[1]]
+    x$sample <- NULL
+    if (identical(fmt %||% "10X", "10X") && colnames(x)[1] == "X") {
+        x$X <- NULL
+    }
+    x
 }

biopipen/scripts/tcr/TCRClustering.R CHANGED Viewed

@@ -130,11 +130,10 @@ output.clusters_df.to_csv(clustcr_dir + "/clusters.txt", sep="\t", index=False)
     clustcr_file
 }
-clean_clustcr_output = function(clustcr_outfile, clustcr_input) {
+clean_clustcr_output = function(clustcr_outfile) {
     clustcr_out = read.delim2(clustcr_outfile, header=TRUE, row.names = NULL)
     colnames(clustcr_out) = c("CDR3.aa", "TCR_Cluster")
-    in_cdr3 = read.delim2(clustcr_input, header=TRUE, row.names = NULL)
-    out = left_join(in_cdr3, distinct(clustcr_out), by=c("CDR3.aa")) %>%
+    out = left_join(cdr3aa_df, distinct(clustcr_out), by=c(cdr3seq4clustering = "CDR3.aa")) %>%
         mutate(
             TCR_Cluster = if_else(
                 is.na(TCR_Cluster),
@@ -170,7 +169,7 @@ run_clustcr = function() {
         quit(status=rc)
     }
     clustcr_outfile = file.path(clustcr_dir, "clusters.txt")
-    clean_clustcr_output(clustcr_outfile, clustcr_input)
+    clean_clustcr_output(clustcr_outfile)
 }
 prepare_giana = function() {
@@ -193,21 +192,8 @@ prepare_giana = function() {
 }
 prepare_input = function() {
-    # prepare input file for GIANA
-    cdr3 = c()
-    # cdr3col = if (!on_multi) "cdr3" else "CDR3.aa"
-    cdr3col = "CDR3.aa"
-    for (sample in names(seqdata)) {
-        sdata = seqdata[[sample]]
-        if (on_multi) {
-            sdata[[cdr3col]] = sub(";", "", sdata[[cdr3col]])
-        } else if ("chain" %in% colnames(sdata)) {
-            sdata = sdata %>% separate_rows(chain, cdr3col, sep = ";") %>%
-                filter(chain == "TRB")
-        }
-        cdr3 = union(cdr3, unique(sdata[[cdr3col]]))
-    }
-    cdr3 = unique(cdr3)
+    cdr3aa_df$cdr3seq4clustering <<- gsub("[^A-Z]", "", cdr3aa_df$CDR3.aa)  # Remove non-amino acid characters
+    cdr3 <- unique(cdr3aa_df$cdr3seq4clustering)
     # cdr3 = distinct(cdr3, aminoAcid, vMaxResolved)
@@ -220,15 +206,14 @@ prepare_input = function() {
     cdr3file
 }
-clean_giana_output = function(giana_outfile, giana_infile) {
+clean_giana_output = function(giana_outfile) {
     # generate an output file with columns:
     # CDR3.aa, TCR_Cluster, V.name, Sample
     # If sequence doesn't exist in the input file,
     # Then a unique cluster id is assigned to it.
     giana_out = read.delim2(giana_outfile, header=FALSE, comment.char = "#", row.names = NULL)[, 1:2, drop=FALSE]
     colnames(giana_out) = c("CDR3.aa", "TCR_Cluster")
-    in_cdr3 = read.delim2(giana_infile, header=TRUE, row.names = NULL)
-    out = left_join(in_cdr3, distinct(giana_out), by=c("CDR3.aa")) %>%
+    out = left_join(cdr3aa_df, distinct(giana_out), by=c(cdr3seq4clustering = "CDR3.aa")) %>%
         mutate(
             TCR_Cluster = if_else(
                 is.na(TCR_Cluster),
@@ -283,10 +268,11 @@ run_giana = function() {
         quit(status=rc)
     }
     giana_outfile = file.path(giana_outdir, "cdr3--RotationEncodingBL62.txt")
-    clean_giana_output(giana_outfile, giana_input)
+    clean_giana_output(giana_outfile)
 }
 attach_to_obj = function(obj, out) {
+    out <- as.data.frame(out)
     rownames(out) <- out$Barcode
     if (is_seurat) {
         # Attach results to Seurat object

biopipen/scripts/tcr/TESSA.R CHANGED Viewed

@@ -39,9 +39,11 @@ log$info("Preparing TCR input file ...")
 # If immfile endswith .rds, then it is an immunarch object
 tcrdata <- sobj@meta.data %>%
     rownames_to_column("contig_id") %>%
+    select(contig_id, CTaa, CTgene, sample = Sample) %>%
     filter(!is.na(CTaa) & !is.na(CTgene)) %>%
-    separate(CTaa, into = c(NA, "cdr3"), sep = "_", remove = FALSE) %>%
-    separate(CTgene, into = c(NA, "vjgene"), sep = "_", remove = FALSE) %>%
+    separate(CTaa, into = c(NA, "cdr3"), sep = "_", remove = TRUE) %>%
+    filter(!is.na(cdr3) & cdr3 != "NA" & cdr3 != "nan") %>%
+    separate(CTgene, into = c(NA, "vjgene"), sep = "_", remove = TRUE) %>%
     separate(vjgene, into = c("v_gene", NA, "j_gene", NA), sep = "\\.", remove = TRUE) %>%
     mutate(v_gene = sub("-\\d+$", "", v_gene), j_gene = sub("-\\d+$", "", j_gene))

{biopipen-0.34.1.dist-info → biopipen-0.34.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: biopipen
-Version: 0.34.1
+Version: 0.34.3
 Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
 License: MIT
 Author: pwwang

biopipen 0.34.1__py3-none-any.whl → 0.34.3__py3-none-any.whl

Potentially problematic release.

biopipen 0.34.1__py3-none-any.whl → 0.34.3__py3-none-any.whl