PyPI - biopipen - Versions diffs - 0.31.7__py3-none-any.whl → 0.32.1__py3-none-any.whl - Mend

biopipen 0.31.7__py3-none-any.whl → 0.32.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (23) hide show

biopipen/__init__.py +1 -1
biopipen/ns/scrna.py +153 -0
biopipen/reports/scrna/CellCellCommunicationPlots.svelte +14 -0
biopipen/reports/scrna/SeuratMap2Ref.svelte +10 -6
biopipen/reports/scrna/TopExpressingGenes.svelte +1 -1
biopipen/scripts/scrna/AnnData2Seurat.R +22 -14
biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
biopipen/scripts/scrna/CellCellCommunication.py +101 -0
biopipen/scripts/scrna/CellCellCommunicationPlots.R +191 -0
biopipen/scripts/scrna/ScFGSEA.R +1 -1
biopipen/scripts/scrna/Seurat2AnnData.R +2 -42
biopipen/scripts/scrna/SeuratClusterStats-features.R +1 -1
biopipen/scripts/scrna/SeuratMap2Ref.R +20 -1
biopipen/scripts/scrna/SeuratPreparing-common.R +6 -6
biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
biopipen/scripts/tcr/GIANA/GIANA4.py +1364 -789
biopipen/scripts/tcr/GIANA/query.py +164 -162
biopipen/scripts/tcr/TCRClustering.R +25 -4
biopipen/utils/single_cell.R +92 -1
{biopipen-0.31.7.dist-info → biopipen-0.32.1.dist-info}/METADATA +2 -1
{biopipen-0.31.7.dist-info → biopipen-0.32.1.dist-info}/RECORD +23 -19
{biopipen-0.31.7.dist-info → biopipen-0.32.1.dist-info}/WHEEL +1 -1
{biopipen-0.31.7.dist-info → biopipen-0.32.1.dist-info}/entry_points.txt +0 -0

biopipen/scripts/scrna/CellCellCommunicationPlots.R ADDED Viewed

@@ -0,0 +1,191 @@
+{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
+library(rlang)
+library(dplyr)
+library(ggplot2)
+library(CCPlotR)
+{{ biopipen_dir | joinpaths: "scripts", "scrna", "CCPlotR-patch.R" | source_r }}
+cccfile <- {{ in.cccfile | r }}
+expfile <- {{ in.expfile | r }}
+outdir <- {{ out.outdir | r }}
+joboutdir <- {{ job.outdir | r }}
+score_col <- {{ envs.score_col | r }}
+subset <- {{ envs.subset | r }}
+cases <- {{ envs.cases | r }}
+ccc <- read.table(cccfile, header=TRUE, sep="\t", check.names = FALSE)
+if (!is.null(subset)) {
+    ccc <- ccc %>% dplyr::filter(!!parse_expr(subset))
+}
+if (ncol(ccc) > 10) {
+    # from CellCellCommunication
+    if (!is.null(expfile)) {
+        log_warn("in.cccfile is from CellCellCommunication, in.expfile will be ignored")
+    }
+    if (is.null(score_col)) {
+        stop("'envs.score_col' is required for CellCellCommunication output")
+    }
+    if (!score_col %in% colnames(ccc)) {
+        stop(paste("Score column", score_col, "not found in the in.cccfile"))
+    }
+    # compose the expression data frame
+    exp <- data.frame(
+        cell_type = c(ccc$source, ccc$target),
+        gene = c(ccc$ligand, ccc$receptor),
+        mean_exp = c(ccc$ligand_trimean, ccc$receptor_trimean)
+    ) %>% distinct()
+    ccc <- ccc %>% select(
+        source, target,
+        ligand, receptor,
+        !!sym(score_col)
+    ) %>% rename(score = !!sym(score_col))
+} else {
+    if (!is.null(expfile)) {
+        exp <- read.table(expfile, header=TRUE, sep="\t", check.names = FALSE)
+    }
+}
+if (length(cases) == 0) {
+    stop("No cases provided.")
+}
+.get_default_devpars <- function(kind, nrows, ncols = NULL) {
+    if (kind == "arrow") {
+        list(
+            res = 100,
+            width = 600,
+            height = 50 + nrows * 20
+        )
+    } else if (kind == "circos") {
+        list(
+            res = 100,
+            width = 800,
+            height = 800
+        )
+    } else if (kind == "dotplot") {
+        list(
+            res = 100,
+            width = 120 + ncols * 60,
+            height = 300 + nrows * 40
+        )
+    } else if (kind == "heatmap") {
+        list(
+            res = 100,
+            width = 120 + ncols * 60,
+            height = 300 + ncols * 40
+        )
+    } else if (kind == "network") {
+        list(
+            res = 100,
+            width = 1200,
+            height = 1200
+        )
+    } else if (kind == "sigmoid") {
+        list(
+            res = 100,
+            width = max(800, ncols * 200),
+            height = 100 + nrows * 60
+        )
+    }
+}
+images <- lapply(names(cases), function(name) {
+    log_info("- Case: ", name, " ...")
+    case <- cases[[name]]
+    kind <- match.arg(case$kind, c("arrow", "circos", "dotplot", "heatmap", "network", "sigmoid"))
+    fun <- get(paste0("cc_", kind))
+    case$kind <- NULL
+    gg <- NULL
+    if (kind == "arrow") {
+        cell_types <- case$cell_types
+        if (is.null(cell_types) || length(cell_types) != 2) {
+            stop("'case.cell_types' is required and must be a vector of length 2")
+        }
+        n_ligand <- length(unique(ccc[ccc$source == cell_types[1], "ligand"]))
+        n_receptor <- length(unique(ccc[ccc$target == cell_types[2], "receptor"]))
+        default_devpars <- .get_default_devpars(kind, nrows = max(n_ligand, n_receptor))
+    } else if (kind == "circos") {
+        nrows <- length(unique(c(ccc$source, ccc$target)))
+        default_devpars <- .get_default_devpars(kind, nrows = nrows)
+    } else if (kind == "dotplot" || kind == "heatmap") {
+        nrows <- length(unique(ccc$source))
+        ncols <- length(unique(ccc$target))
+        default_devpars <- .get_default_devpars(kind, nrows = nrows, ncols = ncols)
+        if (
+            (kind == "heatmap" && (is.null(case$option) || case$option != "B")) ||
+            (kind == "dotplot" && (is.null(case$option) || case$option != "B"))) {
+            gg <- theme(axis.text.x = element_text(angle = 90, hjust = 1))
+        }
+    } else if (kind == "network") {
+        nrows <- length(unique(c(ccc$source, ccc$target)))
+        ncols <- length(unique(c(ccc$ligand, ccc$receptor)))
+        default_devpars <- .get_default_devpars(kind, nrows = nrows, ncols = ncols)
+        gg <- theme(plot.margin = margin(c(50, 50, 50, 50), "pt"))
+    } else if (kind == "sigmoid") {
+        nrows <- (case$n_top_ints %||% 20) / 2  # approx
+        ncols <- length(unique(c(ccc$source, ccc$target))) / 2
+        default_devpars <- .get_default_devpars(kind, nrows = nrows, ncols = ncols)
+    }
+    devpars <- case$devpars %||% default_devpars
+    devpars$res <- devpars$res %||% default_devpars$res
+    devpars$width <- devpars$width %||% default_devpars$width
+    devpars$height <- devpars$height %||% default_devpars$height
+    case$devpars <- NULL
+    section <- case$section
+    case$section <- NULL
+    case$cc_df <- ccc
+    if ("exp_df" %in% names(formals(fun))) {
+        case$exp_df <- exp
+    }
+    outpath <- file.path(outdir, paste0(slugify(name), ".png"))
+    png(outpath, width=devpars$width, height=devpars$height, res=devpars$res)
+    p <- do_call(fun, case)
+    if (!is.null(gg)) { p <- p + gg }
+    print(p)
+    dev.off()
+    list(
+        section = section,
+        kind = "table_image",
+        src = outpath,
+        name = name
+    )
+})
+section_images = list()
+for (image in images) {
+    section <- image$section
+    image$section <- NULL
+    if (is.null(section)) {
+        section = "DEFAULT"
+    }
+    if (!section %in% names(section_images)) {
+        section_images[[section]] = list()
+    }
+    section_images[[section]][[length(section_images[[section]]) + 1]] = image
+}
+if (length(section_images) == 1 && names(section_images)[1] == "DEFAULT") {
+    add_report(
+        section_images,
+        h1 = "Cell-Cell Communication Plots",
+        ui = "table_of_images"
+    )
+} else {
+    for (section in names(section_images)) {
+        imgplots = section_images[[section]]
+        add_report(
+            list(
+                ui = "table_of_images",
+                contents = imgplots
+            ),
+            h1 = section
+        )
+    }
+}
+save_report(joboutdir)

biopipen/scripts/scrna/ScFGSEA.R CHANGED Viewed

@@ -180,7 +180,7 @@ do_case <- function(name, case) {
     case$rest$maxSize <- case$maxsize
     case$rest$eps <- case$eps
     case$rest$nproc <- case$ncores
-    runFGSEA(ranks, gmtfile, case$top, info$casedir, case$rest)
+    runFGSEA(ranks, case$gmtfile, case$top, info$casedir, case$rest)
     add_report(
         list(kind = "fgsea", dir = info$casedir),

biopipen/scripts/scrna/Seurat2AnnData.R CHANGED Viewed

@@ -1,48 +1,8 @@
 {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
-library(rlang)
-library(Seurat)
-library(SeuratDisk)
+{{ biopipen_dir | joinpaths: "utils", "single_cell.R" | source_r }}
 sobjfile <- {{in.sobjfile | r}}
 outfile <- {{out.outfile | r}}
-outdir <- dirname(outfile)
 assay <- {{envs.assay | r}}
-if (endsWith(sobjfile, ".rds") || endsWith(sobjfile, ".RDS")) {
-    assay_name <- ifelse(is.null(assay), "", paste0("_", assay))
-    h5seurat_file <- file.path(
-        outdir,
-        paste0(tools::file_path_sans_ext(basename(outfile)), assay_name, ".h5seurat")
-    )
-    if (file.exists(h5seurat_file) &&
-        (file.mtime(h5seurat_file) < file.mtime(sobjfile))) {
-        file.remove(h5seurat_file)
-    }
-    if (!file.exists(h5seurat_file)) {
-        log_info("Reading RDS file ...")
-        sobj <- readRDS(sobjfile)
-        assay <- assay %||% DefaultAssay(sobj)
-        # In order to convert to h5ad
-        # https://github.com/satijalab/seurat/issues/8220#issuecomment-1871874649
-        sobj$RNAv3 <- as(object = sobj[[assay]], Class = "Assay")
-        DefaultAssay(sobj) <- "RNAv3"
-        sobj$RNA <- NULL
-        sobj <- RenameAssays(sobj, RNAv3 = "RNA")
-        log_info("Saving to H5Seurat file ...")
-        SaveH5Seurat(sobj, h5seurat_file)
-        rm(sobj)
-        sobjfile <- h5seurat_file
-    } else {
-        log_info("Using existing H5Seurat file ...")
-    }
-}
-if (!endsWith(sobjfile, ".h5seurat")) {
-    stop(paste0("Unknown input file format: ",
-        tools::file_ext(sobjfile),
-        ". Supported formats: .rds, .RDS, .h5seurat"))
-}
-Convert(sobjfile, dest = outfile, assay = assay %||% "RNA", overwrite = TRUE)
+seurat_to_anndata(sobjfile, outfile, assay, log_info)

biopipen/scripts/scrna/SeuratClusterStats-features.R CHANGED Viewed

@@ -414,7 +414,7 @@ do_one_features = function(name) {
             p = p + eval(parse(text = pls))
         }
     }
-    figfile = file.path(odir, paste0(slugify(name), ".", slugify(case$kind), ".png"))
+    figfile = file.path(odir, paste0(slugify(name), ".", slugify(kind), ".png"))
     png(figfile, width=devpars$width, height=devpars$height, res=devpars$res)
     tryCatch({
         print(p)

biopipen/scripts/scrna/SeuratMap2Ref.R CHANGED Viewed

@@ -5,6 +5,7 @@ library(Seurat)
 library(SeuratDisk)
 library(rlang)
 library(dplyr)
+library(tidyr)
 set.seed(8525)
@@ -377,7 +378,25 @@ for (qname in names(mapquery_args$refdata)) {
         repel = TRUE,
     ) + NoLegend()
-    png(file.path(outdir, paste0("UMAPs.png")), width = 1400, height = 700, res = 100)
+    png(file.path(outdir, paste0("UMAPs-", slugify(qname), ".png")), width = 1500, height = 700, res = 100)
     print(ref_p | query_p)
     dev.off()
+    # summarize the stats
+    log_info("  Summarizing stats: {qname} -> {rname}")
+    ref_stats <- as.data.frame(table(reference@meta.data[[rname]]))
+    colnames(ref_stats) <- c("CellType", "Count_Ref")
+    query_stats <- as.data.frame(table(sobj@meta.data[[paste0("predicted.", qname)]]))
+    colnames(query_stats) <- c("CellType", "Count_Query")
+    stats <- left_join(ref_stats, query_stats, by = "CellType") %>%
+        replace_na(list(Count_Query = 0)) %>%
+        arrange(desc(Count_Query), desc(Count_Ref))
+    write.table(
+        stats,
+        file = file.path(outdir, paste0("stats-", slugify(qname), ".txt")),
+        row.names = FALSE,
+        quote = FALSE,
+        sep = "\t"
+    )
 }

biopipen/scripts/scrna/SeuratPreparing-common.R CHANGED Viewed

@@ -35,10 +35,10 @@ rename_files = function(e, sample, path) {
 perform_cell_qc <- function(sobj, per_sample = FALSE) {
     log_prefix <- ifelse(per_sample, "  ", "- ")
     log_info("{log_prefix}Adding metadata for QC ...")
-    sobj$percent.mt <- PercentageFeatureSet(sobj, pattern = "^MT-")
-    sobj$percent.ribo <- PercentageFeatureSet(sobj, pattern = "^RP[SL]")
-    sobj$percent.hb <- PercentageFeatureSet(sobj, pattern = "^HB[^(P)]")
-    sobj$percent.plat <- PercentageFeatureSet(sobj, pattern = "PECAM1|PF4")
+    sobj$percent.mt <- PercentageFeatureSet(sobj, pattern = "^MT-|^Mt-|^mt-")
+    sobj$percent.ribo <- PercentageFeatureSet(sobj, pattern = "^RP[SL]|^Rp[sl]")
+    sobj$percent.hb <- PercentageFeatureSet(sobj, pattern = "^HB[^P]|^Hb[^p]")
+    sobj$percent.plat <- PercentageFeatureSet(sobj, pattern = "PECAM1|PF4|Pecam1|Pf4")
     if (is.null(envs$cell_qc) || length(envs$cell_qc) == 0) {
         log_warn("{log_prefix}No cell QC criteria is provided. All cells will be kept.")
@@ -210,7 +210,7 @@ load_sample = function(sample) {
     if (isTRUE(envs$cell_qc_per_sample)) {
         log_info("- Perform cell QC for sample: {sample} ...")
-        obj = perform_cell_qc(obj, TRUE)
+        obj = perform_cell_qc(obj, per_sample = TRUE)
     }
     if (isTRUE(envs$use_sct)) {
@@ -287,7 +287,7 @@ run_cell_qc <- function(sobj) {
         if (!envs$cell_qc_per_sample) {
             log_info("Performing cell QC ...")
-            sobj = perform_cell_qc(sobj)
+            sobj = perform_cell_qc(sobj, per_sample = FALSE)
         }
         cached$data <- list(sobj = sobj, cell_qc_df = cell_qc_df)

biopipen 0.31.7__py3-none-any.whl → 0.32.1__py3-none-any.whl

Potentially problematic release.

biopipen 0.31.7__py3-none-any.whl → 0.32.1__py3-none-any.whl