PyPI - biopipen - Versions diffs - 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl - Mend

biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (149)

biopipen/__init__.py +1 -1
biopipen/core/filters.py +10 -183
biopipen/core/proc.py +5 -3
biopipen/core/testing.py +8 -1
biopipen/ns/bam.py +40 -4
biopipen/ns/cnv.py +1 -1
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/delim.py +1 -1
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +38 -0
biopipen/ns/plot.py +8 -0
biopipen/ns/scrna.py +290 -288
biopipen/ns/scrna_metabolic_landscape.py +207 -366
biopipen/ns/tcr.py +165 -97
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
biopipen/reports/snp/PlinkCallRate.svelte +2 -2
biopipen/reports/snp/PlinkFreq.svelte +1 -1
biopipen/reports/snp/PlinkHWE.svelte +1 -1
biopipen/reports/snp/PlinkHet.svelte +1 -1
biopipen/reports/snp/PlinkIBD.svelte +1 -1
biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
biopipen/scripts/bam/CNAClinic.R +41 -6
biopipen/scripts/bam/CNVpytor.py +2 -1
biopipen/scripts/bam/ControlFREEC.py +2 -3
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/cnv/AneuploidyScore.R +25 -13
biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
biopipen/scripts/cnv/TMADScore.R +4 -4
biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +4 -1
biopipen/scripts/gene/GeneNameConversion.R +14 -12
biopipen/scripts/gsea/Enrichr.R +2 -2
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/PreRank.R +3 -3
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/plot/VennDiagram.R +2 -2
biopipen/scripts/protein/ProdigySummary.R +34 -27
biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
biopipen/scripts/regulatory/motifs-common.R +10 -9
biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
biopipen/scripts/rnaseq/Simulation.R +0 -2
biopipen/scripts/rnaseq/UnitConversion.R +6 -5
biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
biopipen/scripts/scrna/CellCellCommunication.py +1 -1
biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
biopipen/scripts/scrna/CellsDistribution.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
biopipen/scripts/scrna/MarkersFinder.R +348 -217
biopipen/scripts/scrna/MetaMarkers.R +3 -3
biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
biopipen/scripts/scrna/RadarPlots.R +1 -1
biopipen/scripts/scrna/ScFGSEA.R +157 -75
biopipen/scripts/scrna/ScSimulation.R +11 -10
biopipen/scripts/scrna/ScVelo.py +605 -0
biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
biopipen/scripts/scrna/SeuratClustering.R +31 -48
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
biopipen/scripts/scrna/SeuratPreparing.R +76 -24
biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
biopipen/scripts/snp/MatrixEQTL.R +39 -20
biopipen/scripts/snp/PlinkCallRate.R +43 -34
biopipen/scripts/snp/PlinkFreq.R +34 -41
biopipen/scripts/snp/PlinkHWE.R +23 -18
biopipen/scripts/snp/PlinkHet.R +26 -22
biopipen/scripts/snp/PlinkIBD.R +30 -34
biopipen/scripts/stats/ChowTest.R +9 -8
biopipen/scripts/stats/DiffCoexpr.R +13 -11
biopipen/scripts/stats/LiquidAssoc.R +7 -8
biopipen/scripts/stats/Mediation.R +8 -8
biopipen/scripts/stats/MetaPvalue.R +11 -13
biopipen/scripts/stats/MetaPvalue1.R +6 -5
biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
biopipen/scripts/tcr/ClonalStats.R +5 -4
biopipen/scripts/tcr/CloneResidency.R +3 -3
biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
biopipen/scripts/tcr/ScRepLoading.R +114 -92
biopipen/scripts/tcr/TCRClusterStats.R +2 -2
biopipen/scripts/tcr/TCRClustering.R +86 -97
biopipen/scripts/tcr/TESSA.R +65 -115
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
biopipen/utils/common_docstrs.py +66 -63
biopipen/utils/reporter.py +177 -0
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
biopipen/utils/caching.R +0 -44
biopipen/utils/gene.R +0 -95
biopipen/utils/gsea.R +0 -329
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -602
biopipen/utils/mutate_helpers.R +0 -581
biopipen/utils/plot.R +0 -209
biopipen/utils/repr.R +0 -146
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -207
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0

biopipen/scripts/tcr/ScRepCombiningExpression.R ADDED Viewed

@@ -0,0 +1,39 @@
+library(scRepertoire)
+library(Seurat)
+library(biopipen.utils)
+screpfile <- {{in.screpfile | r}}
+srtobjfile <- {{in.srtobj | r}}
+outfile <- {{out.outfile | r}}
+cloneCall <- {{envs.cloneCall | r}}
+chain <- {{envs.chain | r}}
+group.by <- {{envs["group-by"] | r}}
+proportion <- {{envs.proportion | r}}
+filterNA <- {{envs.filterNA | r}}
+cloneSize <- {{envs.cloneSize | r}}
+addLabel <- {{envs.addLabel | r}}
+log <- get_logger()
+log$info("Loading scRepertoire object ...")
+screp <- read_obj(screpfile)
+log$info("Loading Seurat object ...")
+srtobj <- read_obj(srtobjfile)
+log$info("Combining expression data ...")
+obj <- combineExpression(
+    input.data = screp,
+    sc.data = srtobj,
+    cloneCall = cloneCall,
+    chain = chain,
+    group.by = group.by,
+    proportion = proportion,
+    filterNA = filterNA,
+    cloneSize = unlist(cloneSize),
+    addLabel = addLabel
+)
+log$info("Saving combined object ...")
+save_obj(obj, outfile)

biopipen/scripts/tcr/ScRepLoading.R CHANGED Viewed

@@ -1,127 +1,149 @@
-{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
 library(rlang)
 library(bracer)
 library(scRepertoire)
+library(biopipen.utils)
-metafile <- {{in.metafile | quote}}
-outfile <- {{out.outfile | quote}}
+metafile <- {{in.metafile | r}}
+outfile <- {{out.outfile | r}}
 combineTCR_args <- {{envs.combineTCR | r}}
+combineBCR_args <- {{envs.combineBCR | r}}
+type <- {{envs.type | r}}
 exclude <- {{envs.exclude | r}}
+format <- {{envs.format | r}}
+tmpdir <- {{envs.tmpdir | r}}
+type = toupper(type)
 if (length(exclude) == 1) {
     exclude <- strsplit(exclude, ",")[[1]]
 }
-log_info("Loading metadata ...")
+log <- get_logger()
+log$info("Loading metadata ...")
 metadata <- read.table(metafile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE)
+data_column <- ifelse(type == "TCR", "TCRData", "BCRData")
+combine_fn <- ifelse(type == "TCR", combineTCR, combineBCR)
+combine_args <- if (type == "TCR") { combineTCR_args } else { combineBCR_args }
 stopifnot("Error: Column `Sample` is not found in metafile." = "Sample" %in% colnames(metadata))
-stopifnot("Error: Column `TCRData` is not found in metafile." = "TCRData" %in% colnames(metadata))
+if (!data_column %in% colnames(metadata)) {
+    stop(paste0("Error: Column `", data_column, "` is not found in metafile."))
+}
 rownames(metadata) <- metadata$Sample
+.gunzip <- function(input, output) {
+    # Open connections
+    con_in <- gzfile(input, "rt")   # "rt" = read text mode
+    con_out <- file(output, "wt")  # "wt" = write text mode
+    # Read line by line and write
+    while(length(line <- readLines(con_in, n = 10, warn = FALSE)) > 0) {
+        writeLines(line, con_out)
+    }
+    # Close connections
+    close(con_in)
+    close(con_out)
+}
+get_file_name <- function(fmt) {
+    if (is.null(fmt)) { return("filtered_contig_annotations.csv") }
+    fmt <- tolower(fmt)
+    if (fmt == "10x") { return("filtered_contig_annotations.csv") }
+    if (fmt == "airr") { return("airr_rearrangement.tsv") }
+    if (fmt == "bd") { return("Contigs_AIRR.tsv") }
+    if (fmt == "dandelion") { return("all_contig_dandelion.tsv") }
+    if (fmt == "immcantation") { return("data.tsv") }
+    if (fmt == "json") { return("contigs.json") }
+    if (fmt == "parsebio") { return("barcode_report.tsv") }
+    if (fmt == "mixcr") { return("clones.tsv") }
+    if (fmt == "omniscope") { return("contigs.csv") }
+    if (fmt == "trust4") { return("barcode_report.tsv") }
+    if (fmt == "wat3r") { return("barcode_results.csv") }
+    stop("Unsupported format: ", fmt)
+}
+get_format <- function(filename) {
+    if (identical(filename, "filtered_contig_annotations.csv")) { return("10X") }
+    if (identical(filename, "airr_rearrangement.tsv")) { return("AIRR") }
+    if (identical(filename, "Contigs_AIRR.tsv")) { return("BD") }
+    if (identical(filename, "all_contig_dandelion.tsv")) { return("Dandelion") }
+    if (identical(filename, "data.tsv")) { return("Immcantation") }
+    if (endsWith(filename, ".json")) { return("JSON") }
+    if (identical(filename, "barcode_report.tsv")) { return("ParseBio") }
+    if (identical(filename, "clones.tsv")) { return("MiXCR") }
+    if (identical(filename, "contigs.csv")) { return("Omniscope") }
+    # if (identical(filename, "barcode_report.tsv")) { return("TRUST4") }
+    if (identical(filename, "barcode_results.csv")) { return("WAT3R") }
+    return("10X")
+}
 # helper function
-get_contig_annofile <- function(dir, sample, warn = TRUE) {
-    if (is.na(dir) || !is.character(dir) || nchar(dir) == 0 || dir == "NA") {
+get_contig_dir <- function(input, sample, fmt) {
+    if (is.na(input) || !is.character(input) || nchar(input) == 0 || input == "NA") {
         warning(paste0("No path found for sample: ", sample), immediate. = TRUE)
-        return (NULL)
+        return(list(NULL, fmt))
     }
-    if (file.exists(dir) && !dir.exists(dir)) {
-        return(dir)
+    if (!file.exists(input)) {
+        stop(paste0("Input path does not exist for sample: ", sample, ": ", input))
     }
-    annofilepat <- paste0("*", "{all,filtered}", "_contig_annotations.csv*")  # .gz
-    annofiles <- glob(file.path(as.character(dir), annofilepat))
-    if (length(annofiles) == 0) {
-        stop(
-            "Cannot find neither `filtered_contig_annotations.csv[.gz]` nor",
-            "`all_contig_annotations.csv[.gz]` in given TCRData for sample: ",
-            sample
-        )
+    if (dir.exists(input)) {
+        return(list(input, fmt))
     }
-    if (length(annofiles) > 1) {
-        if (warn) {
-            warning("Found more than one file in given TCRData for sample: ", sample, immediate. = TRUE)
-        }
-        for (annofile in annofiles) {
-            # use filtered if both filtered_ and all_ are found
-            if (grepl("filtered", annofile)) {
-                annofiles <- annofile
-                break
-            }
-            # give a warning if only all_ is found
-            if (warn) {
-                warning("Using all_contig_annotations as filtred_config_annotations not found ",
-                        "in given TCRData for sample: ", sample,
-                        immediate. = TRUE
-                )
-            }
-        }
+    # file
+    filedir <- file.path(tmpdir, slugify(sample))
+    dir.create(filedir, recursive = TRUE, showWarnings = FALSE)
+    # if it is gzipped
+    if (grepl("\\.gz$", input)) {
+        flatfile <- file.path(filedir, sub("\\.gz$", "", basename(input)))
+        .gunzip(input, flatfile)
+        input <- flatfile
     }
-    annofiles[1]
+    fmt <- fmt %||% get_format(basename(input))
+    filename <- get_file_name(fmt)
+    file.symlink(input, file.path(filedir, filename))
+    return(list(filedir, fmt))
 }
-# for (i in seq_len(nrow(metadata))) {
-#     sample <- as.character(metadata$Sample[i])
-#     annofile <- get_contig_annofile(metadata$TCRData[i], sample)
-#     if (is.null(annofile)) { next }
-#     anno <- read.delim2(annofile, sep = ",", header = TRUE, stringsAsFactors = FALSE)
-#     # Add cdr1, cdr2, fwr1, fwr2, etc columns
-#     anno$cdr1 <- anno$cdr1 %||% ""
-#     anno$cdr1_nt <- anno$cdr1_nt %||% ""
-#     anno$cdr2 <- anno$cdr2 %||% ""
-#     anno$cdr2_nt <- anno$cdr2_nt %||% ""
-#     anno$fwr1 <- anno$fwr1 %||% ""
-#     anno$fwr1_nt <- anno$fwr1_nt %||% ""
-#     anno$fwr2 <- anno$fwr2 %||% ""
-#     anno$fwr2_nt <- anno$fwr2_nt %||% ""
-#     anno$fwr3 <- anno$fwr3 %||% ""
-#     anno$fwr3_nt <- anno$fwr3_nt %||% ""
-#     anno$fwr4 <- anno$fwr4 %||% ""
-#     anno$fwr4_nt <- anno$fwr4_nt %||% ""
-#     annotfile = file.path(datadir, paste0(sample, ".csv"))
-#     write.table(anno, annotfile, sep = ",", quote = FALSE, row.names = FALSE, col.names = TRUE)
-# }
-log_info("Reading TCR data ...")
+load_contig <- function(input, sample, fmt) {
+    log$info("- Sample: {sample}")
+    dirfmt <- get_contig_dir(input, sample, fmt)
+    dir <- dirfmt[[1]]
+    fmt <- dirfmt[[2]]
+    if (is.null(dir)) { return(NULL) }
+    x <- loadContigs(dir, format = fmt %||% "10X")
+    x[[1]]$sample <- NULL
+    x[[1]]
+}
+log$info("Reading {type} data ...")
 contig_list <- lapply(seq_len(nrow(metadata)), function(i) {
     sample <- as.character(metadata$Sample[i])
-    annofile <- get_contig_annofile(metadata$TCRData[i], sample)
-    if (is.null(annofile)) { return (NULL) }
-    log_info("- Sample: {sample} ...")
-    anno <- read.delim2(annofile, sep = ",", header = TRUE, stringsAsFactors = FALSE)
-    # Add cdr1, cdr2, fwr1, fwr2, etc columns for compatibility
-    anno$cdr1 <- anno$cdr1 %||% ""
-    anno$cdr1_nt <- anno$cdr1_nt %||% ""
-    anno$cdr2 <- anno$cdr2 %||% ""
-    anno$cdr2_nt <- anno$cdr2_nt %||% ""
-    anno$fwr1 <- anno$fwr1 %||% ""
-    anno$fwr1_nt <- anno$fwr1_nt %||% ""
-    anno$fwr2 <- anno$fwr2 %||% ""
-    anno$fwr2_nt <- anno$fwr2_nt %||% ""
-    anno$fwr3 <- anno$fwr3 %||% ""
-    anno$fwr3_nt <- anno$fwr3_nt %||% ""
-    anno$fwr4 <- anno$fwr4 %||% ""
-    anno$fwr4_nt <- anno$fwr4_nt %||% ""
-    anno
+    path <- metadata[[data_column]][i]
+    load_contig(path, sample, fmt = format)
 })
 names(contig_list) <- as.character(metadata$Sample)
 contig_list <- contig_list[!sapply(contig_list, is.null)]
-log_info("Combining TCR data and adding meta data ...")
-if (isTRUE(combineTCR_args$samples)) {
-    combineTCR_args$samples <- names(contig_list)
+log$info("Combining {type} data and adding meta data ...")
+if (isTRUE(combine_args$samples)) {
+    combine_args$samples <- names(contig_list)
 }
-combineTCR_args$input.data <- contig_list
-screp_data <- do_call(combineTCR, combineTCR_args)
+combine_args$input.data <- contig_list
+screp_data <- do_call(combine_fn, combine_args)
 for (col in colnames(metadata)) {
     if (col %in% exclude) { next }
     screp_data <- addVariable(screp_data, col, metadata[names(screp_data), col])
 }
-rm(contig_list, combineTCR_args)
+rm(contig_list, combine_args)
-log_info("Saving TCR data ...")
-saveRDS(screp_data, outfile)
+log$info("Saving {type} data ...")
+save_obj(screp_data, outfile)

biopipen/scripts/tcr/TCRClusterStats.R CHANGED Viewed

@@ -7,8 +7,8 @@ library(rlang)
 library(immunarch)
 library(ggprism)
-immfile = {{in.immfile | quote}}
-outdir = {{out.outdir | quote}}
+immfile = {{in.immfile | r}}
+outdir = {{out.outdir | r}}
 cluster_size_envs = {{envs.cluster_size | r}}
 shared_clusters_envs = {{envs.shared_clusters | r}}
 sample_diversity_envs = {{envs.sample_diversity | r}}

biopipen/scripts/tcr/TCRClustering.R CHANGED Viewed

@@ -1,49 +1,67 @@
-# # https://stackoverflow.com/questions/50145643/unable-to-change-python-path-in-reticulate
-# python = Sys.which({{envs.python | r}})
-# Sys.setenv(RETICULATE_PYTHON = python)
-# library(reticulate)
-{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
-{{ biopipen_dir | joinpaths: "utils", "single_cell.R" | source_r }}
-library(immunarch)
 library(dplyr)
 library(tidyr)
 library(tibble)
 library(glue)
+library(biopipen.utils)
+screpfile <- {{in.screpfile | r}}
+outdir <- normalizePath({{job.outdir | r}})
+outfile <- {{out.outfile | r}}
-immfile = {{in.immfile | r}}
-outdir = normalizePath({{job.outdir | r}})
-outfile = {{out.immfile | r}}
-clusterfile = {{out.clusterfile | r}}
-tool = {{envs.tool | r}}
-python = {{envs.python | r}}
-on_multi = {{envs.on_multi | r}}
-args = {{envs.args | r}}
-prefix = {{envs.prefix | r}}
+tool <- {{envs.tool | r}}
+python <- {{envs.python | r}}
+within_sample <- {{envs.within_sample | r}}
+args <- {{envs.args | r}}
+chain <- {{envs.chain | r}}
 setwd(outdir)
-immdata = readRDS(immfile)
-if (on_multi) {
-    seqdata = immdata$multi
-} else {
-    seqdata = immdata$data
-}
-if (is.null(prefix)) { prefix = immdata$prefix }
-if (is.null(prefix)) { prefix = "" }
+log <- get_logger()
+log$info("Reading input file ...")
+obj <- read_obj(screpfile)
+is_seurat <- inherits(obj, "Seurat")
 get_cdr3aa_df = function() {
-    out = expand_immdata(immdata, cell_id = "Barcode") %>%
-        mutate(Barcode = glue(paste0(prefix, "{Barcode}")))
-    if (on_multi) {
-        out$CDR3.aa = sub(";", "", out$CDR3.aa)
-    } else if ("chain" %in% colnames(out)) {
-        out = out %>% separate_rows(chain, CDR3.aa, sep = ";") %>%
-            filter(chain == "TRB")
+    if (!is_seurat) {
+        out <- NULL
+        for (sample in names(obj)) {
+            df <- data.frame(
+                Sample = sample,
+                Barcode = obj[[sample]]$barcode
+            )
+            if (chain == "both") {
+                df$CDR3.aa <- obj[[sample]]$CTaa
+            } else if (chain == "alpha") {
+                df$CDR3.aa <- obj[[sample]]$cdr3_aa1
+            } else if (chain == "beta") {
+                df$CDR3.aa <- obj[[sample]]$cdr3_aa2
+            }
+            out <- rbind(out, df)
+        }
+    } else {
+        out <- obj@meta.data
+        out$Barcode <- rownames(out)
+        out <- out %>% filter(!is.na(CTaa))
+        if (grepl("_", out$CTaa[1])) {
+            if (chain == "both") {
+                out$CDR3.aa <- out$CTaa
+            } else {
+                out <- separate(out, CTaa, into = c("alpha.aa", "beta.aa"), sep = "_")
+                if (chain == "alpha") {
+                    out$CDR3.aa <- out$alpha.aa
+                } else if (chain == "beta") {
+                    out$CDR3.aa <- out$beta.aa
+                }
+            }
+        } else {
+            out$CDR3.aa <- out$CTaa
+        }
+        out <- select(out, Sample, Barcode, CDR3.aa)
     }
-    out %>% select(Barcode, CDR3.aa)
+    # Sample, Barcode, CDR3.aa
+    out
 }
 cdr3aa_df = get_cdr3aa_df()
@@ -124,24 +142,16 @@ clean_clustcr_output = function(clustcr_outfile, clustcr_input) {
                 paste0("M_", as.character(TCR_Cluster))
             )
         )
-    out = left_join(
-        cdr3aa_df,
-        out,
-        by = "CDR3.aa"
-    )
-    df = out %>%
-        select(Barcode, TCR_Cluster) %>%
-        add_count(TCR_Cluster, name="TCR_Cluster_Size") %>%
-        distinct(Barcode, .keep_all = TRUE) %>%
-        add_count(TCR_Cluster, name="TCR_Cluster_Size1") %>%
-        column_to_rownames("Barcode")
-    write.table(df, clusterfile, row.names=T, col.names=T, quote=F, sep="\t")
-    out
+    if (within_sample) {
+        out <- mutate(out, TCR_Cluster = paste0(Sample, ".", TCR_Cluster))
+    }
+    left_join(cdr3aa_df, out, by = "CDR3.aa")
 }
 run_clustcr = function() {
-    log_info("Running ClusTCR ...")
+    log$info("Running ClusTCR ...")
     clustcr_dir = file.path(outdir, "ClusTCR_Output")
     dir.create(clustcr_dir, showWarnings = FALSE)
     clustcr_file = prepare_clustcr(clustcr_dir)
@@ -154,7 +164,7 @@ run_clustcr = function() {
     )
     print("Running:")
     print(clustcr_cmd)
-    log_debug("- Running command: {clustcr_cmd}")
+    log$debug("- Running command: {clustcr_cmd}")
     rc = system(clustcr_cmd)
     if (rc != 0) {
         quit(status=rc)
@@ -164,7 +174,8 @@ run_clustcr = function() {
 }
 prepare_giana = function() {
-    giana_srcdir = "{{biopipen_dir}}/scripts/tcr/GIANA"
+    biopipen_dir <- get_biopipen_dir(python)
+    giana_srcdir = file.path(biopipen_dir, "scripts", "tcr", "GIANA")
     # # The source code of GIANA is downloaded now to giana_srcdir
     # giana_file = file.path(giana_srcdir, "GIANA.py")
@@ -226,24 +237,15 @@ clean_giana_output = function(giana_outfile, giana_infile) {
             )
         )
-    out = left_join(
-        cdr3aa_df,
-        out,
-        by = "CDR3.aa"
-    )
-    df = out %>%
-        select(Barcode, TCR_Cluster) %>%
-        add_count(TCR_Cluster, name="TCR_Cluster_Size") %>%
-        distinct(Barcode, .keep_all = TRUE) %>%
-        add_count(TCR_Cluster, name="TCR_Cluster_Size1") %>%
-        column_to_rownames("Barcode")
-    write.table(df, clusterfile, row.names=T, col.names=T, quote=F, sep="\t")
-    out
+    if (within_sample) {
+        out <- mutate(out, TCR_Cluster = paste0(Sample, ".", TCR_Cluster))
+    }
+    left_join(cdr3aa_df, out, by = "CDR3.aa")
 }
 run_giana = function() {
-    log_info("Running GIANA ...")
+    log$info("Running GIANA ...")
     giana_srcdir = prepare_giana()
     giana_input = prepare_input()
     giana_outdir = file.path(outdir, "GIANA_Output")
@@ -275,7 +277,7 @@ run_giana = function() {
     )
     print("Running:")
     print(giana_cmd)
-    log_debug("- Running command: {giana_cmd}")
+    log$debug("- Running command: {giana_cmd}")
     rc = system(giana_cmd)
     if (rc != 0) {
         quit(status=rc)
@@ -284,35 +286,19 @@ run_giana = function() {
     clean_giana_output(giana_outfile, giana_input)
 }
-attach_to_immdata = function(out) {
-    seqdata2 = list()
-    # by = if (!on_multi) c(cdr3 = "CDR3.aa") else "CDR3.aa"
-    by = "CDR3.aa"
-    for (sample in names(seqdata)) {
-        sample_out = left_join(seqdata[[sample]], out, by=by)
-        seqdata2[[sample]] = sample_out
-        if (!on_multi) {
-            immdata$data[[sample]] = immdata$data[[sample]] %>% left_join(
-                out, by = "CDR3.aa"
-            )
-        } else {
-            immdata$multi[[sample]] = immdata$multi[[sample]] %>% left_join(
-                out, by = c(cdr3 = "CDR3.aa")
-            )
-        }
-        # if ("single" %in% names(immdata)) {
-        #     immdata$data[[sample]] = immdata$data[[sample]] %>% left_join(
-        #         out, by = "CDR3.aa"
-        #     )
-        # }
-    }
-    if (!on_multi) {
-        immdata$data = seqdata2
+attach_to_obj = function(obj, out) {
+    rownames(out) <- out$Barcode
+    if (is_seurat) {
+        # Attach results to Seurat object
+        obj@meta.data$TCR_Cluster <- out[rownames(obj@meta.data), "TCR_Cluster"]
     } else {
-        immdata$multi = seqdata2
+        # Attach results to the list of data frames
+        for (sample in names(obj)) {
+            sout <- filter(out, Sample == sample)
+            obj[[sample]]$TCR_Cluster <- sout[obj[[sample]]$barcode, "TCR_Cluster"]
+        }
     }
-    saveRDS(immdata, file = outfile)
-    # seqdata2
+    obj
 }
@@ -324,5 +310,8 @@ if (tolower(tool) == "clustcr") {
     stop(paste("Unknown tool:", tool))
 }
-log_info("Saving results ...")
-attach_to_immdata(out)
+log$info("Attaching results to the input object ...")
+out <- attach_to_obj(obj, out)
+log$info("Saving results ...")
+save_obj(out, outfile)

biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

Potentially problematic release.

biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl