PyPI - biopipen - Versions diffs - 0.33.1__py3-none-any.whl → 0.34.1__py3-none-any.whl - Mend

biopipen 0.33.1__py3-none-any.whl → 0.34.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (150) hide show

biopipen/__init__.py +1 -1
biopipen/core/filters.py +10 -183
biopipen/core/proc.py +5 -3
biopipen/core/testing.py +8 -1
biopipen/ns/bam.py +40 -4
biopipen/ns/cnv.py +1 -1
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/delim.py +1 -1
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +38 -0
biopipen/ns/plot.py +8 -0
biopipen/ns/scrna.py +328 -292
biopipen/ns/scrna_metabolic_landscape.py +207 -366
biopipen/ns/tcr.py +165 -97
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
biopipen/reports/snp/PlinkCallRate.svelte +2 -2
biopipen/reports/snp/PlinkFreq.svelte +1 -1
biopipen/reports/snp/PlinkHWE.svelte +1 -1
biopipen/reports/snp/PlinkHet.svelte +1 -1
biopipen/reports/snp/PlinkIBD.svelte +1 -1
biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
biopipen/scripts/bam/CNAClinic.R +41 -6
biopipen/scripts/bam/CNVpytor.py +2 -1
biopipen/scripts/bam/ControlFREEC.py +2 -3
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/cnv/AneuploidyScore.R +25 -13
biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
biopipen/scripts/cnv/TMADScore.R +4 -4
biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +4 -1
biopipen/scripts/gene/GeneNameConversion.R +14 -12
biopipen/scripts/gsea/Enrichr.R +2 -2
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/PreRank.R +3 -3
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/plot/VennDiagram.R +2 -2
biopipen/scripts/protein/ProdigySummary.R +34 -27
biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
biopipen/scripts/regulatory/motifs-common.R +10 -9
biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
biopipen/scripts/rnaseq/Simulation.R +0 -2
biopipen/scripts/rnaseq/UnitConversion.R +6 -5
biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
biopipen/scripts/scrna/CellCellCommunication.py +1 -1
biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
biopipen/scripts/scrna/CellsDistribution.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
biopipen/scripts/scrna/MarkersFinder.R +481 -215
biopipen/scripts/scrna/MetaMarkers.R +3 -3
biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
biopipen/scripts/scrna/RadarPlots.R +1 -1
biopipen/scripts/scrna/ScFGSEA.R +231 -76
biopipen/scripts/scrna/ScSimulation.R +11 -10
biopipen/scripts/scrna/ScVelo.py +605 -0
biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
biopipen/scripts/scrna/SeuratClusterStats-features.R +43 -30
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
biopipen/scripts/scrna/SeuratClustering.R +31 -48
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
biopipen/scripts/scrna/SeuratPreparing.R +76 -24
biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +144 -185
biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
biopipen/scripts/snp/MatrixEQTL.R +39 -20
biopipen/scripts/snp/PlinkCallRate.R +43 -34
biopipen/scripts/snp/PlinkFreq.R +34 -41
biopipen/scripts/snp/PlinkHWE.R +23 -18
biopipen/scripts/snp/PlinkHet.R +26 -22
biopipen/scripts/snp/PlinkIBD.R +30 -34
biopipen/scripts/stats/ChowTest.R +9 -8
biopipen/scripts/stats/DiffCoexpr.R +13 -11
biopipen/scripts/stats/LiquidAssoc.R +7 -8
biopipen/scripts/stats/Mediation.R +8 -8
biopipen/scripts/stats/MetaPvalue.R +11 -13
biopipen/scripts/stats/MetaPvalue1.R +6 -5
biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
biopipen/scripts/tcr/ClonalStats.R +6 -5
biopipen/scripts/tcr/CloneResidency.R +3 -3
biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
biopipen/scripts/tcr/ScRepLoading.R +114 -92
biopipen/scripts/tcr/TCRClusterStats.R +2 -2
biopipen/scripts/tcr/TCRClustering.R +86 -97
biopipen/scripts/tcr/TESSA.R +65 -115
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
biopipen/utils/common_docstrs.py +66 -63
biopipen/utils/reporter.py +177 -0
{biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/METADATA +2 -1
{biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/RECORD +130 -145
{biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/WHEEL +1 -1
biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
biopipen/reports/scrna/ScFGSEA.svelte +0 -16
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
biopipen/utils/caching.R +0 -44
biopipen/utils/gene.R +0 -95
biopipen/utils/gsea.R +0 -329
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -602
biopipen/utils/mutate_helpers.R +0 -581
biopipen/utils/plot.R +0 -209
biopipen/utils/repr.R +0 -146
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -207
{biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/entry_points.txt +0 -0

biopipen/scripts/stats/Mediation.R CHANGED Viewed

@@ -1,8 +1,7 @@
-{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
 library(rlang)
 library(parallel)
 library(mediation)
+library(biopipen.utils)
 infile <- {{in.infile | r}}
 fmlfile <- {{in.fmlfile | r}}
@@ -16,15 +15,16 @@ cases <- {{envs.cases | r}}
 transpose_input <- {{envs.transpose_input | r}}
 set.seed(123)
+log <- get_logger()
-log_info("Reading input file ...")
+log$info("Reading input file ...")
 indata <- read.table(infile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE)
 if (transpose_input) { indata <- t(indata) }
-log_info("Reading formula file/cases ...")
+log$info("Reading formula file/cases ...")
 if (!is.null(fmlfile)) {
     if (!is.null(cases) && length(cases) > 0) {
-        log_warn("envs.cases ignored as in.fmlfile is provided")
+        log$warn("envs.cases ignored as in.fmlfile is provided")
     }
     fmldata <- read.table(fmlfile, header = TRUE, sep = "\t", row.names = NULL)
     # Case   M   Y   X   Cov     Model_M    Model_Y
@@ -39,14 +39,14 @@ medanalysis <- function(i, total) {
     casename <- names(cases)[i]
     case <- cases[[casename]]
     if (total < 50) {
-        log_info("- Case: ", casename)
+        log$info("- Case: ", casename)
     } else if (total < 500) {
         if (i %% 10 == 0) {
-            log_info("- Processing case {i}/{total} ...")
+            log$info("- Processing case {i}/{total} ...")
         }
     } else {
         if (i %% 100 == 0) {
-            log_info("- Processing case {i}/{total} ...")
+            log$info("- Processing case {i}/{total} ...")
         }
     }
     M <- case$M

biopipen/scripts/stats/MetaPvalue.R CHANGED Viewed

@@ -1,10 +1,9 @@
-{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
 library(metap)
 library(rlang)
 library(dplyr)
+library(biopipen.utils)
-infiles <- {{in.infiles | r}}
+infiles <- {{in.infiles | each: str | r}}
 outfile <- {{out.outfile | r}}
 id_cols <- {{envs.id_cols | r}}
 id_exprs <- {{envs.id_exprs | r}}
@@ -16,11 +15,13 @@ padj <- {{envs.padj | r}}
 if (method == "fisher") { method = "sumlog" }
+log <- get_logger()
 if (length(infiles) == 1 && padj == "none") {
-    log_info("Only one input file, copying to output ...")
+    log$info("Only one input file, copying to output ...")
     file.copy(infiles, outfile)
 } else if (length(infiles) == 1) {
-    log_info("Only one input file, performing p-value adjustment ...")
+    log$info("Only one input file, performing p-value adjustment ...")
     if (is.null(pval_cols)) {
         stop("Must provide envs.pval_cols")
     }
@@ -30,7 +31,7 @@ if (length(infiles) == 1 && padj == "none") {
     }
     indata$Padj <- p.adjust(indata[, pval_cols], method = padj)
-    log_info("Writing output ...")
+    log$info("Writing output ...")
     write.table(indata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
 } else {
     # Check pval_cols
@@ -68,7 +69,7 @@ if (length(infiles) == 1 && padj == "none") {
         }
     }
-    log_info("Reading and preparing data ...")
+    log$info("Reading and preparing data ...")
     outdata <- NULL
     for (i in seq_along(infiles)) {
         infile <- infiles[i]
@@ -89,7 +90,7 @@ if (length(infiles) == 1 && padj == "none") {
         }
     }
-    log_info("Running metap on each row ...")
+    log$info("Running metap on each row ...")
     metaps <- c()
     ns <- c()
     pval_columns <- setdiff(colnames(outdata), id_cols)
@@ -119,14 +120,11 @@ if (length(infiles) == 1 && padj == "none") {
     outdata <- outdata %>% arrange(MetaPval)
     if (padj != "none") {
-        log_info("Calculating adjusted p-values ...")
+        log$info("Calculating adjusted p-values ...")
         outdata$MetaPadj <- p.adjust(outdata$MetaPval, method = padj)
     }
-    log_info("Writing output ...")
+    log$info("Writing output ...")
     write.table(outdata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
 }

biopipen/scripts/stats/MetaPvalue1.R CHANGED Viewed

@@ -1,8 +1,7 @@
-{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
 library(metap)
 library(rlang)
 library(dplyr)
+library(biopipen.utils)
 infile <- {{in.infile | r}}
 outfile <- {{out.outfile | r}}
@@ -13,6 +12,8 @@ na <- {{envs.na | r}}
 keep_single <- {{envs.keep_single | r}}
 padj <- {{envs.padj | r}}
+log <- get_logger()
 if (method == "fisher") { method = "sumlog" }
 # Check pval_cols
@@ -24,7 +25,7 @@ if (length(id_cols) == 1) {
     id_cols <- trimws(strsplit(id_cols, ",")[[1]])
 }
-log_info("Reading input and performing meta-analysis ...")
+log$info("Reading input and performing meta-analysis ...")
 outdata <- read.table(
         infile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE
     ) %>%
@@ -64,10 +65,10 @@ outdata$.pvals <- NULL
 outdata <- outdata %>% arrange(MetaPval)
 if (padj != "none") {
-    log_info("Calculating adjusted p-values ...")
+    log$info("Calculating adjusted p-values ...")
     outdata$MetaPadj <- p.adjust(outdata$MetaPval, method = padj)
 }
-log_info("Writing output ...")
+log$info("Writing output ...")
 write.table(outdata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)

biopipen/scripts/tcr/CDR3AAPhyschem.R CHANGED Viewed

@@ -1,35 +1,45 @@
-{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
+library(rlang)
 library(dplyr)
 library(tidyr)
 library(tibble)
-library(ggplot2)
-library(ggridges)
 library(glue)
 library(hash)
 library(glmnet)
 library(broom.mixed)
 library(stringr)
+library(plotthis)
+library(biopipen.utils)
-immdatafile = {{in.immdata | quote}}
-srtobjfile = {{in.srtobj | r}}
-outdir = {{out.outdir | quote}}
-joboutdir = {{job.outdir | quote}}
-group_name = {{envs.group | r}}
-comparison = {{envs.comparison | r}}
-prefix = {{envs.prefix | r}}
-target = {{envs.target | r}}
-subset_cols = {{envs.subset | r}}
+scrfile <- {{in.scrfile | r}}
+outdir <- {{out.outdir | r}}
+joboutdir <- {{job.outdir | r}}
+group_name <- {{envs.group | r}}
+comparison <- {{envs.comparison | r}}
+target <- {{envs.target | r}}
+each_cols <- {{envs.each | r}}
+log <- get_logger()
+reporter <- get_reporter()
 if (is.null(group_name) || is.null(comparison)) {
     stop("envs.group and envs.comparison must be specified")
 }
-if (is.null(target)) {
-    stop("envs.target must be specified, which should be one of the keys in `envs.comparison`")
+if (length(comparison) != 2) {
+    stop("envs.comparison must have exactly two elements or keys, representing the two groups to compare")
+}
+if (!is.list(comparison)) {
+    comparison <- stats::setNames(as.list(comparison), comparison)
+}
+target <- target %||% names(comparison)[1]
+if (!(target %in% names(comparison))) {
+    stop(paste0("Target group '", target, "' not found in the comparison groups."))
 }
-if (is.character(subset_cols) && length(subset_cols) == 1) {
-    subset_cols = trimws(strsplit(subset_cols, ",")[[1]])
+if (is.character(each_cols) && length(each_cols) == 1) {
+    each_cols = trimws(strsplit(each_cols, ",")[[1]])
 }
 ### Helpers
@@ -142,103 +152,43 @@ for (i in 1:3){
   AA_MAPS[[i]] <- create_hashmap(as.character(RF$AA), as.vector(RF[,(i+1),drop=TRUE]))
 }
-# Loading metadata from srtobjfile
-log_info("Loading metadata from srtobjfile")
-if (is.null(srtobjfile)) {
-    metadata = NULL
-} else {
-    # Get the extension (lowercase) of srtobjfile, see if it is .rds file
-    srtobjfile_ext = tolower(tools::file_ext(srtobjfile))
-    if (srtobjfile_ext != "rds") {
-        metadata = read.table(
-            srtobjfile,
-            sep = "\t",
-            header = TRUE,
-            row.names = 1,
-            stringsAsFactors = FALSE,
-            check.names = FALSE,
-        )
-    } else {
-        metadata = readRDS(srtobjfile)@meta.data
-    }
-}
-log_info("Loading immdata from immdatafile")
-immdata = readRDS(immdatafile)
+log$info("Loading data from input file")
+mdata <- read_obj(scrfile)@meta.data
+if (!group_name %in% colnames(mdata)) {
+    stop(paste0("Group name '", group_name, "' not found in the data."))
+}
-merge_data = function(sam) {
-    # Merge the data for one sample from immdata and metadata
-    out = immdata$data[[sam]]
-    if ("chain" %in% colnames(out)) {
-        out = out %>% separate_rows(chain, CDR3.aa, V.name, J.name, sep = ";") %>%
-            filter(chain == "TRB")
-    }
-    out = out %>%
-        mutate(
-            Sample = sam,
-            locus = "TCRB",
-            sequence = CDR3.aa,
-            length = nchar(sequence),
-            vgene = V.name,
-            jgene = J.name,
-        ) %>%
-        select(Sample, Barcode, locus, sequence, length, vgene, jgene) %>%
-        separate_longer_delim(Barcode, delim = ";") %>%
-        left_join(immdata$meta, by = "Sample")
-    if (is.null(metadata)) {
-        # No metadata, just return
-        return (out)
-    }
+# check if values of comparison are in the group_name column
+if (!all(unlist(comparison) %in% as.character(mdata[[group_name]]))) {
+    stop(paste0("Some values in comparison are not found in the group_name column: ",
+                paste(setdiff(unlist(comparison), mdata[[group_name]]), collapse = ", ")))
+}
-    # Merge with metadata
-    sdata = metadata %>% filter(Sample == sam)
-    if (!is.null(prefix) && nchar(prefix) > 0) {
-        # Replace the placeholder like {Sample} with the data in other columns
-        # in the same row
-        sdata = sdata %>% mutate(.prefix_len = nchar(glue(prefix)))
-        # Remove the prefix in the rownames of sdata
-        rownames(sdata) = substring(rownames(sdata), sdata$.prefix_len + 1)
-        sdata = sdata %>% select(-.prefix_len)
-    }
-    sdata = rownames_to_column(sdata, "Barcode")
-    out = out %>% left_join(sdata, by = "Barcode", suffix = c("", "_seurat"))
-    out$.Group = NA_character_
-    for (k in names(comparison)) {
-        group_mask = out[[group_name]] %in% comparison[[k]]
-        if (sum(group_mask) == 0) {
-            stop(
-                glue("No cells in comparison group {k}. Please check if the group items {comparison[[k]]} exist.")
-            )
+# add a new column with the keys of comparison, when their values are in the group_name column
+mdata$.Group <- sapply(as.character(mdata[[group_name]]), function(x) {
+    for (key in names(comparison)) {
+        if (x %in% comparison[[key]]) {
+            return(key)
         }
-        out$.Group[out[[group_name]] %in% comparison[[k]]] = k
-    }
-    if (!is.null(subset_cols)) {
-        out = out %>% unite(".Subset", all_of(subset_cols), sep = "_", remove = FALSE)
     }
-    return (out)
-}
-# Expanded and merged with metadata
-# Now we are able to select the cells using group and comparison
-log_info("Merging data with metadata for each sample")
-merged = NULL
-for (sam in immdata$meta$Sample) {
-    log_info("- For sample {sam}")
-    md = merge_data(sam)
-    merged = if (is.null(merged)) md else rbind(merged, md)
-}
+    return(NA)
+})
+mdata <- mdata %>%
+    separate(CTaa, into = c(NA, "sequence"), sep = "_", remove = FALSE) %>%
+    separate(CTgene, into = c(NA, "vjgene"), sep = "_", remove = FALSE) %>%
+    separate(vjgene, into = c("vgene", NA, "jgene", NA), sep = "\\.", remove = FALSE) %>%
+    mutate(length = nchar(sequence))
 # Statistics about the cell numbers with groups available in metadata
 # !!group_name, TotalCells, AvailCells, AvailCellsPct
-log_info("Calculating statistics")
-if (is.null(subset_cols)) {
-    stats = merged %>%
+log$info("Calculating statistics")
+if (is.null(each_cols)) {
+    stats = mdata %>%
         # group by group_name
         group_by(.Group) %>%
         summarise(
-            TotalCells = nrow(merged),
+            TotalCells = nrow(mdata),
             CellsPerGroup = n(),
             AvailCellsPerGroup = sum(length >= CDR3_MINLEN & length <= CDR3_MAXLEN),
             # Percentage with % in character
@@ -246,14 +196,15 @@ if (is.null(subset_cols)) {
             .groups = "drop"
         )
 } else {
-    stats = merged %>%
+    stats = mdata %>%
+        unite(".Subset", all_of(each_cols), sep = "_", remove = FALSE) %>%
         group_by(.Subset) %>%
         group_map(function(df, .y) {
             df %>%
                 group_by(.Group) %>%
                 summarise(
                     .Subset = .y$.Subset[1],
-                    AllCells = nrow(merged),
+                    AllCells = nrow(mdata),
                     TotalCells = nrow(df),
                     CellsPerGroup = n(),
                     AvailCellsPerGroup = sum(length >= CDR3_MINLEN & length <= CDR3_MAXLEN),
@@ -274,7 +225,7 @@ write.table(
     row.names = FALSE,
 )
-add_report(
+reporter$add(
     list(
         kind = "descr",
         content = "Statistics about the cells mapped to the comparison groups. Columns:"
@@ -304,20 +255,22 @@ add_report(
-log_info("Add amino acid features")
-merged = merged %>%
+log$info("Add amino acid features")
+mdata = mdata %>%
     filter(!is.na(.Group) & length >= CDR3_MINLEN & length <= CDR3_MAXLEN) %>%
     add_percentAA() %>%
     add_positionalAA()
 do_one_subset = function(s) {
-    log_info(paste("Processing subset", s))
+    if (!is.null(s)) {
+        log$info(paste("Processing subset", s))
+    }
     if (is.null(s)) {
-        data = merged
+        data = mdata
         odir = file.path(outdir, "ALL")
     } else {
-        data = merged %>% filter(.Subset == s)
+        data = mdata %>% filter(.Subset == s)
         odir = file.path(outdir, slugify(s))
     }
     dir.create(odir, recursive = TRUE, showWarnings = FALSE)
@@ -342,6 +295,13 @@ do_one_subset = function(s) {
             }
         }
         y = ifelse(data_fit$.Group == target, 1, 0)
+        if (any(table(y) <= 3) || length(table(y)) < 2) {
+            if (is.null(s)) {
+                log$warn(paste0("Not enough observations for target group '", target, "' with CDR3 length ", len, ". At least 4 observations are required."))
+            } else {
+                log$warn(paste0("Not enough observations for target group '", target, "' in subset '", s, "' with CDR3 length ", len, ". At least 4 observations are required."))
+            }
+        }
         # one multinomial or binomial class has 1 or 0 observations; not allowed
         if (any(table(y) <= 1)) { next }
         fit = glmnet(x, y, data=data_fit, alpha=0, lambda=0.01, family="binomial")
@@ -370,26 +330,22 @@ do_one_subset = function(s) {
     write.table(alldf, file = file.path(odir, "estimates.txt"), sep = "\t", quote = FALSE, row.names = FALSE)
     # save the plots
-    gr = alldf %>%
-        group_by(imgt_pos, feature) |>
+    gr <- alldf %>%
+        group_by(imgt_pos, feature) %>%
         summarise(coef = mean(estimate))
     # Avoid too large values
-    gr$coef[gr$coef > 1.5] = 1.5
+    gr$coef[gr$coef > 1.5] <- 1.5
+    gr$coef <- exp(gr$coef)  # Exponentiate the coefficients
-    g = ggplot(gr, aes(imgt_pos, exp(coef), color=feature))
-    g = g + geom_point() + geom_line(aes(group=feature)) + theme_classic() + geom_hline(yintercept=1)
-    g = g + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + scale_color_manual(values=c("#eead0c", "#ed6a51", "#02868a"))
-    g = g + xlab("TCR position") + ylab(paste("Coefficient for", target, "prediction")) + ggtitle(s)
+    g <- LinePlot(gr, x = "imgt_pos", y = "coef", group_by = "feature",
+        add_line = 1, x_text_angle = 90, xlab = "TCR position",
+        ylab = paste("Coefficient for", target, "prediction"), title = s)
-    png(file.path(odir, "estimated_coefficients.png"), width=1000, height=1000, res=100)
-    print(g)
-    dev.off()
+    save_plot(g, file.path(odir, "estimated_coefficients"),
+        devpars = list(width = 1000, height = 1000, res = 100),
+        formats = c("png", "pdf"))
-    pdf(file.path(odir, "estimated_coefficients.pdf"), width=10, height=10)
-    print(g)
-    dev.off()
-    add_report(
+    reporter$add(
         list(
             kind = "descr",
             content = "Estimated coefficients for each feature and position in the CDR3"
@@ -397,7 +353,7 @@ do_one_subset = function(s) {
         h1 = ifelse(
             is.null(s),
             "Estimated OR (per s.d.)",
-            paste0(paste(subset_cols, collapse = ", "), " - ", s)
+            paste0(paste(each_cols, collapse = ", "), " - ", s)
         ),
         h2 = ifelse(
             is.null(s),
@@ -406,7 +362,7 @@ do_one_subset = function(s) {
         )
     )
-    add_report(
+    reporter$add(
         list(
             name = "Plot",
             contents = list(
@@ -429,7 +385,7 @@ do_one_subset = function(s) {
         h1 = ifelse(
             is.null(s),
             "Estimated OR (per s.d.)",
-            paste0(paste(subset_cols, collapse = ", "), " - ", s)
+            paste0(paste(each_cols, collapse = ", "), " - ", s)
         ),
         h2 = ifelse(
             is.null(s),
@@ -443,38 +399,23 @@ do_one_subset = function(s) {
     data$mid_hydro = sapply(data$midseq, function(x) get_feat_score(x, AA_MAPS[[2]]))
     data$smid_hydro = scale(data$mid_hydro)[,1]
-    g = ggplot()
-    # Give colors for different groups
-    cols = c("turquoise3", "darkmagenta", "darkorange", "darkgreen", "darkblue", "darkred")
-    groups = unique(data$.Group)
-    if (length(groups) > length(cols)) {
-        cols = c(cols, c("darkcyan", "darkviolet", "darkgoldenrod", "darkolivegreen", "darkslategray", "darkkhaki"))
-    }
-    cols = cols[1:length(groups)]
-    for (i in seq_along(groups)) {
-        g = g + geom_vline(
-          xintercept = mean(data$smid_hydro[data$.Group==groups[i]]),
-          color=cols[i]
-        )
-    }
-    g = g + geom_density_ridges(
-      aes(x=data$smid_hydro, y=data$.Group, color=data$.Group, fill=data$.Group),
-      bandwidth=0.5,
-      alpha=0.4,
-      show.legend = FALSE
-    ) + scale_color_manual(values=cols)
-    g = g + scale_fill_manual(values=cols) + theme_bw(base_size=12)
-    g = g + xlim(c(-4,4)) + xlab("CDR3bmr hydrophobicity") + ylab("") + coord_flip() + ggtitle(s)
-    png(file.path(odir, "distribution.png"), width=1000, height=1000, res=100)
-    print(g)
-    dev.off()
-    pdf(file.path(odir, "distribution.pdf"), width=10, height=10)
-    print(g)
-    dev.off()
-    add_report(
+    g <- RidgePlot(
+        data = data,
+        x = "smid_hydro",
+        group_by = ".Group",
+        xlab = "CDR3bmr hydrophobicity",
+        ylab = "",
+        add_vline = TRUE,
+        alpha = 0.5,
+        title = s,
+        flip = TRUE
+    )
+    save_plot(g, file.path(odir, "distribution"),
+        devpars = list(width = 1000, height = 1000, res = 100),
+        formats = c("png", "pdf"))
+    reporter$add(
         list(
             kind = "table_image",
             descr = paste0(
@@ -488,7 +429,7 @@ do_one_subset = function(s) {
         h1 = ifelse(
             is.null(s),
             "Hydrophobicity Distribution",
-            paste0(paste(subset_cols, collapse = ", "), " - ", s)
+            paste0(paste(each_cols, collapse = ", "), " - ", s)
         ),
         h2 = ifelse(
             is.null(s),
@@ -499,11 +440,11 @@ do_one_subset = function(s) {
 }
-if (is.null(subset_cols)) {
+if (is.null(each_cols)) {
     do_one_subset(NULL)
 } else {
-    subsets = na.omit(unique(merged$.Subset))
+    subsets = na.omit(unique(obj$.Subset))
     sapply(subsets, do_one_subset)
 }
-save_report(joboutdir)
+reporter$save(joboutdir)

biopipen/scripts/tcr/ClonalStats.R CHANGED Viewed

@@ -1,12 +1,13 @@
 library(rlang)
 library(glue)
+library(dplyr)
 library(scplotter)
 library(biopipen.utils)
-screpfile <- {{in.screpfile | quote}}
-outdir <- {{out.outdir | quote}}
-joboutdir <- {{job.outdir | quote}}
-envs <- {{envs | r}}
+screpfile <- {{in.screpfile | r}}
+outdir <- {{out.outdir | r}}
+joboutdir <- {{job.outdir | r}}
+envs <- {{envs | r: todot="-"}}
 mutaters <- envs$mutaters
 cases <- envs$cases
 envs$mutaters <- NULL
@@ -397,7 +398,7 @@ get_plot_descr <- function(viz_type, case) {
 }
 log$info("Loading scRepertoire object ...")
-screp <- readRDS(screpfile)
+screp <- read_obj(screpfile)
 log$info("Applying mutaters if any ...")
 screp <- ScRepMutate(screp, mutaters)

biopipen/scripts/tcr/CloneResidency.R CHANGED Viewed

@@ -14,10 +14,10 @@ library(ComplexUpset)
 theme_set(theme_prism())
-immfile <- {{ in.immdata | quote }}
+immfile <- {{ in.immdata | r }}
 metafile <- {{ in.metafile | r }}
-outdir <- {{ out.outdir | quote }}
-joboutdir <- {{ job.outdir | quote }}
+outdir <- {{ out.outdir | r }}
+joboutdir <- {{ job.outdir | r }}
 subject_key <- {{ envs.subject | r }}
 group_key <- {{ envs.group | r }}

biopipen/scripts/tcr/CloneSizeQQPlot.R CHANGED Viewed

@@ -6,8 +6,8 @@ library(tidyr)
 library(ggprism)
-immfile <- {{ in.immdata | quote }}
-outdir <- {{ out.outdir | quote }}
+immfile <- {{ in.immdata | r }}
+outdir <- {{ out.outdir | r }}
 subject_key <- {{ envs.subject | r }}
 group_key <- {{ envs.group | r }}

biopipen/scripts/tcr/Immunarch2VDJtools.R CHANGED Viewed

@@ -3,8 +3,8 @@ library(dplyr)
 library(tidyr)
 library(stringr)
-immfile = {{in.immdata | quote}}
-outdir = {{out.outdir | quote}}
+immfile = {{in.immdata | r}}
+outdir = {{out.outdir | r}}
 immdata = readRDS(immfile)

biopipen/scripts/tcr/ImmunarchFilter.R CHANGED Viewed

@@ -6,15 +6,15 @@ library(tidyr)
 library(tibble)
 library(immunarch)
-immfile = {{in.immdata | quote}}
+immfile = {{in.immdata | r}}
 {% if in.filterfile %}
 filters = {{in.filterfile | toml_load | r}}
 {% else %}
 filters = {{envs.filters | r}}
 {% endif %}
 metacols = {{envs.metacols | r}}
-outfile = {{out.outfile | quote}}
-groupfile = {{out.groupfile | quote}}
+outfile = {{out.outfile | r}}
+groupfile = {{out.groupfile | r}}
 immdata0 = readRDS(immfile)
 groupname = filters$name

biopipen/scripts/tcr/ImmunarchLoading.R CHANGED Viewed

@@ -9,11 +9,11 @@ library(tibble)
 library(glue)
 library(bracer)
-metafile = {{ in.metafile | quote }}
-rdsfile = {{ out.rdsfile | quote }}
-metatxt = {{ out.metatxt | quote }}
-tmpdir = {{ envs.tmpdir | quote }}
-mode = {{ envs.mode | quote }}
+metafile = {{ in.metafile | r }}
+rdsfile = {{ out.rdsfile | r }}
+metatxt = {{ out.metatxt | r }}
+tmpdir = {{ envs.tmpdir | r }}
+mode = {{ envs.mode | r }}
 extracols = {{ envs.extracols | r}}
 prefix = {{ envs.prefix | r }}

biopipen 0.33.1__py3-none-any.whl → 0.34.1__py3-none-any.whl

Potentially problematic release.

biopipen 0.33.1__py3-none-any.whl → 0.34.1__py3-none-any.whl