PyPI - biopipen - Versions diffs - 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl - Mend

biopipen 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (134) hide show

biopipen/__init__.py +1 -1
biopipen/core/config.toml +6 -0
biopipen/core/filters.py +77 -26
biopipen/core/testing.py +6 -1
biopipen/ns/bam.py +39 -0
biopipen/ns/cellranger.py +5 -0
biopipen/ns/cellranger_pipeline.py +2 -2
biopipen/ns/cnvkit_pipeline.py +4 -1
biopipen/ns/delim.py +33 -27
biopipen/ns/protein.py +99 -0
biopipen/ns/scrna.py +411 -250
biopipen/ns/snp.py +16 -3
biopipen/ns/tcr.py +125 -1
biopipen/ns/vcf.py +34 -0
biopipen/ns/web.py +5 -1
biopipen/reports/scrna/SeuratClusterStats.svelte +1 -1
biopipen/reports/scrna/SeuratMap2Ref.svelte +15 -2
biopipen/reports/tcr/ClonalStats.svelte +15 -0
biopipen/reports/utils/misc.liq +22 -7
biopipen/scripts/bam/BamMerge.py +2 -2
biopipen/scripts/bam/BamSampling.py +4 -4
biopipen/scripts/bam/BamSort.py +141 -0
biopipen/scripts/bam/BamSplitChroms.py +10 -10
biopipen/scripts/bam/BamSubsetByBed.py +3 -3
biopipen/scripts/bam/CNVpytor.py +10 -10
biopipen/scripts/bam/ControlFREEC.py +11 -11
biopipen/scripts/bed/Bed2Vcf.py +5 -5
biopipen/scripts/bed/BedConsensus.py +5 -5
biopipen/scripts/bed/BedLiftOver.sh +6 -4
biopipen/scripts/bed/BedtoolsIntersect.py +4 -4
biopipen/scripts/bed/BedtoolsMakeWindows.py +3 -3
biopipen/scripts/bed/BedtoolsMerge.py +4 -4
biopipen/scripts/cellranger/CellRangerCount.py +20 -9
biopipen/scripts/cellranger/CellRangerSummary.R +20 -29
biopipen/scripts/cellranger/CellRangerVdj.py +8 -8
biopipen/scripts/cnvkit/CNVkitAccess.py +6 -6
biopipen/scripts/cnvkit/CNVkitAutobin.py +25 -18
biopipen/scripts/cnvkit/CNVkitBatch.py +5 -5
biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -2
biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +9 -5
biopipen/scripts/cnvkit/CNVkitHeatmap.py +4 -4
biopipen/scripts/cnvkit/CNVkitReference.py +2 -2
biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
biopipen/scripts/cnvkit/guess_baits.py +166 -93
biopipen/scripts/delim/SampleInfo.R +85 -139
biopipen/scripts/misc/Config2File.py +2 -2
biopipen/scripts/misc/Str2File.py +2 -2
biopipen/scripts/protein/MMCIF2PDB.py +33 -0
biopipen/scripts/protein/PDB2Fasta.py +60 -0
biopipen/scripts/protein/Prodigy.py +4 -4
biopipen/scripts/protein/RMSD.py +178 -0
biopipen/scripts/regulatory/MotifScan.py +8 -8
biopipen/scripts/scrna/CellCellCommunication.py +59 -22
biopipen/scripts/scrna/CellsDistribution.R +31 -6
biopipen/scripts/scrna/MarkersFinder.R +272 -602
biopipen/scripts/scrna/MetaMarkers.R +16 -7
biopipen/scripts/scrna/RadarPlots.R +75 -35
biopipen/scripts/scrna/SCP-plot.R +15202 -0
biopipen/scripts/scrna/ScVelo.py +0 -0
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +23 -25
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -47
biopipen/scripts/scrna/SeuratClusterStats-features.R +85 -385
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +33 -13
biopipen/scripts/scrna/SeuratClusterStats-stats.R +45 -228
biopipen/scripts/scrna/SeuratClusterStats.R +13 -19
biopipen/scripts/scrna/SeuratMap2Ref.R +16 -6
biopipen/scripts/scrna/SeuratPreparing.R +138 -81
biopipen/scripts/scrna/SlingShot.R +71 -0
biopipen/scripts/scrna/TopExpressingGenes.R +9 -7
biopipen/scripts/scrna/celltypist-wrapper.py +7 -6
biopipen/scripts/snp/Plink2GTMat.py +26 -11
biopipen/scripts/snp/PlinkFilter.py +7 -7
biopipen/scripts/snp/PlinkFromVcf.py +8 -5
biopipen/scripts/snp/PlinkSimulation.py +4 -4
biopipen/scripts/snp/PlinkUpdateName.py +4 -4
biopipen/scripts/stats/ChowTest.R +48 -22
biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
biopipen/scripts/tcr/CDR3AAPhyschem.R +12 -2
biopipen/scripts/tcr/ClonalStats.R +484 -0
biopipen/scripts/tcr/CloneResidency.R +23 -5
biopipen/scripts/tcr/Immunarch-basic.R +8 -1
biopipen/scripts/tcr/Immunarch-clonality.R +5 -0
biopipen/scripts/tcr/Immunarch-diversity.R +25 -4
biopipen/scripts/tcr/Immunarch-geneusage.R +15 -1
biopipen/scripts/tcr/Immunarch-kmer.R +14 -1
biopipen/scripts/tcr/Immunarch-overlap.R +15 -1
biopipen/scripts/tcr/Immunarch-spectratyping.R +10 -1
biopipen/scripts/tcr/Immunarch-tracking.R +6 -0
biopipen/scripts/tcr/Immunarch-vjjunc.R +33 -0
biopipen/scripts/tcr/ScRepLoading.R +127 -0
biopipen/scripts/tcr/TCRClusterStats.R +24 -7
biopipen/scripts/tcr/TCRDock.py +10 -6
biopipen/scripts/tcr/TESSA.R +6 -1
biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
biopipen/scripts/vcf/BcftoolsAnnotate.py +8 -8
biopipen/scripts/vcf/BcftoolsFilter.py +3 -3
biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
biopipen/scripts/vcf/BcftoolsSort.py +4 -4
biopipen/scripts/vcf/BcftoolsView.py +5 -5
biopipen/scripts/vcf/Vcf2Bed.py +2 -2
biopipen/scripts/vcf/VcfAnno.py +11 -11
biopipen/scripts/vcf/VcfDownSample.sh +22 -10
biopipen/scripts/vcf/VcfFilter.py +5 -5
biopipen/scripts/vcf/VcfFix.py +7 -7
biopipen/scripts/vcf/VcfFix_utils.py +12 -3
biopipen/scripts/vcf/VcfIndex.py +3 -3
biopipen/scripts/vcf/VcfIntersect.py +3 -3
biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
biopipen/scripts/vcf/bcftools_utils.py +3 -3
biopipen/scripts/web/Download.py +8 -4
biopipen/scripts/web/DownloadList.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadBucket.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadFile.py +3 -3
biopipen/scripts/web/gcloud_common.py +1 -1
biopipen/utils/gsea.R +96 -42
biopipen/utils/misc.R +205 -7
biopipen/utils/misc.py +17 -8
biopipen/utils/plot.R +53 -17
biopipen/utils/reference.py +11 -11
biopipen/utils/repr.R +146 -0
biopipen/utils/vcf.py +1 -1
{biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/METADATA +9 -9
{biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/RECORD +131 -122
{biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/WHEEL +1 -1
biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -139
biopipen/scripts/scrna/SeuratPreparing-common.R +0 -452
biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +0 -201
{biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/entry_points.txt +0 -0

biopipen/scripts/tcr/Immunarch-diversity.R CHANGED Viewed

@@ -353,7 +353,14 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
             width = width * case$ncol
         }
     }
-    png(file.path(ddir, "diversity.png"), width = width, height = height, res = res)
+    div_plot = file.path(ddir, "diversity.png")
+    png(div_plot, width = width, height = height, res = res)
+    print(p)
+    dev.off()
+    div_plot_pdf = file.path(ddir, "diversity.pdf")
+    pdf(div_plot_pdf, width = width / res, height = height / res)
     print(p)
     dev.off()
@@ -407,7 +414,7 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
     add_report(
         list(
             name = "Diversity Plot",
-            contents = list(list(kind = "image", src = file.path(ddir, "diversity.png")))
+            contents = list(list(kind = "image", src = div_plot, download = div_plot_pdf))
         ),
         list(
             name = "Diversity Table",
@@ -559,6 +566,10 @@ run_raref_single = function(d, case, ddir, suffix = "", save_p = TRUE) {
         png(file.path(ddir, "raref.png"), width = devpars$width, height = devpars$height, res = devpars$res)
         print(p)
         dev.off()
+        pdf(file.path(ddir, "raref.pdf"), width = devpars$width / devpars$res, height = devpars$height / devpars$res)
+        print(p)
+        dev.off()
     } else {
         return (list(p = p, width = devpars$width))
     }
@@ -628,6 +639,14 @@ run_raref_multi = function(d, case, ddir) {
     )
     print(p)
     dev.off()
+    pdf(
+        file.path(ddir, paste0("raref-", slugify(case$separate_by), ".pdf")),
+        width = width / res,
+        height = height / res
+    )
+    print(p)
+    dev.off()
 }
 # Run the diversity estimation for one case
@@ -673,7 +692,8 @@ run_div_case = function(casename) {
             add_report(
                 list(
                     kind = "image",
-                    src = file.path(ddir, paste0("raref-", slugify(case$separate_by), ".png"))
+                    src = file.path(ddir, paste0("raref-", slugify(case$separate_by), ".png")),
+                    download = file.path(ddir, paste0("raref-", slugify(case$separate_by), ".pdf"))
                 ),
                 h1 = "Rarefraction",
                 h2 = casename
@@ -683,7 +703,8 @@ run_div_case = function(casename) {
             add_report(
                 list(
                     kind = "image",
-                    src = file.path(ddir, "raref.png")
+                    src = file.path(ddir, "raref.png"),
+                    download = file.path(ddir, "raref.pdf")
                 ),
                 h1 = "Rarefraction",
                 h2 = casename

biopipen/scripts/tcr/Immunarch-geneusage.R CHANGED Viewed

@@ -126,10 +126,16 @@ do_one_case_geneusage = function(name, case, gu_dir) {
     print(p + scale_fill_biopipen())
     dev.off()
+    ofig_pdf = file.path(odir, paste0(name, ".pdf"))
+    pdf(ofig_pdf, width = case$devpars$width / case$devpars$res, height = case$devpars$height / case$devpars$res)
+    print(p + scale_fill_biopipen())
+    dev.off()
     add_report(
         list(
             kind = "table_image",
             src = ofig,
+            download = ofig_pdf,
             descr = paste0(
                  "Distribution of known gene segments following the ",
                  '<a href="http://www.imgt.org/IMGTrepertoire/LocusGenes/" target="_blank">IMGT</a> ',
@@ -165,15 +171,23 @@ do_one_case_geneusage = function(name, case, gu_dir) {
             ap = do_call(vis, avis_args)
             if (aname == "DEFAULT") {
                 aofig = file.path(odir, paste0(name, "-analysis.png"))
+                aofig_pdf = file.path(odir, paste0(name, "-analysis.pdf"))
             } else {
                 aofig = file.path(odir, paste0(name, "-", aname, "-analysis.png"))
+                aofig_pdf = file.path(odir, paste0(name, "-", aname, "-analysis.pdf"))
             }
             png(aofig, width = case$analyses$cases[[aname]]$devpars$width, height = case$analyses$cases[[aname]]$devpars$height, res = case$analyses$cases[[aname]]$devpars$res)
             print(ap)
             dev.off()
+            pdf(aofig_pdf,
+                width = case$analyses$cases[[aname]]$devpars$width / case$analyses$cases[[aname]]$devpars$res,
+                height = case$analyses$cases[[aname]]$devpars$height / case$analyses$cases[[aname]]$devpars$res)
+            print(ap)
+            dev.off()
             add_report(
-                list(src = aofig, name = aname),
+                list(src = aofig, name = aname, download = aofig_pdf),
                 h1 = "Gene Usage",
                 h2 = ifelse(name == "DEFAULT", "#", name),
                 h3 = "Gene Usage Analysis",

biopipen/scripts/tcr/Immunarch-kmer.R CHANGED Viewed

@@ -105,6 +105,11 @@ do_one_case_kmer = function(name, case, kmer_dir) {
     print(p)
     dev.off()
+    ofig_pdf = file.path(odir, "Allsamples.pdf")
+    pdf(ofig_pdf, width = case$devpars$width / case$devpars$res, height = case$devpars$height / case$devpars$res)
+    print(p)
+    dev.off()
     add_report(
         list(
             kind = "descr",
@@ -116,7 +121,7 @@ do_one_case_kmer = function(name, case, kmer_dir) {
     )
     add_report(
-        list(kind = "image", src = ofig),
+        list(kind = "image", src = ofig, download = ofig_pdf),
         h1 = "Kmer and sequence motif analysis",
         h2 = ifelse(name == "DEFAULT", "#", name),
         h3 = "Kmer sequence occurrences"
@@ -150,9 +155,17 @@ do_one_case_kmer = function(name, case, kmer_dir) {
                 print(ap)
                 dev.off()
+                aofig_pdf = gsub(".png$", ".pdf", aofig)
+                pdf(aofig_pdf,
+                    width = case$profiles$cases[[aname]]$devpars$width / case$profiles$cases[[aname]]$devpars$res,
+                    height = case$profiles$cases[[aname]]$devpars$height / case$profiles$cases[[aname]]$devpars$res)
+                print(ap)
+                dev.off()
                 add_report(
                     list(
                         src = aofig,
+                        download = aofig_pdf,
                         name = paste0(sample, ifelse(aname == "DEFAULT", "", paste0(" - ", aname)))
                     ),
                     h1 = "Kmer and sequence motif analysis",

biopipen/scripts/tcr/Immunarch-overlap.R CHANGED Viewed

@@ -138,10 +138,16 @@ do_one_case_overlap = function(name, case, ov_dir) {
     print(p)
     dev.off()
+    ofig_pdf = file.path(odir, paste0(name, ".pdf"))
+    pdf(ofig_pdf, width = case$devpars$width / case$devpars$res, height = case$devpars$height / case$devpars$res)
+    print(p)
+    dev.off()
     add_report(
         list(
             kind = "table_image",
             src = ofig,
+            download = ofig_pdf,
             descr = paste0(
                 "Repertoire overlap is the most common approach to measure repertoire similarity, ",
                 "using method <code>", case$method, "</code>, ",
@@ -179,15 +185,23 @@ do_one_case_overlap = function(name, case, ov_dir) {
             ap = do_call(vis, avis_args)
             if (aname == "DEFAULT") {
                 aofig = file.path(odir, paste0(name, "-analysis.png"))
+                aofig_pdf = file.path(odir, paste0(name, "-analysis.pdf"))
             } else {
                 aofig = file.path(odir, paste0(name, "-", aname, "-analysis.png"))
+                aofig_pdf = file.path(odir, paste0(name, "-", aname, "-analysis.pdf"))
             }
             png(aofig, width = case$analyses$cases[[aname]]$devpars$width, height = case$analyses$cases[[aname]]$devpars$height, res = case$analyses$cases[[aname]]$devpars$res)
             print(ap)
             dev.off()
+            pdf(aofig_pdf,
+                width = case$analyses$cases[[aname]]$devpars$width / case$analyses$cases[[aname]]$devpars$res,
+                height = case$analyses$cases[[aname]]$devpars$height / case$analyses$cases[[aname]]$devpars$res)
+            print(ap)
+            dev.off()
             add_report(
-                list(src = aofig, name = aname),
+                list(src = aofig, name = aname, download = aofig_pdf),
                 h1 = "Repertoire Overlaps",
                 h2 = ifelse(name == "DEFAULT", "#", name),
                 h3 = "Repertoire Overlap Analysis",

biopipen/scripts/tcr/Immunarch-spectratyping.R CHANGED Viewed

@@ -69,8 +69,17 @@ do_one_case_spectratyping = function(name, case, spect_dir) {
         print(vis(spec_obj))
         dev.off()
+        spectfile_pdf = file.path(odir, paste0(slugify(sample), ".spect.pdf"))
+        pdf(
+            spectfile_pdf,
+            width = case$devpars$width / case$devpars$res,
+            height = case$devpars$height / case$devpars$res
+        )
+        print(vis(spec_obj))
+        dev.off()
         add_report(
-            list(src = spectfile, name = sample),
+            list(src = spectfile, name = sample, download = spectfile_pdf),
             h1 = "Spectratyping",
             h2 = name,
             ui = "table_of_images"

biopipen/scripts/tcr/Immunarch-tracking.R CHANGED Viewed

@@ -88,6 +88,11 @@ run_tracking_case = function(casename) {
         print(vis(imm_tracking))
         dev.off()
+        tracking_pdf = file.path(tracking_dir, paste0(slugify(casename), ".pdf"))
+        pdf(tracking_pdf, height=10, width=6 + 1.5 * length(subjects))
+        print(vis(imm_tracking))
+        dev.off()
         add_report(
             list(
                 kind = "descr",
@@ -105,6 +110,7 @@ run_tracking_case = function(casename) {
         add_report(
             list(
                 src = tracking_png,
+                download = tracking_pdf,
                 name = if (casename == "DEFAULT") NULL else casename
             ),
             h1 = "Tracking of clonotypes",

biopipen/scripts/tcr/Immunarch-vjjunc.R CHANGED Viewed

@@ -110,7 +110,40 @@ do_one_case_vjjunc <- function(name, case) {
         }, bg.border = NA) # here set bg.border to NA is important
         dev.off()
+        # figfile_pdf <- file.path(odir, paste0(slugify(by_name), ".pdf"))
+        # png(figfile_pdf, width = case$devpars$width / case$devpars$res, height = case$devpars$height / case$devpars$res)
+        # circos.clear()
+        # tryCatch({
+        #     chordDiagram(
+        #         gsd,
+        #         annotationTrack = c("grid", "axis"),
+        #         preAllocateTracks = list(track.height = 0.25)
+        #     )
+        # }, error = function(e) {
+        #     log_warn("Error encountered: {e$message}, setting gap.after ...")
+        #     circos.par(gap.after = c(rep(1, nrow(gsd) - 1), 5, rep(1, nrow(gsd) - 1), 5))
+        #     chordDiagram(
+        #         gsd,
+        #         annotationTrack = c("grid", "axis"),
+        #         preAllocateTracks = list(track.height = 0.25)
+        #     )
+        # })
+        # circos.track(track.index = 1, panel.fun = function(x, y) {
+        #     circos.text(
+        #         CELL_META$xcenter,
+        #         CELL_META$ylim[1],
+        #         CELL_META$sector.index,
+        #         cex = .8,
+        #         facing = "clockwise",
+        #         niceFacing = TRUE,
+        #         adj = c(-0.2, 0.5)
+        #     )
+        # }, bg.border = NA) # here set bg.border to NA is important
+        # dev.off()
         add_report(
+            # list(src = figfile, name = by_name, download = figfile_pdf),
             list(src = figfile, name = by_name),
             h1 = "V-J Junction Circos Plots",
             h2 = ifelse(name == "DEFAULT", "#" , name),

biopipen/scripts/tcr/ScRepLoading.R ADDED Viewed

@@ -0,0 +1,127 @@
+{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
+library(rlang)
+library(bracer)
+library(scRepertoire)
+metafile <- {{in.metafile | quote}}
+outfile <- {{out.outfile | quote}}
+combineTCR_args <- {{envs.combineTCR | r}}
+exclude <- {{envs.exclude | r}}
+if (length(exclude) == 1) {
+    exclude <- strsplit(exclude, ",")[[1]]
+}
+log_info("Loading metadata ...")
+metadata <- read.table(metafile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE)
+stopifnot("Error: Column `Sample` is not found in metafile." = "Sample" %in% colnames(metadata))
+stopifnot("Error: Column `TCRData` is not found in metafile." = "TCRData" %in% colnames(metadata))
+rownames(metadata) <- metadata$Sample
+# helper function
+get_contig_annofile <- function(dir, sample, warn = TRUE) {
+    if (is.na(dir) || !is.character(dir) || nchar(dir) == 0 || dir == "NA") {
+        warning(paste0("No path found for sample: ", sample), immediate. = TRUE)
+        return (NULL)
+    }
+    if (file.exists(dir) && !dir.exists(dir)) {
+        return(dir)
+    }
+    annofilepat <- paste0("*", "{all,filtered}", "_contig_annotations.csv*")  # .gz
+    annofiles <- glob(file.path(as.character(dir), annofilepat))
+    if (length(annofiles) == 0) {
+        stop(
+            "Cannot find neither `filtered_contig_annotations.csv[.gz]` nor",
+            "`all_contig_annotations.csv[.gz]` in given TCRData for sample: ",
+            sample
+        )
+    }
+    if (length(annofiles) > 1) {
+        if (warn) {
+            warning("Found more than one file in given TCRData for sample: ", sample, immediate. = TRUE)
+        }
+        for (annofile in annofiles) {
+            # use filtered if both filtered_ and all_ are found
+            if (grepl("filtered", annofile)) {
+                annofiles <- annofile
+                break
+            }
+            # give a warning if only all_ is found
+            if (warn) {
+                warning("Using all_contig_annotations as filtred_config_annotations not found ",
+                        "in given TCRData for sample: ", sample,
+                        immediate. = TRUE
+                )
+            }
+        }
+    }
+    annofiles[1]
+}
+# for (i in seq_len(nrow(metadata))) {
+#     sample <- as.character(metadata$Sample[i])
+#     annofile <- get_contig_annofile(metadata$TCRData[i], sample)
+#     if (is.null(annofile)) { next }
+#     anno <- read.delim2(annofile, sep = ",", header = TRUE, stringsAsFactors = FALSE)
+#     # Add cdr1, cdr2, fwr1, fwr2, etc columns
+#     anno$cdr1 <- anno$cdr1 %||% ""
+#     anno$cdr1_nt <- anno$cdr1_nt %||% ""
+#     anno$cdr2 <- anno$cdr2 %||% ""
+#     anno$cdr2_nt <- anno$cdr2_nt %||% ""
+#     anno$fwr1 <- anno$fwr1 %||% ""
+#     anno$fwr1_nt <- anno$fwr1_nt %||% ""
+#     anno$fwr2 <- anno$fwr2 %||% ""
+#     anno$fwr2_nt <- anno$fwr2_nt %||% ""
+#     anno$fwr3 <- anno$fwr3 %||% ""
+#     anno$fwr3_nt <- anno$fwr3_nt %||% ""
+#     anno$fwr4 <- anno$fwr4 %||% ""
+#     anno$fwr4_nt <- anno$fwr4_nt %||% ""
+#     annotfile = file.path(datadir, paste0(sample, ".csv"))
+#     write.table(anno, annotfile, sep = ",", quote = FALSE, row.names = FALSE, col.names = TRUE)
+# }
+log_info("Reading TCR data ...")
+contig_list <- lapply(seq_len(nrow(metadata)), function(i) {
+    sample <- as.character(metadata$Sample[i])
+    annofile <- get_contig_annofile(metadata$TCRData[i], sample)
+    if (is.null(annofile)) { return (NULL) }
+    log_info("- Sample: {sample} ...")
+    anno <- read.delim2(annofile, sep = ",", header = TRUE, stringsAsFactors = FALSE)
+    # Add cdr1, cdr2, fwr1, fwr2, etc columns for compatibility
+    anno$cdr1 <- anno$cdr1 %||% ""
+    anno$cdr1_nt <- anno$cdr1_nt %||% ""
+    anno$cdr2 <- anno$cdr2 %||% ""
+    anno$cdr2_nt <- anno$cdr2_nt %||% ""
+    anno$fwr1 <- anno$fwr1 %||% ""
+    anno$fwr1_nt <- anno$fwr1_nt %||% ""
+    anno$fwr2 <- anno$fwr2 %||% ""
+    anno$fwr2_nt <- anno$fwr2_nt %||% ""
+    anno$fwr3 <- anno$fwr3 %||% ""
+    anno$fwr3_nt <- anno$fwr3_nt %||% ""
+    anno$fwr4 <- anno$fwr4 %||% ""
+    anno$fwr4_nt <- anno$fwr4_nt %||% ""
+    anno
+})
+names(contig_list) <- as.character(metadata$Sample)
+contig_list <- contig_list[!sapply(contig_list, is.null)]
+log_info("Combining TCR data and adding meta data ...")
+if (isTRUE(combineTCR_args$samples)) {
+    combineTCR_args$samples <- names(contig_list)
+}
+combineTCR_args$input.data <- contig_list
+screp_data <- do_call(combineTCR, combineTCR_args)
+for (col in colnames(metadata)) {
+    if (col %in% exclude) { next }
+    screp_data <- addVariable(screp_data, col, metadata[names(screp_data), col])
+}
+rm(contig_list, combineTCR_args)
+log_info("Saving TCR data ...")
+saveRDS(screp_data, outfile)

biopipen/scripts/tcr/TCRClusterStats.R CHANGED Viewed

@@ -69,6 +69,7 @@ cluster_size_distribution = function(name) {
     outfile = file.path(odir, "cluster_size_distribution.txt")
     outplot = file.path(odir, "cluster_size_distribution.png")
+    outplot_pdf = file.path(odir, "cluster_size_distribution.pdf")
     write.table(clsizes, outfile, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
     plotGG(
@@ -82,14 +83,15 @@ cluster_size_distribution = function(name) {
             "scale_fill_biopipen()"
         ),
         devpars = case$devpars,
-        outfile = outplot
+        outfile = c(outplot, outplot_pdf)
     )
     add_report(
         list(
             src = outplot,
             name = ifelse(name == "DEFAULT", FALSE, name),
-            descr = paste0("Cluster size distribution for each ", case$by)
+            descr = paste0("Cluster size distribution for each ", case$by),
+            download = outplot_pdf
         ),
         ui = "table_of_images",
         h1 = "Cluster Size Distribution"
@@ -162,6 +164,8 @@ shared_clusters = function(name) {
         row_samples = samples
     }
+    hmplot = file.path(odir, "shared_clusters.png")
+    hmplot_pdf = file.path(odir, "shared_clusters.pdf")
     # Plot heatmap
     plotHeatmap(
         plotdata,
@@ -178,12 +182,13 @@ shared_clusters = function(name) {
             }
         ),
         devpars = case$devpars,
-        outfile = file.path(odir, "shared_clusters.png")
+        outfile = c(hmplot, hmplot_pdf)
     )
     add_report(
         list(
-            src = file.path(odir, "shared_clusters.png"),
+            src = hmplot,
+            download = hmplot_pdf,
             name = ifelse(name == "DEFAULT", FALSE, name),
             descr = paste0("Shared TCR clusters across samples")
         ),
@@ -219,16 +224,18 @@ shared_clusters_by_grouping = function(name) {
     }
     outfile = file.path(odir, "shared_clusters.png")
+    outfile_pdf = file.path(odir, "shared_clusters.pdf")
     plotVenn(
         data,
         ggs = 'ggtitle("Shared TCR Clusters")',
         devpars = case$devpars,
-        outfile = outfile
+        outfile = c(outfile, outfile_pdf)
     )
     add_report(
         list(
             src = outfile,
+            download = outfile_pdf,
             name = ifelse(name == "DEFAULT", FALSE, name),
             descr = paste0("Shared TCR clusters across ", grouping)
         ),
@@ -275,6 +282,7 @@ sample_diversity = function(name) {
     }
     outfile = file.path(odir, "diversity.txt")
     outplot = file.path(odir, "diversity.png")
+    outplot_pdf = file.path(odir, "diversity.pdf")
     div = repDiversity(data, .method = case$method)
     write.table(
         if (ncol(div) == 1) {
@@ -320,7 +328,7 @@ sample_diversity = function(name) {
             args = list(mapping = mapping),
             ggs = ggs,
             devpars = case$devpars,
-            outfile = outplot
+            outfile = c(outplot, outplot_pdf)
         )
     } else {
         if (is.null(case$by) || length(case$by) == 0) {
@@ -338,6 +346,14 @@ sample_diversity = function(name) {
         )
         print(p)
         dev.off()
+        pdf(
+            outplot_pdf,
+            width=case$devpars$width / case$devpars$res,
+            height=case$devpars$height / case$devpars$res
+        )
+        print(p)
+        dev.off()
     }
     add_report(
@@ -359,7 +375,8 @@ sample_diversity = function(name) {
                 ),
                 list(
                     kind = "image",
-                    src = outplot
+                    src = outplot,
+                    download = outplot_pdf
                 )
             )
         ),

biopipen/scripts/tcr/TCRDock.py CHANGED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 import os
 import sys
 from pathlib import Path
@@ -7,10 +9,10 @@ import pandas as pd
 from tempfile import gettempdir
 from biopipen.utils.misc import logger, run_command
-configfile = {{in.configfile | repr}}  # pyright: ignore
-outdir = Path({{out.outdir | repr}})  # pyright: ignore
-envs = {{envs | dict | repr}}  # pyright: ignore
-python = sys.executable
+configfile: str = {{in.configfile | quote}}  # pyright: ignore  # noqa
+outdir = Path({{out.outdir | quote}})  # pyright: ignore
+envs: dict = {{envs | dict | repr}}  # pyright: ignore
+python: str | list[str] = sys.executable
 args = envs.copy()
 config = rtoml.load(Path(configfile))
@@ -18,8 +20,8 @@ args.update(config)
 model_name = args.pop("model_name")
 model_file = Path(args.pop("model_file"))
 data_dir = args.pop("data_dir", None)
-tcrdock = args.pop("tcrdock", None)
-tmpdir = args.pop("tmpdir", gettempdir())
+tcrdock: Path | str | None = args.pop("tcrdock", None)
+tmpdir: str = args.pop("tmpdir", gettempdir())
 python = args.pop("python", python)
 if not isinstance(python, (list, tuple)):
@@ -46,6 +48,8 @@ if not tcrdock:
     ]
     run_command(cmd, fg=True, cwd=str(tcrdock))
+tcrdock = str(tcrdock)
 if not model_file.is_absolute():
     model_file = Path(data_dir) / "params" / model_file

biopipen/scripts/tcr/TESSA.R CHANGED Viewed

@@ -198,10 +198,15 @@ png(file.path(result_dir, "Cluster_size_dist.png"), width=8, height=8, units="in
 print(p)
 dev.off()
+pdf(file.path(result_dir, "Cluster_size_dist.pdf"), width=8, height=8)
+print(p)
+dev.off()
 add_report(
     list(
         src = file.path(result_dir, "Cluster_size_dist.png"),
-        descr = "Histogram of cluster size distribution"
+        descr = "Histogram of cluster size distribution",
+        download = file.path(result_dir, "Cluster_size_dist.pdf")
     ),
     list(
         src = file.path(result_dir, "clone_size.png"),

biopipen/scripts/tcr/vdjtools-patch.sh CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 # run the command and capture the stdout
-out=$(command $@)
+out=$(command "$@")
 echo "$out"

biopipen/scripts/vcf/BcftoolsAnnotate.py CHANGED Viewed

@@ -6,11 +6,11 @@ from biopipen.utils.reference import tabix_index
 from biopipen.utils.misc import logger
 from biopipen.scripts.vcf.bcftools_utils import run_bcftools
-infile = {{in.infile | repr}}  # pyright: ignore # noqa: E999
-annfile = {{in.annfile | repr}}  # pyright: ignore
-outfile = {{out.outfile | repr}}  # pyright: ignore
-joboutdir = {{job.outdir | repr}}  # pyright: ignore
-envs = {{envs | dict | repr}}  # pyright: ignore
+infile: str = {{in.infile | quote}}  # pyright: ignore # noqa: E999
+annfile: str = {{in.annfile | quote}}  # pyright: ignore
+outfile: str = {{out.outfile | quote}}  # pyright: ignore
+joboutdir: str = {{job.outdir | quote}}  # pyright: ignore
+envs: dict = {{envs | dict | repr}}  # pyright: ignore
 bcftools = envs.pop("bcftools")
 tabix = envs.pop("tabix")
@@ -25,14 +25,14 @@ if isinstance(columns, list):
     columns = ",".join(columns)
 if "c" in envs:
-    logger.warning("Ignoring envs\[c], use envs\[columns] instead.")
+    logger.warning(r"Ignoring envs\[c], use envs\[columns] instead.")
     del envs["c"]
 if isinstance(remove, list):
     remove = ",".join(remove)
 if "x" in envs:
-    logger.warning("Ignoring envs\[x], use envs\[remove] instead.")
+    logger.warning(r"Ignoring envs\[x], use envs\[remove] instead.")
     del envs["x"]
 envs_has_annfile = "a" in envs or "annotations" in envs
@@ -43,7 +43,7 @@ if header:
 if annfile and envs_has_annfile:
     logger.warning(
-        "Ignoring envs\[a/annotations] because in.annfile is provided."
+        r"Ignoring envs\[a/annotations] because in.annfile is provided."
     )
     with suppress(KeyError):
         del envs["a"]

biopipen/scripts/vcf/BcftoolsFilter.py CHANGED Viewed

@@ -3,11 +3,11 @@ from pathlib import Path, PosixPath  # noqa: F401
 from biopipen.utils.misc import logger
 from biopipen.scripts.vcf.bcftools_utils import run_bcftools
-infile = {{in.infile | repr}}  # pyright: ignore # noqa: #999
-outfile = {{out.outfile | repr}}  # pyright: ignore
+infile: str | Path = {{in.infile | quote}}  # pyright: ignore # noqa: #999
+outfile: str = {{out.outfile | quote}}  # pyright: ignore
 outdir = Path(outfile).parent
-envs = {{envs | dict | repr}}  # pyright: ignore
+envs: dict = {{envs | dict | repr}}  # pyright: ignore
 bcftools = envs.pop("bcftools")
 tabix = envs.pop("tabix")
 keep = envs.pop("keep")

biopipen/scripts/vcf/BcftoolsMerge.py ADDED Viewed

@@ -0,0 +1,31 @@
+from biopipen.utils.reference import tabix_index
+from biopipen.utils.misc import logger
+from biopipen.scripts.vcf.bcftools_utils import run_bcftools
+infiles: list = {{in.infiles | each: as_path}}  # pyright: ignore # noqa: E999
+outfile = {{out.outfile | repr}}  # pyright: ignore
+joboutdir = {{job.outdir | repr}}  # pyright: ignore
+envs: dict = {{envs | dict | repr}}  # pyright: ignore
+bcftools = envs.pop("bcftools")
+tabix = envs.pop("tabix")
+ncores = envs.pop("ncores")
+gz = envs.pop("gz")
+index = envs.pop("index")
+envs.setdefault("force-single", True)
+envs.setdefault("missing-to-ref", True)
+if index and not gz:
+    logger.warning("Forcing envs.gz to True because envs.index is True.")
+    gz = True
+if "O" not in envs and "output-type" not in envs and "output_type" not in envs:
+    envs["O"] = "z" if gz else "v"
+envs[""] = [bcftools, "merge"]
+envs["o"] = outfile
+envs["threads"] = ncores
+envs["_"] = infiles
+run_bcftools(envs, bcftools=bcftools, index=index, tabix=tabix)

biopipen 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl

Potentially problematic release.

biopipen 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl