PyPI - biopipen - Versions diffs - 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl - Mend

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (290) hide show

biopipen/__init__.py +1 -1
biopipen/core/config.toml +28 -0
biopipen/core/filters.py +79 -4
biopipen/core/proc.py +12 -3
biopipen/core/testing.py +75 -3
biopipen/ns/bam.py +148 -6
biopipen/ns/bed.py +75 -0
biopipen/ns/cellranger.py +186 -0
biopipen/ns/cellranger_pipeline.py +126 -0
biopipen/ns/cnv.py +19 -3
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/cnvkit_pipeline.py +20 -12
biopipen/ns/delim.py +34 -35
biopipen/ns/gene.py +68 -23
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +39 -14
biopipen/ns/plot.py +304 -1
biopipen/ns/protein.py +183 -0
biopipen/ns/regulatory.py +290 -0
biopipen/ns/rnaseq.py +142 -5
biopipen/ns/scrna.py +2053 -473
biopipen/ns/scrna_metabolic_landscape.py +228 -382
biopipen/ns/snp.py +659 -0
biopipen/ns/stats.py +484 -0
biopipen/ns/tcr.py +683 -98
biopipen/ns/vcf.py +236 -2
biopipen/ns/web.py +97 -6
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/common.svelte +15 -0
biopipen/reports/protein/ProdigySummary.svelte +16 -0
biopipen/reports/scrna/CellsDistribution.svelte +4 -39
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna/MarkersFinder.svelte +6 -126
biopipen/reports/scrna/MetaMarkers.svelte +3 -75
biopipen/reports/scrna/RadarPlots.svelte +4 -20
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
biopipen/reports/snp/PlinkCallRate.svelte +24 -0
biopipen/reports/snp/PlinkFreq.svelte +18 -0
biopipen/reports/snp/PlinkHWE.svelte +18 -0
biopipen/reports/snp/PlinkHet.svelte +18 -0
biopipen/reports/snp/PlinkIBD.svelte +18 -0
biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
biopipen/reports/tcr/ClonalStats.svelte +16 -0
biopipen/reports/tcr/CloneResidency.svelte +3 -93
biopipen/reports/tcr/Immunarch.svelte +4 -155
biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
biopipen/reports/tcr/TESSA.svelte +11 -28
biopipen/reports/utils/misc.liq +22 -7
biopipen/scripts/bam/BamMerge.py +11 -15
biopipen/scripts/bam/BamSampling.py +90 -0
biopipen/scripts/bam/BamSort.py +141 -0
biopipen/scripts/bam/BamSplitChroms.py +10 -10
biopipen/scripts/bam/BamSubsetByBed.py +38 -0
biopipen/scripts/bam/CNAClinic.R +41 -5
biopipen/scripts/bam/CNVpytor.py +153 -54
biopipen/scripts/bam/ControlFREEC.py +13 -14
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/bed/Bed2Vcf.py +5 -5
biopipen/scripts/bed/BedConsensus.py +5 -5
biopipen/scripts/bed/BedLiftOver.sh +6 -4
biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
biopipen/scripts/bed/BedtoolsMerge.py +4 -4
biopipen/scripts/cellranger/CellRangerCount.py +138 -0
biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
biopipen/scripts/cnv/AneuploidyScore.R +55 -20
biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
biopipen/scripts/cnv/TMADScore.R +25 -9
biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
biopipen/scripts/cnvkit/guess_baits.py +166 -93
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +116 -118
biopipen/scripts/gene/GeneNameConversion.R +67 -0
biopipen/scripts/gene/GenePromoters.R +61 -0
biopipen/scripts/gsea/Enrichr.R +5 -5
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/GSEA.R +2 -2
biopipen/scripts/gsea/PreRank.R +5 -5
biopipen/scripts/misc/Config2File.py +2 -2
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/misc/Shell.sh +15 -0
biopipen/scripts/misc/Str2File.py +2 -2
biopipen/scripts/plot/Heatmap.R +3 -3
biopipen/scripts/plot/Manhattan.R +147 -0
biopipen/scripts/plot/QQPlot.R +146 -0
biopipen/scripts/plot/ROC.R +88 -0
biopipen/scripts/plot/Scatter.R +112 -0
biopipen/scripts/plot/VennDiagram.R +5 -9
biopipen/scripts/protein/MMCIF2PDB.py +33 -0
biopipen/scripts/protein/PDB2Fasta.py +60 -0
biopipen/scripts/protein/Prodigy.py +119 -0
biopipen/scripts/protein/ProdigySummary.R +140 -0
biopipen/scripts/protein/RMSD.py +178 -0
biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
biopipen/scripts/regulatory/MotifScan.py +159 -0
biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
biopipen/scripts/regulatory/motifs-common.R +324 -0
biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
biopipen/scripts/rnaseq/Simulation.R +21 -0
biopipen/scripts/rnaseq/UnitConversion.R +325 -54
biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
biopipen/scripts/scrna/CellCellCommunication.py +150 -0
biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
biopipen/scripts/scrna/CellSNPLite.py +30 -0
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
biopipen/scripts/scrna/CellsDistribution.R +456 -167
biopipen/scripts/scrna/DimPlots.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
biopipen/scripts/scrna/ExprImputation.R +7 -0
biopipen/scripts/scrna/LoomTo10X.R +51 -0
biopipen/scripts/scrna/MQuad.py +25 -0
biopipen/scripts/scrna/MarkersFinder.R +679 -400
biopipen/scripts/scrna/MetaMarkers.R +265 -161
biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
biopipen/scripts/scrna/RadarPlots.R +355 -134
biopipen/scripts/scrna/ScFGSEA.R +298 -100
biopipen/scripts/scrna/ScSimulation.R +65 -0
biopipen/scripts/scrna/ScVelo.py +617 -0
biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
biopipen/scripts/scrna/SeuratClustering.R +36 -233
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
biopipen/scripts/scrna/SeuratPreparing.R +223 -173
biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
biopipen/scripts/scrna/SeuratTo10X.R +27 -0
biopipen/scripts/scrna/Slingshot.R +65 -0
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
biopipen/scripts/scrna/scvelo_paga.py +313 -0
biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
biopipen/scripts/snp/MatrixEQTL.R +217 -0
biopipen/scripts/snp/Plink2GTMat.py +148 -0
biopipen/scripts/snp/PlinkCallRate.R +199 -0
biopipen/scripts/snp/PlinkFilter.py +100 -0
biopipen/scripts/snp/PlinkFreq.R +291 -0
biopipen/scripts/snp/PlinkFromVcf.py +81 -0
biopipen/scripts/snp/PlinkHWE.R +85 -0
biopipen/scripts/snp/PlinkHet.R +96 -0
biopipen/scripts/snp/PlinkIBD.R +196 -0
biopipen/scripts/snp/PlinkSimulation.py +124 -0
biopipen/scripts/snp/PlinkUpdateName.py +124 -0
biopipen/scripts/stats/ChowTest.R +146 -0
biopipen/scripts/stats/DiffCoexpr.R +152 -0
biopipen/scripts/stats/LiquidAssoc.R +135 -0
biopipen/scripts/stats/Mediation.R +108 -0
biopipen/scripts/stats/MetaPvalue.R +130 -0
biopipen/scripts/stats/MetaPvalue1.R +74 -0
biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
biopipen/scripts/tcr/Attach2Seurat.R +3 -2
biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
biopipen/scripts/tcr/CDR3Clustering.R +343 -0
biopipen/scripts/tcr/ClonalStats.R +526 -0
biopipen/scripts/tcr/CloneResidency.R +255 -131
biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
biopipen/scripts/tcr/GIANA/query.py +164 -162
biopipen/scripts/tcr/Immunarch-basic.R +31 -9
biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
biopipen/scripts/tcr/Immunarch.R +63 -11
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
biopipen/scripts/tcr/SampleDiversity.R +1 -1
biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
biopipen/scripts/tcr/ScRepLoading.R +166 -0
biopipen/scripts/tcr/TCRClusterStats.R +176 -22
biopipen/scripts/tcr/TCRDock.py +110 -0
biopipen/scripts/tcr/TESSA.R +102 -118
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/tcr/immunarch-patched.R +142 -0
biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
biopipen/scripts/vcf/BcftoolsSort.py +113 -0
biopipen/scripts/vcf/BcftoolsView.py +73 -0
biopipen/scripts/vcf/TruvariBench.sh +14 -7
biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
biopipen/scripts/vcf/TruvariConsistency.R +1 -1
biopipen/scripts/vcf/Vcf2Bed.py +2 -2
biopipen/scripts/vcf/VcfAnno.py +11 -11
biopipen/scripts/vcf/VcfDownSample.sh +22 -10
biopipen/scripts/vcf/VcfFilter.py +5 -5
biopipen/scripts/vcf/VcfFix.py +7 -7
biopipen/scripts/vcf/VcfFix_utils.py +13 -4
biopipen/scripts/vcf/VcfIndex.py +3 -3
biopipen/scripts/vcf/VcfIntersect.py +3 -3
biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
biopipen/scripts/vcf/bcftools_utils.py +52 -0
biopipen/scripts/web/Download.py +8 -4
biopipen/scripts/web/DownloadList.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
biopipen/scripts/web/gcloud_common.py +49 -0
biopipen/utils/gene.py +108 -60
biopipen/utils/misc.py +146 -20
biopipen/utils/reference.py +64 -20
biopipen/utils/reporter.py +177 -0
biopipen/utils/vcf.py +1 -1
biopipen-0.34.26.dist-info/METADATA +27 -0
biopipen-0.34.26.dist-info/RECORD +292 -0
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
biopipen/ns/bcftools.py +0 -111
biopipen/ns/scrna_basic.py +0 -255
biopipen/reports/delim/SampleInfo.svelte +0 -36
biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
biopipen/reports/scrna/ScFGSEA.svelte +0 -35
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
biopipen/scripts/gene/GeneNameConversion.py +0 -66
biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
biopipen/scripts/scrna/ExprImpution.R +0 -7
biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
biopipen/scripts/scrna/Write10X.R +0 -11
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
biopipen/scripts/tcr/TCRClustering.R +0 -280
biopipen/utils/common_docstrs.py +0 -61
biopipen/utils/gene.R +0 -49
biopipen/utils/gsea.R +0 -193
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -114
biopipen/utils/mutate_helpers.R +0 -433
biopipen/utils/plot.R +0 -173
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -115
biopipen-0.21.0.dist-info/METADATA +0 -22
biopipen-0.21.0.dist-info/RECORD +0 -218

biopipen/scripts/tcr/TCRClusterStats.R CHANGED Viewed

@@ -1,5 +1,5 @@
-source("{{biopipen_dir}}/utils/misc.R")
-source("{{biopipen_dir}}/utils/plot.R")
+{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
+{{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
 library(tibble)
 library(tidyr)
 library(dplyr)
@@ -7,12 +7,14 @@ library(rlang)
 library(immunarch)
 library(ggprism)
-immfile = {{in.immfile | quote}}
-outdir = {{out.outdir | quote}}
+immfile = {{in.immfile | r}}
+outdir = {{out.outdir | r}}
 cluster_size_envs = {{envs.cluster_size | r}}
 shared_clusters_envs = {{envs.shared_clusters | r}}
 sample_diversity_envs = {{envs.sample_diversity | r}}
+joboutdir = {{job.outdir | r}}
+log_info("Expanding analysis cases ...")
 expand_cases = function(envs) {
     cases = envs$cases
     envs$cases = NULL
@@ -51,8 +53,9 @@ shared_clusters_cases = expand_cases(shared_clusters_envs)
 sample_diversity_cases = expand_cases(sample_diversity_envs)
 cluster_size_distribution = function(name) {
-    print(paste0("- Working on cluster size distribution: ", name))
-    odir = file.path(outdir, "ClusterSizeDistribution", name)
+    log_info("- Working on cluster size distribution: {name}")
+    odir = file.path(outdir, "ClusterSizeDistribution", slugify(name))
     dir.create(odir, showWarnings = FALSE, recursive = TRUE)
     case = cluster_size_cases[[name]]
@@ -66,6 +69,7 @@ cluster_size_distribution = function(name) {
     outfile = file.path(odir, "cluster_size_distribution.txt")
     outplot = file.path(odir, "cluster_size_distribution.png")
+    outplot_pdf = file.path(odir, "cluster_size_distribution.pdf")
     write.table(clsizes, outfile, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
     plotGG(
@@ -75,16 +79,29 @@ cluster_size_distribution = function(name) {
         ggs = c(
             "theme_prism()",
             "scale_y_continuous(trans='log10')",
-            "labs(x='TCR cluster size', y='Count')"
+            "labs(x='TCR cluster size', y='Count')",
+            "scale_fill_biopipen()"
         ),
         devpars = case$devpars,
-        outfile = outplot
+        outfile = c(outplot, outplot_pdf)
+    )
+    add_report(
+        list(
+            src = outplot,
+            name = ifelse(name == "DEFAULT", FALSE, name),
+            descr = paste0("Cluster size distribution for each ", case$by),
+            download = outplot_pdf
+        ),
+        ui = "table_of_images",
+        h1 = "Cluster Size Distribution"
     )
 }
 shared_clusters = function(name) {
-    print(paste0("- Working on shared clusters: ", name))
-    odir = file.path(outdir, "SharedClusters", name)
+    log_info("- Working on shared clusters: {name}")
+    odir = file.path(outdir, "SharedClusters", slugify(name))
     dir.create(odir, showWarnings = FALSE, recursive = TRUE)
     case = shared_clusters_cases[[name]]
     if (!is.null(case$grouping)) {
@@ -115,13 +132,40 @@ shared_clusters = function(name) {
         row.names=TRUE, col.names=TRUE, quote=FALSE, sep="\t"
     )
+    if (!is.null(case$sample_order) && length(case$sample_order) > 0) {
+        if (length(case$sample_order) == 1) {
+            case$sample_order = trimws(strsplit(case$sample_order, ",")[[1]])
+        }
+        nonexisting = setdiff(case$sample_order, samples)
+        if (length(nonexisting) > 0) {
+            stop(paste("  The following samples do not exist in `sample_order`:", paste(nonexisting, collapse=", ")))
+        }
+        plotdata = plotdata[, case$sample_order, drop=FALSE]
+    }
     if (is.null(case$heatmap_meta) || length(case$heatmap_meta) == 0) {
         anno = NULL
     } else {
-        anno = as.list(immdata$meta[, case$heatmap_meta, drop=FALSE])
+        anno = as.list(
+            immdata$meta[
+                match(colnames(plotdata), immdata$meta$Sample),
+                case$heatmap_meta,
+                drop=FALSE
+            ])
         anno = do_call(ComplexHeatmap::HeatmapAnnotation, anno)
     }
+    cluster_rows = case$cluster_rows && nrow(plotdata) > 2
+    col_samples = colnames(plotdata)
+    if (!cluster_rows) {
+        plotdata = plotdata[col_samples, ]
+        row_samples = col_samples
+    } else {
+        row_samples = samples
+    }
+    hmplot = file.path(odir, "shared_clusters.png")
+    hmplot_pdf = file.path(odir, "shared_clusters.pdf")
     # Plot heatmap
     plotHeatmap(
         plotdata,
@@ -129,22 +173,32 @@ shared_clusters = function(name) {
             name = "Shared TCR Clusters",
             col = c("#ffe1e1", "red3"),
             cluster_columns = FALSE,
-            cluster_rows = nrow(plotdata) > 2,
+            cluster_rows = cluster_rows,
             top_annotation = anno,
             cell_fun = if (
                 is.null(case$numbers_on_heatmap) || !case$numbers_on_heatmap
             ) NULL else function(j, i, x, y, width, height, fill) {
-                grid.text(plotdata[samples[i], samples[j]], x, y, gp = gpar(fontsize = 10))
+                # The label is the plotted value for this cell, looked up by
+                # row/column sample name so it stays correct after the
+                # columns (and possibly rows) of plotdata were reordered.
+                grid.text(plotdata[row_samples[i], col_samples[j]], x, y, gp = gpar(fontsize = 10))
             }
         ),
         devpars = case$devpars,
-        outfile = file.path(odir, "shared_clusters.png")
+        outfile = c(hmplot, hmplot_pdf)
+    )
+    add_report(
+        list(
+            src = hmplot,
+            download = hmplot_pdf,
+            name = ifelse(name == "DEFAULT", FALSE, name),
+            descr = paste0("Shared TCR clusters across samples")
+        ),
+        ui = "table_of_images",
+        h1 = "Shared TCR Clusters"
     )
 }
 shared_clusters_by_grouping = function(name) {
-    odir = file.path(outdir, "SharedClusters", name)
+    odir = file.path(outdir, "SharedClusters", slugify(name))
     case = shared_clusters_cases[[name]]
     data = list()
@@ -170,18 +224,55 @@ shared_clusters_by_grouping = function(name) {
     }
     outfile = file.path(odir, "shared_clusters.png")
+    outfile_pdf = file.path(odir, "shared_clusters.pdf")
     plotVenn(
         data,
         ggs = 'ggtitle("Shared TCR Clusters")',
         devpars = case$devpars,
-        outfile = outfile
+        outfile = c(outfile, outfile_pdf)
+    )
+    add_report(
+        list(
+            src = outfile,
+            download = outfile_pdf,
+            name = ifelse(name == "DEFAULT", FALSE, name),
+            descr = paste0("Shared TCR clusters across ", grouping)
+        ),
+        ui = "table_of_images",
+        h1 = "Shared TCR Clusters"
     )
 }
+div_methods = list(  # Display name and description per diversity method; read via div_methods[[case$method]] when the sample-diversity report is built
+    gini = list(  # Same key as the case$method value "gini" tested in sample_diversity
+        name = "The Gini coefficient",
+        descr = "The Gini coefficient is a measure of statistical dispersion intended to represent the income or wealth distribution of a nation's residents, and is the most commonly used measurement of inequality."
+    ),
+    gini.simp = list(  # NOTE(review): keys presumably mirror the values accepted by repDiversity(.method = ...) -- confirm
+        name = "The Gini-Simpson index",
+        descr = "The Gini-Simpson index is a measure of diversity. It is one of the most commonly used in ecology. It is also known as the Simpson index, the Simpson concentration index, the Simpson dominance index, or the Simpson diversity index."
+    ),
+    inv.simp = list(
+        name = "The inverse Simpson index",
+        descr = "It is the effective number of types that is obtained when
+                 the weighted arithmetic mean is used to quantify average
+                 proportional abundance of types in the dataset of interest."
+    ),
+    div = list(
+        name = "The true diversity",
+        descr = "It refers to the number of equally abundant types needed
+                 for the average proportional abundance of the types to
+                 equal that observed in the dataset of interest where all
+                 types may not be equally abundant."
+    )
+)
 sample_diversity = function(name) {
-    print(paste0("- Working on sample diversity: ", name))
-    odir = file.path(outdir, "SampleDiversity", name)
+    log_info("- Working on sample diversity: {name}")
+    odir = file.path(outdir, "SampleDiversity", slugify(name))
     dir.create(odir, showWarnings = FALSE, recursive = TRUE)
     case = sample_diversity_cases[[name]]
@@ -191,8 +282,21 @@ sample_diversity = function(name) {
     }
     outfile = file.path(odir, "diversity.txt")
     outplot = file.path(odir, "diversity.png")
+    outplot_pdf = file.path(odir, "diversity.pdf")
     div = repDiversity(data, .method = case$method)
-    write.table(div, outfile, row.names=TRUE, col.names=TRUE, quote=FALSE, sep="\t")
+    write.table(
+        if (ncol(div) == 1) {
+            as.data.frame(div) %>% rownames_to_column("Sample")
+        } else {
+            div
+        },
+        outfile,
+        row.names=TRUE,
+        col.names=TRUE,
+        quote=FALSE,
+        sep="\t"
+    )
     if (case$method == "gini") {
         div = as.data.frame(div) %>% rownames_to_column("Sample")
         colnames(div)[2] = "gini"
@@ -201,7 +305,8 @@ sample_diversity = function(name) {
         mapping = aes(x = Sample, y = gini, fill = Sample)
         ggs = c(
             "theme_prism(axis_text_angle = 90)",
-            "labs(title='Gini coefficient', subtitle='Sample diversity estimation using the Gini coefficient')"
+            "labs(title='Gini coefficient', subtitle='Sample diversity estimation using the Gini coefficient')",
+            "scale_fill_biopipen()"
         )
         if (is.null(case$by) || length(case$by) == 0) {
@@ -223,9 +328,8 @@ sample_diversity = function(name) {
             args = list(mapping = mapping),
             ggs = ggs,
             devpars = case$devpars,
-            outfile = outplot
+            outfile = c(outplot, outplot_pdf)
         )
     } else {
         if (is.null(case$by) || length(case$by) == 0) {
             p = vis(div)
@@ -242,7 +346,51 @@ sample_diversity = function(name) {
         )
         print(p)
         dev.off()
+        pdf(
+            outplot_pdf,
+            width=case$devpars$width / case$devpars$res,
+            height=case$devpars$height / case$devpars$res
+        )
+        print(p)
+        dev.off()
     }
+    add_report(
+        list(
+            ui = "flat",
+            label = "Diversity Plot",
+            contents = list(
+                list(
+                    kind = "descr",
+                    content = paste(
+                        div_methods[[case$method]]$name,
+                        ifelse(
+                            is.null(case$by) || length(case$by) == 0,
+                            "",
+                            paste0(" grouped by ", paste(case$by, collapse = ", "))
+                        ),
+                        div_methods[[case$method]]$descr
+                    )
+                ),
+                list(
+                    kind = "image",
+                    src = outplot,
+                    download = outplot_pdf
+                )
+            )
+        ),
+        list(
+            ui = "flat",
+            label = "Diversity Table",
+            contents = list(
+                list(kind = "table", src = outfile, data = list(index_col = 0))
+            )
+        ),
+        ui = "tabs",
+        h2 = ifelse(name == "DEFAULT", "#", name),
+        h1 = "Sample Diversity using TCR clusters"
+    )
 }
@@ -250,14 +398,20 @@ sample_diversity = function(name) {
     # main
     # --------------------------------------------------
     # Load immunarch data
+    log_info("Loading immunarch data ...")
     immdata = readRDS(immfile)
     # Cluster size distribution
+    log_info("Performing cluster size distribution analysis ...")
     sapply(names(cluster_size_cases), cluster_size_distribution)
     # Shared clusters
+    log_info("Performing shared clusters analysis ...")
     sapply(names(shared_clusters_cases), shared_clusters)
     # Diversity
+    log_info("Performing sample diversity analysis ...")
     sapply(names(sample_diversity_cases), sample_diversity)
+    save_report(joboutdir)
 }

biopipen/scripts/tcr/TCRDock.py ADDED Viewed

@@ -0,0 +1,110 @@
+from __future__ import annotations
+import os
+import sys
+from pathlib import Path
+import rtoml
+import pandas as pd
+from tempfile import gettempdir
+from biopipen.utils.misc import logger, run_command
+configfile: str = {{in.configfile | quote}}  # pyright: ignore  # noqa
+outdir = Path({{out.outdir | quote}})  # pyright: ignore
+envs: dict = {{envs | dict | repr}}  # pyright: ignore
+python: str | list[str] = sys.executable
+args = envs.copy()
+config = rtoml.load(Path(configfile))
+args.update(config)
+model_name = args.pop("model_name")
+model_file = Path(args.pop("model_file"))
+data_dir = args.pop("data_dir", None)
+tcrdock: Path | str | None = args.pop("tcrdock", None)
+tmpdir: str = args.pop("tmpdir", gettempdir())
+python = args.pop("python", python)
+if not isinstance(python, (list, tuple)):
+    python = [python]
+if not data_dir:
+    raise ValueError("`envs.data_dir` is required")
+if not tcrdock:
+    logger.info("- `envs.tcrdock` is not provided, cloning the repository ... ")
+    repo_url = "https://github.com/phbradley/TCRdock"
+    commit_id = "c5a7af42eeb0c2a4492a4d4fe803f1f9aafb6193"
+    branch = "main"
+    from git import Repo
+    repo = Repo.clone_from(repo_url, tmpdir, branch=branch, no_checkout=True)
+    repo.git.checkout(commit_id)
+    tcrdock = Path(tmpdir) / "TCRdock"
+    logger.info("- Running download_blast.py ...")
+    cmd = [
+        *python,
+        tcrdock / "download_blast.py",
+    ]
+    run_command(cmd, fg=True, cwd=str(tcrdock))
+tcrdock = str(tcrdock)
+if not model_file.is_absolute():
+    model_file = Path(data_dir) / "params" / model_file
+os.environ['TF_FORCE_UNIFIED_MEMORY'] = '1'
+os.environ['XLA_PYTHON_CLIENT_MEM_FRACTION'] = '4.0'
+logger.info("- Composing targets file ... ")
+targets_file = outdir / "user_targets.tsv"
+targets = pd.DataFrame(
+    [
+        dict(
+            organism=args['organism'],
+            mhc_class=args['mhc_class'],
+            mhc=args['mhc'],
+            peptide=args['peptide'],
+            va=args['va'],
+            ja=args['ja'],
+            cdr3a=args['cdr3a'],
+            vb=args['vb'],
+            jb=args['jb'],
+            cdr3b=args['cdr3b'],
+        )
+    ]
+)
+targets.to_csv(targets_file, sep="\t", index=False)
+logger.info("- Generating inputs for AlphaFold modeling ... ")
+cmd = [
+    *python,
+    tcrdock + "/setup_for_alphafold.py",
+    "--targets_tsvfile", targets_file,
+    "--output_dir", outdir / "user_output",
+    "--new_docking",
+]
+run_command(cmd, fg=True)
+logger.info("- Running AlphaFold modeling ... ")
+cmd = [
+    *python,
+    tcrdock + "/run_prediction.py",
+    "--verbose",
+    "--targets", outdir / "user_output/targets.tsv",
+    "--outfile_prefix", f"{outdir}/{args['peptide']}",
+    "--model_names", model_name,
+    "--data_dir", data_dir,
+    "--model_params_files", model_file,
+]
+run_command(cmd, fg=True, env={"XLA_FLAGS": "--xla_gpu_force_compilation_parallelism=1"})
+logger.info("- Calculating the PAE ... ")
+cmd = [
+    *python,
+    tcrdock + "/add_pmhc_tcr_pae_to_tsvfile.py",
+    "--infile", f"{outdir}/{args['peptide']}_final.tsv",
+    "--outfile", f"{outdir}/{args['peptide']}_w_pae.tsv",
+]
+run_command(cmd, fg=True)

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl