biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +28 -0
- biopipen/core/filters.py +79 -4
- biopipen/core/proc.py +12 -3
- biopipen/core/testing.py +75 -3
- biopipen/ns/bam.py +148 -6
- biopipen/ns/bed.py +75 -0
- biopipen/ns/cellranger.py +186 -0
- biopipen/ns/cellranger_pipeline.py +126 -0
- biopipen/ns/cnv.py +19 -3
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/cnvkit_pipeline.py +20 -12
- biopipen/ns/delim.py +34 -35
- biopipen/ns/gene.py +68 -23
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +39 -14
- biopipen/ns/plot.py +304 -1
- biopipen/ns/protein.py +183 -0
- biopipen/ns/regulatory.py +290 -0
- biopipen/ns/rnaseq.py +142 -5
- biopipen/ns/scrna.py +2053 -473
- biopipen/ns/scrna_metabolic_landscape.py +228 -382
- biopipen/ns/snp.py +659 -0
- biopipen/ns/stats.py +484 -0
- biopipen/ns/tcr.py +683 -98
- biopipen/ns/vcf.py +236 -2
- biopipen/ns/web.py +97 -6
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
- biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
- biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/common.svelte +15 -0
- biopipen/reports/protein/ProdigySummary.svelte +16 -0
- biopipen/reports/scrna/CellsDistribution.svelte +4 -39
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna/MarkersFinder.svelte +6 -126
- biopipen/reports/scrna/MetaMarkers.svelte +3 -75
- biopipen/reports/scrna/RadarPlots.svelte +4 -20
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
- biopipen/reports/snp/PlinkCallRate.svelte +24 -0
- biopipen/reports/snp/PlinkFreq.svelte +18 -0
- biopipen/reports/snp/PlinkHWE.svelte +18 -0
- biopipen/reports/snp/PlinkHet.svelte +18 -0
- biopipen/reports/snp/PlinkIBD.svelte +18 -0
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
- biopipen/reports/tcr/ClonalStats.svelte +16 -0
- biopipen/reports/tcr/CloneResidency.svelte +3 -93
- biopipen/reports/tcr/Immunarch.svelte +4 -155
- biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
- biopipen/reports/tcr/TESSA.svelte +11 -28
- biopipen/reports/utils/misc.liq +22 -7
- biopipen/scripts/bam/BamMerge.py +11 -15
- biopipen/scripts/bam/BamSampling.py +90 -0
- biopipen/scripts/bam/BamSort.py +141 -0
- biopipen/scripts/bam/BamSplitChroms.py +10 -10
- biopipen/scripts/bam/BamSubsetByBed.py +38 -0
- biopipen/scripts/bam/CNAClinic.R +41 -5
- biopipen/scripts/bam/CNVpytor.py +153 -54
- biopipen/scripts/bam/ControlFREEC.py +13 -14
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/bed/Bed2Vcf.py +5 -5
- biopipen/scripts/bed/BedConsensus.py +5 -5
- biopipen/scripts/bed/BedLiftOver.sh +6 -4
- biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
- biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
- biopipen/scripts/bed/BedtoolsMerge.py +4 -4
- biopipen/scripts/cellranger/CellRangerCount.py +138 -0
- biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
- biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
- biopipen/scripts/cnv/AneuploidyScore.R +55 -20
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
- biopipen/scripts/cnv/TMADScore.R +25 -9
- biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
- biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
- biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
- biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
- biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
- biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
- biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
- biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
- biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
- biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
- biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
- biopipen/scripts/cnvkit/guess_baits.py +166 -93
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +116 -118
- biopipen/scripts/gene/GeneNameConversion.R +67 -0
- biopipen/scripts/gene/GenePromoters.R +61 -0
- biopipen/scripts/gsea/Enrichr.R +5 -5
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/GSEA.R +2 -2
- biopipen/scripts/gsea/PreRank.R +5 -5
- biopipen/scripts/misc/Config2File.py +2 -2
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/misc/Shell.sh +15 -0
- biopipen/scripts/misc/Str2File.py +2 -2
- biopipen/scripts/plot/Heatmap.R +3 -3
- biopipen/scripts/plot/Manhattan.R +147 -0
- biopipen/scripts/plot/QQPlot.R +146 -0
- biopipen/scripts/plot/ROC.R +88 -0
- biopipen/scripts/plot/Scatter.R +112 -0
- biopipen/scripts/plot/VennDiagram.R +5 -9
- biopipen/scripts/protein/MMCIF2PDB.py +33 -0
- biopipen/scripts/protein/PDB2Fasta.py +60 -0
- biopipen/scripts/protein/Prodigy.py +119 -0
- biopipen/scripts/protein/ProdigySummary.R +140 -0
- biopipen/scripts/protein/RMSD.py +178 -0
- biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
- biopipen/scripts/regulatory/MotifScan.py +159 -0
- biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
- biopipen/scripts/regulatory/motifs-common.R +324 -0
- biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
- biopipen/scripts/rnaseq/Simulation.R +21 -0
- biopipen/scripts/rnaseq/UnitConversion.R +325 -54
- biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
- biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
- biopipen/scripts/scrna/CellCellCommunication.py +150 -0
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
- biopipen/scripts/scrna/CellSNPLite.py +30 -0
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
- biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
- biopipen/scripts/scrna/CellsDistribution.R +456 -167
- biopipen/scripts/scrna/DimPlots.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
- biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
- biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
- biopipen/scripts/scrna/ExprImputation.R +7 -0
- biopipen/scripts/scrna/LoomTo10X.R +51 -0
- biopipen/scripts/scrna/MQuad.py +25 -0
- biopipen/scripts/scrna/MarkersFinder.R +679 -400
- biopipen/scripts/scrna/MetaMarkers.R +265 -161
- biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
- biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
- biopipen/scripts/scrna/RadarPlots.R +355 -134
- biopipen/scripts/scrna/ScFGSEA.R +298 -100
- biopipen/scripts/scrna/ScSimulation.R +65 -0
- biopipen/scripts/scrna/ScVelo.py +617 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
- biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
- biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
- biopipen/scripts/scrna/SeuratClustering.R +36 -233
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
- biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
- biopipen/scripts/scrna/SeuratPreparing.R +223 -173
- biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
- biopipen/scripts/scrna/SeuratTo10X.R +27 -0
- biopipen/scripts/scrna/Slingshot.R +65 -0
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
- biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
- biopipen/scripts/scrna/scvelo_paga.py +313 -0
- biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
- biopipen/scripts/snp/MatrixEQTL.R +217 -0
- biopipen/scripts/snp/Plink2GTMat.py +148 -0
- biopipen/scripts/snp/PlinkCallRate.R +199 -0
- biopipen/scripts/snp/PlinkFilter.py +100 -0
- biopipen/scripts/snp/PlinkFreq.R +291 -0
- biopipen/scripts/snp/PlinkFromVcf.py +81 -0
- biopipen/scripts/snp/PlinkHWE.R +85 -0
- biopipen/scripts/snp/PlinkHet.R +96 -0
- biopipen/scripts/snp/PlinkIBD.R +196 -0
- biopipen/scripts/snp/PlinkSimulation.py +124 -0
- biopipen/scripts/snp/PlinkUpdateName.py +124 -0
- biopipen/scripts/stats/ChowTest.R +146 -0
- biopipen/scripts/stats/DiffCoexpr.R +152 -0
- biopipen/scripts/stats/LiquidAssoc.R +135 -0
- biopipen/scripts/stats/Mediation.R +108 -0
- biopipen/scripts/stats/MetaPvalue.R +130 -0
- biopipen/scripts/stats/MetaPvalue1.R +74 -0
- biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
- biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
- biopipen/scripts/tcr/Attach2Seurat.R +3 -2
- biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
- biopipen/scripts/tcr/CDR3Clustering.R +343 -0
- biopipen/scripts/tcr/ClonalStats.R +526 -0
- biopipen/scripts/tcr/CloneResidency.R +255 -131
- biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
- biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
- biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
- biopipen/scripts/tcr/GIANA/query.py +164 -162
- biopipen/scripts/tcr/Immunarch-basic.R +31 -9
- biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
- biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
- biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
- biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
- biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
- biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
- biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
- biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
- biopipen/scripts/tcr/Immunarch.R +63 -11
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
- biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
- biopipen/scripts/tcr/SampleDiversity.R +1 -1
- biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
- biopipen/scripts/tcr/ScRepLoading.R +166 -0
- biopipen/scripts/tcr/TCRClusterStats.R +176 -22
- biopipen/scripts/tcr/TCRDock.py +110 -0
- biopipen/scripts/tcr/TESSA.R +102 -118
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/tcr/immunarch-patched.R +142 -0
- biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
- biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
- biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
- biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
- biopipen/scripts/vcf/BcftoolsSort.py +113 -0
- biopipen/scripts/vcf/BcftoolsView.py +73 -0
- biopipen/scripts/vcf/TruvariBench.sh +14 -7
- biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
- biopipen/scripts/vcf/TruvariConsistency.R +1 -1
- biopipen/scripts/vcf/Vcf2Bed.py +2 -2
- biopipen/scripts/vcf/VcfAnno.py +11 -11
- biopipen/scripts/vcf/VcfDownSample.sh +22 -10
- biopipen/scripts/vcf/VcfFilter.py +5 -5
- biopipen/scripts/vcf/VcfFix.py +7 -7
- biopipen/scripts/vcf/VcfFix_utils.py +13 -4
- biopipen/scripts/vcf/VcfIndex.py +3 -3
- biopipen/scripts/vcf/VcfIntersect.py +3 -3
- biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
- biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
- biopipen/scripts/vcf/bcftools_utils.py +52 -0
- biopipen/scripts/web/Download.py +8 -4
- biopipen/scripts/web/DownloadList.py +5 -5
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
- biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
- biopipen/scripts/web/gcloud_common.py +49 -0
- biopipen/utils/gene.py +108 -60
- biopipen/utils/misc.py +146 -20
- biopipen/utils/reference.py +64 -20
- biopipen/utils/reporter.py +177 -0
- biopipen/utils/vcf.py +1 -1
- biopipen-0.34.26.dist-info/METADATA +27 -0
- biopipen-0.34.26.dist-info/RECORD +292 -0
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
- biopipen/ns/bcftools.py +0 -111
- biopipen/ns/scrna_basic.py +0 -255
- biopipen/reports/delim/SampleInfo.svelte +0 -36
- biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
- biopipen/reports/scrna/ScFGSEA.svelte +0 -35
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
- biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
- biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
- biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
- biopipen/scripts/gene/GeneNameConversion.py +0 -66
- biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
- biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
- biopipen/scripts/scrna/ExprImpution.R +0 -7
- biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
- biopipen/scripts/scrna/Write10X.R +0 -11
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
- biopipen/scripts/tcr/TCRClustering.R +0 -280
- biopipen/utils/common_docstrs.py +0 -61
- biopipen/utils/gene.R +0 -49
- biopipen/utils/gsea.R +0 -193
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -114
- biopipen/utils/mutate_helpers.R +0 -433
- biopipen/utils/plot.R +0 -173
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -115
- biopipen-0.21.0.dist-info/METADATA +0 -22
- biopipen-0.21.0.dist-info/RECORD +0 -218
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
+
{{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
|
|
3
3
|
library(tibble)
|
|
4
4
|
library(tidyr)
|
|
5
5
|
library(dplyr)
|
|
@@ -7,12 +7,14 @@ library(rlang)
|
|
|
7
7
|
library(immunarch)
|
|
8
8
|
library(ggprism)
|
|
9
9
|
|
|
10
|
-
immfile = {{in.immfile |
|
|
11
|
-
outdir = {{out.outdir |
|
|
10
|
+
immfile = {{in.immfile | r}}
|
|
11
|
+
outdir = {{out.outdir | r}}
|
|
12
12
|
cluster_size_envs = {{envs.cluster_size | r}}
|
|
13
13
|
shared_clusters_envs = {{envs.shared_clusters | r}}
|
|
14
14
|
sample_diversity_envs = {{envs.sample_diversity | r}}
|
|
15
|
+
joboutdir = {{job.outdir | r}}
|
|
15
16
|
|
|
17
|
+
log_info("Expanding analysis cases ...")
|
|
16
18
|
expand_cases = function(envs) {
|
|
17
19
|
cases = envs$cases
|
|
18
20
|
envs$cases = NULL
|
|
@@ -51,8 +53,9 @@ shared_clusters_cases = expand_cases(shared_clusters_envs)
|
|
|
51
53
|
sample_diversity_cases = expand_cases(sample_diversity_envs)
|
|
52
54
|
|
|
53
55
|
cluster_size_distribution = function(name) {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
+
log_info("- Working on cluster size distribution: {name}")
|
|
57
|
+
|
|
58
|
+
odir = file.path(outdir, "ClusterSizeDistribution", slugify(name))
|
|
56
59
|
dir.create(odir, showWarnings = FALSE, recursive = TRUE)
|
|
57
60
|
case = cluster_size_cases[[name]]
|
|
58
61
|
|
|
@@ -66,6 +69,7 @@ cluster_size_distribution = function(name) {
|
|
|
66
69
|
|
|
67
70
|
outfile = file.path(odir, "cluster_size_distribution.txt")
|
|
68
71
|
outplot = file.path(odir, "cluster_size_distribution.png")
|
|
72
|
+
outplot_pdf = file.path(odir, "cluster_size_distribution.pdf")
|
|
69
73
|
write.table(clsizes, outfile, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
|
|
70
74
|
|
|
71
75
|
plotGG(
|
|
@@ -75,16 +79,29 @@ cluster_size_distribution = function(name) {
|
|
|
75
79
|
ggs = c(
|
|
76
80
|
"theme_prism()",
|
|
77
81
|
"scale_y_continuous(trans='log10')",
|
|
78
|
-
"labs(x='TCR cluster size', y='Count')"
|
|
82
|
+
"labs(x='TCR cluster size', y='Count')",
|
|
83
|
+
"scale_fill_biopipen()"
|
|
79
84
|
),
|
|
80
85
|
devpars = case$devpars,
|
|
81
|
-
outfile = outplot
|
|
86
|
+
outfile = c(outplot, outplot_pdf)
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
add_report(
|
|
90
|
+
list(
|
|
91
|
+
src = outplot,
|
|
92
|
+
name = ifelse(name == "DEFAULT", FALSE, name),
|
|
93
|
+
descr = paste0("Cluster size distribution for each ", case$by),
|
|
94
|
+
download = outplot_pdf
|
|
95
|
+
),
|
|
96
|
+
ui = "table_of_images",
|
|
97
|
+
h1 = "Cluster Size Distribution"
|
|
82
98
|
)
|
|
83
99
|
}
|
|
84
100
|
|
|
85
101
|
shared_clusters = function(name) {
|
|
86
|
-
|
|
87
|
-
|
|
102
|
+
log_info("- Working on shared clusters: {name}")
|
|
103
|
+
|
|
104
|
+
odir = file.path(outdir, "SharedClusters", slugify(name))
|
|
88
105
|
dir.create(odir, showWarnings = FALSE, recursive = TRUE)
|
|
89
106
|
case = shared_clusters_cases[[name]]
|
|
90
107
|
if (!is.null(case$grouping)) {
|
|
@@ -115,13 +132,40 @@ shared_clusters = function(name) {
|
|
|
115
132
|
row.names=TRUE, col.names=TRUE, quote=FALSE, sep="\t"
|
|
116
133
|
)
|
|
117
134
|
|
|
135
|
+
if (!is.null(case$sample_order) && length(case$sample_order) > 0) {
|
|
136
|
+
if (length(case$sample_order) == 1) {
|
|
137
|
+
case$sample_order = trimws(strsplit(case$sample_order, ",")[[1]])
|
|
138
|
+
}
|
|
139
|
+
nonexisting = setdiff(case$sample_order, samples)
|
|
140
|
+
if (length(nonexisting) > 0) {
|
|
141
|
+
stop(paste(" The following samples do not exist in `sample_order`:", paste(nonexisting, collapse=", ")))
|
|
142
|
+
}
|
|
143
|
+
plotdata = plotdata[, case$sample_order, drop=FALSE]
|
|
144
|
+
}
|
|
145
|
+
|
|
118
146
|
if (is.null(case$heatmap_meta) || length(case$heatmap_meta) == 0) {
|
|
119
147
|
anno = NULL
|
|
120
148
|
} else {
|
|
121
|
-
anno = as.list(
|
|
149
|
+
anno = as.list(
|
|
150
|
+
immdata$meta[
|
|
151
|
+
match(colnames(plotdata), immdata$meta$Sample),
|
|
152
|
+
case$heatmap_meta,
|
|
153
|
+
drop=FALSE
|
|
154
|
+
])
|
|
122
155
|
anno = do_call(ComplexHeatmap::HeatmapAnnotation, anno)
|
|
123
156
|
}
|
|
124
157
|
|
|
158
|
+
cluster_rows = case$cluster_rows && nrow(plotdata) > 2
|
|
159
|
+
col_samples = colnames(plotdata)
|
|
160
|
+
if (!cluster_rows) {
|
|
161
|
+
plotdata = plotdata[col_samples, ]
|
|
162
|
+
row_samples = col_samples
|
|
163
|
+
} else {
|
|
164
|
+
row_samples = samples
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
hmplot = file.path(odir, "shared_clusters.png")
|
|
168
|
+
hmplot_pdf = file.path(odir, "shared_clusters.pdf")
|
|
125
169
|
# Plot heatmap
|
|
126
170
|
plotHeatmap(
|
|
127
171
|
plotdata,
|
|
@@ -129,22 +173,32 @@ shared_clusters = function(name) {
|
|
|
129
173
|
name = "Shared TCR Clusters",
|
|
130
174
|
col = c("#ffe1e1", "red3"),
|
|
131
175
|
cluster_columns = FALSE,
|
|
132
|
-
cluster_rows =
|
|
176
|
+
cluster_rows = cluster_rows,
|
|
133
177
|
top_annotation = anno,
|
|
134
178
|
cell_fun = if (
|
|
135
179
|
is.null(case$numbers_on_heatmap) || !case$numbers_on_heatmap
|
|
136
180
|
) NULL else function(j, i, x, y, width, height, fill) {
|
|
137
|
-
grid.text(
|
|
181
|
+
grid.text(row_samples[i], col_samples[j], x, y, gp = gpar(fontsize = 10))
|
|
138
182
|
}
|
|
139
183
|
),
|
|
140
184
|
devpars = case$devpars,
|
|
141
|
-
outfile =
|
|
185
|
+
outfile = c(hmplot, hmplot_pdf)
|
|
186
|
+
)
|
|
142
187
|
|
|
188
|
+
add_report(
|
|
189
|
+
list(
|
|
190
|
+
src = hmplot,
|
|
191
|
+
download = hmplot_pdf,
|
|
192
|
+
name = ifelse(name == "DEFAULT", FALSE, name),
|
|
193
|
+
descr = paste0("Shared TCR clusters across samples")
|
|
194
|
+
),
|
|
195
|
+
ui = "table_of_images",
|
|
196
|
+
h1 = "Shared TCR Clusters"
|
|
143
197
|
)
|
|
144
198
|
}
|
|
145
199
|
|
|
146
200
|
shared_clusters_by_grouping = function(name) {
|
|
147
|
-
odir = file.path(outdir, "SharedClusters", name)
|
|
201
|
+
odir = file.path(outdir, "SharedClusters", slugify(name))
|
|
148
202
|
case = shared_clusters_cases[[name]]
|
|
149
203
|
|
|
150
204
|
data = list()
|
|
@@ -170,18 +224,55 @@ shared_clusters_by_grouping = function(name) {
|
|
|
170
224
|
}
|
|
171
225
|
|
|
172
226
|
outfile = file.path(odir, "shared_clusters.png")
|
|
227
|
+
outfile_pdf = file.path(odir, "shared_clusters.pdf")
|
|
173
228
|
plotVenn(
|
|
174
229
|
data,
|
|
175
230
|
ggs = 'ggtitle("Shared TCR Clusters")',
|
|
176
231
|
devpars = case$devpars,
|
|
177
|
-
outfile = outfile
|
|
232
|
+
outfile = c(outfile, outfile_pdf)
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
add_report(
|
|
236
|
+
list(
|
|
237
|
+
src = outfile,
|
|
238
|
+
download = outfile_pdf,
|
|
239
|
+
name = ifelse(name == "DEFAULT", FALSE, name),
|
|
240
|
+
descr = paste0("Shared TCR clusters across ", grouping)
|
|
241
|
+
),
|
|
242
|
+
ui = "table_of_images",
|
|
243
|
+
h1 = "Shared TCR Clusters"
|
|
178
244
|
)
|
|
179
245
|
}
|
|
180
246
|
|
|
181
247
|
|
|
248
|
+
div_methods = list(
|
|
249
|
+
gini = list(
|
|
250
|
+
name = "The Gini coefficient",
|
|
251
|
+
descr = "The Gini coefficient is a measure of statistical dispersion intended to represent the income or wealth distribution of a nation's residents, and is the most commonly used measurement of inequality."
|
|
252
|
+
),
|
|
253
|
+
gini.simp = list(
|
|
254
|
+
name = "The Gini-Simpson index",
|
|
255
|
+
descr = "The Gini-Simpson index is a measure of diversity. It is one of the most commonly used in ecology. It is also known as the Simpson index, the Simpson concentration index, the Simpson dominance index, or the Simpson diversity index."
|
|
256
|
+
),
|
|
257
|
+
inv.simp = list(
|
|
258
|
+
name = "The inverse Simpson index",
|
|
259
|
+
descr = "It is the effective number of types that is obtained when
|
|
260
|
+
the weighted arithmetic mean is used to quantify average
|
|
261
|
+
proportional abundance of types in the dataset of interest."
|
|
262
|
+
),
|
|
263
|
+
div = list(
|
|
264
|
+
name = "The true diversity",
|
|
265
|
+
descr = "It refers to the number of equally abundant types needed
|
|
266
|
+
for the average proportional abundance of the types to
|
|
267
|
+
equal that observed in the dataset of interest where all
|
|
268
|
+
types may not be equally abundant."
|
|
269
|
+
)
|
|
270
|
+
)
|
|
271
|
+
|
|
182
272
|
sample_diversity = function(name) {
|
|
183
|
-
|
|
184
|
-
|
|
273
|
+
log_info("- Working on sample diversity: {name}")
|
|
274
|
+
|
|
275
|
+
odir = file.path(outdir, "SampleDiversity", slugify(name))
|
|
185
276
|
dir.create(odir, showWarnings = FALSE, recursive = TRUE)
|
|
186
277
|
case = sample_diversity_cases[[name]]
|
|
187
278
|
|
|
@@ -191,8 +282,21 @@ sample_diversity = function(name) {
|
|
|
191
282
|
}
|
|
192
283
|
outfile = file.path(odir, "diversity.txt")
|
|
193
284
|
outplot = file.path(odir, "diversity.png")
|
|
285
|
+
outplot_pdf = file.path(odir, "diversity.pdf")
|
|
194
286
|
div = repDiversity(data, .method = case$method)
|
|
195
|
-
write.table(
|
|
287
|
+
write.table(
|
|
288
|
+
if (ncol(div) == 1) {
|
|
289
|
+
as.data.frame(div) %>% rownames_to_column("Sample")
|
|
290
|
+
} else {
|
|
291
|
+
div
|
|
292
|
+
},
|
|
293
|
+
outfile,
|
|
294
|
+
row.names=TRUE,
|
|
295
|
+
col.names=TRUE,
|
|
296
|
+
quote=FALSE,
|
|
297
|
+
sep="\t"
|
|
298
|
+
)
|
|
299
|
+
|
|
196
300
|
if (case$method == "gini") {
|
|
197
301
|
div = as.data.frame(div) %>% rownames_to_column("Sample")
|
|
198
302
|
colnames(div)[2] = "gini"
|
|
@@ -201,7 +305,8 @@ sample_diversity = function(name) {
|
|
|
201
305
|
mapping = aes(x = Sample, y = gini, fill = Sample)
|
|
202
306
|
ggs = c(
|
|
203
307
|
"theme_prism(axis_text_angle = 90)",
|
|
204
|
-
"labs(title='Gini coefficient', subtitle='Sample diversity estimation using the Gini coefficient')"
|
|
308
|
+
"labs(title='Gini coefficient', subtitle='Sample diversity estimation using the Gini coefficient')",
|
|
309
|
+
"scale_fill_biopipen()"
|
|
205
310
|
)
|
|
206
311
|
if (is.null(case$by) || length(case$by) == 0) {
|
|
207
312
|
|
|
@@ -223,9 +328,8 @@ sample_diversity = function(name) {
|
|
|
223
328
|
args = list(mapping = mapping),
|
|
224
329
|
ggs = ggs,
|
|
225
330
|
devpars = case$devpars,
|
|
226
|
-
outfile = outplot
|
|
331
|
+
outfile = c(outplot, outplot_pdf)
|
|
227
332
|
)
|
|
228
|
-
|
|
229
333
|
} else {
|
|
230
334
|
if (is.null(case$by) || length(case$by) == 0) {
|
|
231
335
|
p = vis(div)
|
|
@@ -242,7 +346,51 @@ sample_diversity = function(name) {
|
|
|
242
346
|
)
|
|
243
347
|
print(p)
|
|
244
348
|
dev.off()
|
|
349
|
+
|
|
350
|
+
pdf(
|
|
351
|
+
outplot_pdf,
|
|
352
|
+
width=case$devpars$width / case$devpars$res,
|
|
353
|
+
height=case$devpars$height / case$devpars$res
|
|
354
|
+
)
|
|
355
|
+
print(p)
|
|
356
|
+
dev.off()
|
|
245
357
|
}
|
|
358
|
+
|
|
359
|
+
add_report(
|
|
360
|
+
list(
|
|
361
|
+
ui = "flat",
|
|
362
|
+
label = "Diversity Plot",
|
|
363
|
+
contents = list(
|
|
364
|
+
list(
|
|
365
|
+
kind = "descr",
|
|
366
|
+
content = paste(
|
|
367
|
+
div_methods[[case$method]]$name,
|
|
368
|
+
ifelse(
|
|
369
|
+
is.null(case$by) || length(case$by) == 0,
|
|
370
|
+
"",
|
|
371
|
+
paste0(" grouped by ", paste(case$by, collapse = ", "))
|
|
372
|
+
),
|
|
373
|
+
div_methods[[case$method]]$descr
|
|
374
|
+
)
|
|
375
|
+
),
|
|
376
|
+
list(
|
|
377
|
+
kind = "image",
|
|
378
|
+
src = outplot,
|
|
379
|
+
download = outplot_pdf
|
|
380
|
+
)
|
|
381
|
+
)
|
|
382
|
+
),
|
|
383
|
+
list(
|
|
384
|
+
ui = "flat",
|
|
385
|
+
label = "Diversity Table",
|
|
386
|
+
contents = list(
|
|
387
|
+
list(kind = "table", src = outfile, data = list(index_col = 0))
|
|
388
|
+
)
|
|
389
|
+
),
|
|
390
|
+
ui = "tabs",
|
|
391
|
+
h2 = ifelse(name == "DEFAULT", "#", name),
|
|
392
|
+
h1 = "Sample Diversity using TCR clusters"
|
|
393
|
+
)
|
|
246
394
|
}
|
|
247
395
|
|
|
248
396
|
|
|
@@ -250,14 +398,20 @@ sample_diversity = function(name) {
|
|
|
250
398
|
# main
|
|
251
399
|
# --------------------------------------------------
|
|
252
400
|
# Load immunarch data
|
|
401
|
+
log_info("Loading immunarch data ...")
|
|
253
402
|
immdata = readRDS(immfile)
|
|
254
403
|
|
|
255
404
|
# Cluster size distribution
|
|
405
|
+
log_info("Performing cluster size distribution analysis ...")
|
|
256
406
|
sapply(names(cluster_size_cases), cluster_size_distribution)
|
|
257
407
|
|
|
258
408
|
# Shared clusters
|
|
409
|
+
log_info("Performing shared clusters analysis ...")
|
|
259
410
|
sapply(names(shared_clusters_cases), shared_clusters)
|
|
260
411
|
|
|
261
412
|
# Diversity
|
|
413
|
+
log_info("Performing sample diversity analysis ...")
|
|
262
414
|
sapply(names(sample_diversity_cases), sample_diversity)
|
|
415
|
+
|
|
416
|
+
save_report(joboutdir)
|
|
263
417
|
}
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import rtoml
|
|
8
|
+
import pandas as pd
|
|
9
|
+
from tempfile import gettempdir
|
|
10
|
+
from biopipen.utils.misc import logger, run_command
|
|
11
|
+
|
|
12
|
+
configfile: str = {{in.configfile | quote}} # pyright: ignore # noqa
|
|
13
|
+
outdir = Path({{out.outdir | quote}}) # pyright: ignore
|
|
14
|
+
envs: dict = {{envs | dict | repr}} # pyright: ignore
|
|
15
|
+
python: str | list[str] = sys.executable
|
|
16
|
+
|
|
17
|
+
args = envs.copy()
|
|
18
|
+
config = rtoml.load(Path(configfile))
|
|
19
|
+
args.update(config)
|
|
20
|
+
model_name = args.pop("model_name")
|
|
21
|
+
model_file = Path(args.pop("model_file"))
|
|
22
|
+
data_dir = args.pop("data_dir", None)
|
|
23
|
+
tcrdock: Path | str | None = args.pop("tcrdock", None)
|
|
24
|
+
tmpdir: str = args.pop("tmpdir", gettempdir())
|
|
25
|
+
python = args.pop("python", python)
|
|
26
|
+
|
|
27
|
+
if not isinstance(python, (list, tuple)):
|
|
28
|
+
python = [python]
|
|
29
|
+
|
|
30
|
+
if not data_dir:
|
|
31
|
+
raise ValueError("`envs.data_dir` is required")
|
|
32
|
+
|
|
33
|
+
if not tcrdock:
|
|
34
|
+
logger.info("- `envs.tcrdock` is not provided, cloning the repository ... ")
|
|
35
|
+
repo_url = "https://github.com/phbradley/TCRdock"
|
|
36
|
+
commit_id = "c5a7af42eeb0c2a4492a4d4fe803f1f9aafb6193"
|
|
37
|
+
branch = "main"
|
|
38
|
+
|
|
39
|
+
from git import Repo
|
|
40
|
+
repo = Repo.clone_from(repo_url, tmpdir, branch=branch, no_checkout=True)
|
|
41
|
+
repo.git.checkout(commit_id)
|
|
42
|
+
tcrdock = Path(tmpdir) / "TCRdock"
|
|
43
|
+
|
|
44
|
+
logger.info("- Running download_blast.py ...")
|
|
45
|
+
cmd = [
|
|
46
|
+
*python,
|
|
47
|
+
tcrdock / "download_blast.py",
|
|
48
|
+
]
|
|
49
|
+
run_command(cmd, fg=True, cwd=str(tcrdock))
|
|
50
|
+
|
|
51
|
+
tcrdock = str(tcrdock)
|
|
52
|
+
|
|
53
|
+
if not model_file.is_absolute():
|
|
54
|
+
model_file = Path(data_dir) / "params" / model_file
|
|
55
|
+
|
|
56
|
+
os.environ['TF_FORCE_UNIFIED_MEMORY'] = '1'
|
|
57
|
+
os.environ['XLA_PYTHON_CLIENT_MEM_FRACTION'] = '4.0'
|
|
58
|
+
|
|
59
|
+
logger.info("- Composing targets file ... ")
|
|
60
|
+
targets_file = outdir / "user_targets.tsv"
|
|
61
|
+
targets = pd.DataFrame(
|
|
62
|
+
[
|
|
63
|
+
dict(
|
|
64
|
+
organism=args['organism'],
|
|
65
|
+
mhc_class=args['mhc_class'],
|
|
66
|
+
mhc=args['mhc'],
|
|
67
|
+
peptide=args['peptide'],
|
|
68
|
+
va=args['va'],
|
|
69
|
+
ja=args['ja'],
|
|
70
|
+
cdr3a=args['cdr3a'],
|
|
71
|
+
vb=args['vb'],
|
|
72
|
+
jb=args['jb'],
|
|
73
|
+
cdr3b=args['cdr3b'],
|
|
74
|
+
)
|
|
75
|
+
]
|
|
76
|
+
)
|
|
77
|
+
targets.to_csv(targets_file, sep="\t", index=False)
|
|
78
|
+
|
|
79
|
+
logger.info("- Generating inputs for AlphaFold modeling ... ")
|
|
80
|
+
cmd = [
|
|
81
|
+
*python,
|
|
82
|
+
tcrdock + "/setup_for_alphafold.py",
|
|
83
|
+
"--targets_tsvfile", targets_file,
|
|
84
|
+
"--output_dir", outdir / "user_output",
|
|
85
|
+
"--new_docking",
|
|
86
|
+
]
|
|
87
|
+
run_command(cmd, fg=True)
|
|
88
|
+
|
|
89
|
+
logger.info("- Running AlphaFold modeling ... ")
|
|
90
|
+
cmd = [
|
|
91
|
+
*python,
|
|
92
|
+
tcrdock + "/run_prediction.py",
|
|
93
|
+
"--verbose",
|
|
94
|
+
"--targets", outdir / "user_output/targets.tsv",
|
|
95
|
+
"--outfile_prefix", f"{outdir}/{args['peptide']}",
|
|
96
|
+
"--model_names", model_name,
|
|
97
|
+
"--data_dir", data_dir,
|
|
98
|
+
"--model_params_files", model_file,
|
|
99
|
+
]
|
|
100
|
+
run_command(cmd, fg=True, env={"XLA_FLAGS": "--xla_gpu_force_compilation_parallelism=1"})
|
|
101
|
+
|
|
102
|
+
logger.info("- Calculating the PAE ... ")
|
|
103
|
+
cmd = [
|
|
104
|
+
*python,
|
|
105
|
+
tcrdock + "/add_pmhc_tcr_pae_to_tsvfile.py",
|
|
106
|
+
"--infile", f"{outdir}/{args['peptide']}_final.tsv",
|
|
107
|
+
"--outfile", f"{outdir}/{args['peptide']}_w_pae.tsv",
|
|
108
|
+
]
|
|
109
|
+
|
|
110
|
+
run_command(cmd, fg=True)
|