biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
@@ -1,5 +1,5 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
- source("{{biopipen_dir}}/utils/plot.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+ {{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
3
3
  library(tibble)
4
4
  library(tidyr)
5
5
  library(dplyr)
@@ -7,12 +7,14 @@ library(rlang)
7
7
  library(immunarch)
8
8
  library(ggprism)
9
9
 
10
- immfile = {{in.immfile | quote}}
11
- outdir = {{out.outdir | quote}}
10
+ immfile = {{in.immfile | r}}
11
+ outdir = {{out.outdir | r}}
12
12
  cluster_size_envs = {{envs.cluster_size | r}}
13
13
  shared_clusters_envs = {{envs.shared_clusters | r}}
14
14
  sample_diversity_envs = {{envs.sample_diversity | r}}
15
+ joboutdir = {{job.outdir | r}}
15
16
 
17
+ log_info("Expanding analysis cases ...")
16
18
  expand_cases = function(envs) {
17
19
  cases = envs$cases
18
20
  envs$cases = NULL
@@ -51,8 +53,9 @@ shared_clusters_cases = expand_cases(shared_clusters_envs)
51
53
  sample_diversity_cases = expand_cases(sample_diversity_envs)
52
54
 
53
55
  cluster_size_distribution = function(name) {
54
- print(paste0("- Working on cluster size distribution: ", name))
55
- odir = file.path(outdir, "ClusterSizeDistribution", name)
56
+ log_info("- Working on cluster size distribution: {name}")
57
+
58
+ odir = file.path(outdir, "ClusterSizeDistribution", slugify(name))
56
59
  dir.create(odir, showWarnings = FALSE, recursive = TRUE)
57
60
  case = cluster_size_cases[[name]]
58
61
 
@@ -66,6 +69,7 @@ cluster_size_distribution = function(name) {
66
69
 
67
70
  outfile = file.path(odir, "cluster_size_distribution.txt")
68
71
  outplot = file.path(odir, "cluster_size_distribution.png")
72
+ outplot_pdf = file.path(odir, "cluster_size_distribution.pdf")
69
73
  write.table(clsizes, outfile, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
70
74
 
71
75
  plotGG(
@@ -75,16 +79,29 @@ cluster_size_distribution = function(name) {
75
79
  ggs = c(
76
80
  "theme_prism()",
77
81
  "scale_y_continuous(trans='log10')",
78
- "labs(x='TCR cluster size', y='Count')"
82
+ "labs(x='TCR cluster size', y='Count')",
83
+ "scale_fill_biopipen()"
79
84
  ),
80
85
  devpars = case$devpars,
81
- outfile = outplot
86
+ outfile = c(outplot, outplot_pdf)
87
+ )
88
+
89
+ add_report(
90
+ list(
91
+ src = outplot,
92
+ name = ifelse(name == "DEFAULT", FALSE, name),
93
+ descr = paste0("Cluster size distribution for each ", case$by),
94
+ download = outplot_pdf
95
+ ),
96
+ ui = "table_of_images",
97
+ h1 = "Cluster Size Distribution"
82
98
  )
83
99
  }
84
100
 
85
101
  shared_clusters = function(name) {
86
- print(paste0("- Working on shared clusters: ", name))
87
- odir = file.path(outdir, "SharedClusters", name)
102
+ log_info("- Working on shared clusters: {name}")
103
+
104
+ odir = file.path(outdir, "SharedClusters", slugify(name))
88
105
  dir.create(odir, showWarnings = FALSE, recursive = TRUE)
89
106
  case = shared_clusters_cases[[name]]
90
107
  if (!is.null(case$grouping)) {
@@ -115,13 +132,40 @@ shared_clusters = function(name) {
115
132
  row.names=TRUE, col.names=TRUE, quote=FALSE, sep="\t"
116
133
  )
117
134
 
135
+ if (!is.null(case$sample_order) && length(case$sample_order) > 0) {
136
+ if (length(case$sample_order) == 1) {
137
+ case$sample_order = trimws(strsplit(case$sample_order, ",")[[1]])
138
+ }
139
+ nonexisting = setdiff(case$sample_order, samples)
140
+ if (length(nonexisting) > 0) {
141
+ stop(paste(" The following samples do not exist in `sample_order`:", paste(nonexisting, collapse=", ")))
142
+ }
143
+ plotdata = plotdata[, case$sample_order, drop=FALSE]
144
+ }
145
+
118
146
  if (is.null(case$heatmap_meta) || length(case$heatmap_meta) == 0) {
119
147
  anno = NULL
120
148
  } else {
121
- anno = as.list(immdata$meta[, case$heatmap_meta, drop=FALSE])
149
+ anno = as.list(
150
+ immdata$meta[
151
+ match(colnames(plotdata), immdata$meta$Sample),
152
+ case$heatmap_meta,
153
+ drop=FALSE
154
+ ])
122
155
  anno = do_call(ComplexHeatmap::HeatmapAnnotation, anno)
123
156
  }
124
157
 
158
+ cluster_rows = case$cluster_rows && nrow(plotdata) > 2
159
+ col_samples = colnames(plotdata)
160
+ if (!cluster_rows) {
161
+ plotdata = plotdata[col_samples, ]
162
+ row_samples = col_samples
163
+ } else {
164
+ row_samples = samples
165
+ }
166
+
167
+ hmplot = file.path(odir, "shared_clusters.png")
168
+ hmplot_pdf = file.path(odir, "shared_clusters.pdf")
125
169
  # Plot heatmap
126
170
  plotHeatmap(
127
171
  plotdata,
@@ -129,22 +173,32 @@ shared_clusters = function(name) {
129
173
  name = "Shared TCR Clusters",
130
174
  col = c("#ffe1e1", "red3"),
131
175
  cluster_columns = FALSE,
132
- cluster_rows = nrow(plotdata) > 2,
176
+ cluster_rows = cluster_rows,
133
177
  top_annotation = anno,
134
178
  cell_fun = if (
135
179
  is.null(case$numbers_on_heatmap) || !case$numbers_on_heatmap
136
180
  ) NULL else function(j, i, x, y, width, height, fill) {
137
- grid.text(plotdata[samples[i], samples[j]], x, y, gp = gpar(fontsize = 10))
181
+ grid.text(row_samples[i], col_samples[j], x, y, gp = gpar(fontsize = 10))
138
182
  }
139
183
  ),
140
184
  devpars = case$devpars,
141
- outfile = file.path(odir, "shared_clusters.png")
185
+ outfile = c(hmplot, hmplot_pdf)
186
+ )
142
187
 
188
+ add_report(
189
+ list(
190
+ src = hmplot,
191
+ download = hmplot_pdf,
192
+ name = ifelse(name == "DEFAULT", FALSE, name),
193
+ descr = paste0("Shared TCR clusters across samples")
194
+ ),
195
+ ui = "table_of_images",
196
+ h1 = "Shared TCR Clusters"
143
197
  )
144
198
  }
145
199
 
146
200
  shared_clusters_by_grouping = function(name) {
147
- odir = file.path(outdir, "SharedClusters", name)
201
+ odir = file.path(outdir, "SharedClusters", slugify(name))
148
202
  case = shared_clusters_cases[[name]]
149
203
 
150
204
  data = list()
@@ -170,18 +224,55 @@ shared_clusters_by_grouping = function(name) {
170
224
  }
171
225
 
172
226
  outfile = file.path(odir, "shared_clusters.png")
227
+ outfile_pdf = file.path(odir, "shared_clusters.pdf")
173
228
  plotVenn(
174
229
  data,
175
230
  ggs = 'ggtitle("Shared TCR Clusters")',
176
231
  devpars = case$devpars,
177
- outfile = outfile
232
+ outfile = c(outfile, outfile_pdf)
233
+ )
234
+
235
+ add_report(
236
+ list(
237
+ src = outfile,
238
+ download = outfile_pdf,
239
+ name = ifelse(name == "DEFAULT", FALSE, name),
240
+ descr = paste0("Shared TCR clusters across ", grouping)
241
+ ),
242
+ ui = "table_of_images",
243
+ h1 = "Shared TCR Clusters"
178
244
  )
179
245
  }
180
246
 
181
247
 
# Display name and description for each supported diversity method.
# Entries are looked up by `case$method` (i.e. `div_methods[[case$method]]`)
# and their `name`/`descr` are pasted into the report description, so every
# description is kept as a single-line string: a string literal that spans
# several source lines would carry its line breaks (and any indentation)
# into the report text.
div_methods = list(
    gini = list(
        name = "The Gini coefficient",
        descr = "The Gini coefficient is a measure of statistical dispersion intended to represent the income or wealth distribution of a nation's residents, and is the most commonly used measurement of inequality."
    ),
    gini.simp = list(
        name = "The Gini-Simpson index",
        descr = "The Gini-Simpson index is a measure of diversity. It is one of the most commonly used in ecology. It is also known as the Simpson index, the Simpson concentration index, the Simpson dominance index, or the Simpson diversity index."
    ),
    inv.simp = list(
        name = "The inverse Simpson index",
        # paste() joins the pieces with single spaces, giving one clean line
        descr = paste(
            "It is the effective number of types that is obtained when",
            "the weighted arithmetic mean is used to quantify average",
            "proportional abundance of types in the dataset of interest."
        )
    ),
    div = list(
        name = "The true diversity",
        descr = paste(
            "It refers to the number of equally abundant types needed",
            "for the average proportional abundance of the types to",
            "equal that observed in the dataset of interest where all",
            "types may not be equally abundant."
        )
    )
)
271
+
182
272
  sample_diversity = function(name) {
183
- print(paste0("- Working on sample diversity: ", name))
184
- odir = file.path(outdir, "SampleDiversity", name)
273
+ log_info("- Working on sample diversity: {name}")
274
+
275
+ odir = file.path(outdir, "SampleDiversity", slugify(name))
185
276
  dir.create(odir, showWarnings = FALSE, recursive = TRUE)
186
277
  case = sample_diversity_cases[[name]]
187
278
 
@@ -191,8 +282,21 @@ sample_diversity = function(name) {
191
282
  }
192
283
  outfile = file.path(odir, "diversity.txt")
193
284
  outplot = file.path(odir, "diversity.png")
285
+ outplot_pdf = file.path(odir, "diversity.pdf")
194
286
  div = repDiversity(data, .method = case$method)
195
- write.table(div, outfile, row.names=TRUE, col.names=TRUE, quote=FALSE, sep="\t")
287
+ write.table(
288
+ if (ncol(div) == 1) {
289
+ as.data.frame(div) %>% rownames_to_column("Sample")
290
+ } else {
291
+ div
292
+ },
293
+ outfile,
294
+ row.names=TRUE,
295
+ col.names=TRUE,
296
+ quote=FALSE,
297
+ sep="\t"
298
+ )
299
+
196
300
  if (case$method == "gini") {
197
301
  div = as.data.frame(div) %>% rownames_to_column("Sample")
198
302
  colnames(div)[2] = "gini"
@@ -201,7 +305,8 @@ sample_diversity = function(name) {
201
305
  mapping = aes(x = Sample, y = gini, fill = Sample)
202
306
  ggs = c(
203
307
  "theme_prism(axis_text_angle = 90)",
204
- "labs(title='Gini coefficient', subtitle='Sample diversity estimation using the Gini coefficient')"
308
+ "labs(title='Gini coefficient', subtitle='Sample diversity estimation using the Gini coefficient')",
309
+ "scale_fill_biopipen()"
205
310
  )
206
311
  if (is.null(case$by) || length(case$by) == 0) {
207
312
 
@@ -223,9 +328,8 @@ sample_diversity = function(name) {
223
328
  args = list(mapping = mapping),
224
329
  ggs = ggs,
225
330
  devpars = case$devpars,
226
- outfile = outplot
331
+ outfile = c(outplot, outplot_pdf)
227
332
  )
228
-
229
333
  } else {
230
334
  if (is.null(case$by) || length(case$by) == 0) {
231
335
  p = vis(div)
@@ -242,7 +346,51 @@ sample_diversity = function(name) {
242
346
  )
243
347
  print(p)
244
348
  dev.off()
349
+
350
+ pdf(
351
+ outplot_pdf,
352
+ width=case$devpars$width / case$devpars$res,
353
+ height=case$devpars$height / case$devpars$res
354
+ )
355
+ print(p)
356
+ dev.off()
245
357
  }
358
+
359
+ add_report(
360
+ list(
361
+ ui = "flat",
362
+ label = "Diversity Plot",
363
+ contents = list(
364
+ list(
365
+ kind = "descr",
366
+ content = paste(
367
+ div_methods[[case$method]]$name,
368
+ ifelse(
369
+ is.null(case$by) || length(case$by) == 0,
370
+ "",
371
+ paste0(" grouped by ", paste(case$by, collapse = ", "))
372
+ ),
373
+ div_methods[[case$method]]$descr
374
+ )
375
+ ),
376
+ list(
377
+ kind = "image",
378
+ src = outplot,
379
+ download = outplot_pdf
380
+ )
381
+ )
382
+ ),
383
+ list(
384
+ ui = "flat",
385
+ label = "Diversity Table",
386
+ contents = list(
387
+ list(kind = "table", src = outfile, data = list(index_col = 0))
388
+ )
389
+ ),
390
+ ui = "tabs",
391
+ h2 = ifelse(name == "DEFAULT", "#", name),
392
+ h1 = "Sample Diversity using TCR clusters"
393
+ )
246
394
  }
247
395
 
248
396
 
@@ -250,14 +398,20 @@ sample_diversity = function(name) {
250
398
  # main
251
399
  # --------------------------------------------------
252
400
  # Load immunarch data
401
+ log_info("Loading immunarch data ...")
253
402
  immdata = readRDS(immfile)
254
403
 
255
404
  # Cluster size distribution
405
+ log_info("Performing cluster size distribution analysis ...")
256
406
  sapply(names(cluster_size_cases), cluster_size_distribution)
257
407
 
258
408
  # Shared clusters
409
+ log_info("Performing shared clusters analysis ...")
259
410
  sapply(names(shared_clusters_cases), shared_clusters)
260
411
 
261
412
  # Diversity
413
+ log_info("Performing sample diversity analysis ...")
262
414
  sapply(names(sample_diversity_cases), sample_diversity)
415
+
416
+ save_report(joboutdir)
263
417
  }
@@ -0,0 +1,110 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ import rtoml
8
+ import pandas as pd
9
+ from tempfile import gettempdir
10
+ from biopipen.utils.misc import logger, run_command
11
+
12
+ configfile: str = {{in.configfile | quote}} # pyright: ignore # noqa
13
+ outdir = Path({{out.outdir | quote}}) # pyright: ignore
14
+ envs: dict = {{envs | dict | repr}} # pyright: ignore
15
+ python: str | list[str] = sys.executable
16
+
17
+ args = envs.copy()
18
+ config = rtoml.load(Path(configfile))
19
+ args.update(config)
20
+ model_name = args.pop("model_name")
21
+ model_file = Path(args.pop("model_file"))
22
+ data_dir = args.pop("data_dir", None)
23
+ tcrdock: Path | str | None = args.pop("tcrdock", None)
24
+ tmpdir: str = args.pop("tmpdir", gettempdir())
25
+ python = args.pop("python", python)
26
+
27
+ if not isinstance(python, (list, tuple)):
28
+ python = [python]
29
+
30
+ if not data_dir:
31
+ raise ValueError("`envs.data_dir` is required")
32
+
33
+ if not tcrdock:
34
+ logger.info("- `envs.tcrdock` is not provided, cloning the repository ... ")
35
+ repo_url = "https://github.com/phbradley/TCRdock"
36
+ commit_id = "c5a7af42eeb0c2a4492a4d4fe803f1f9aafb6193"
37
+ branch = "main"
38
+
39
+ from git import Repo
40
+ repo = Repo.clone_from(repo_url, tmpdir, branch=branch, no_checkout=True)
41
+ repo.git.checkout(commit_id)
42
+ tcrdock = Path(tmpdir) / "TCRdock"
43
+
44
+ logger.info("- Running download_blast.py ...")
45
+ cmd = [
46
+ *python,
47
+ tcrdock / "download_blast.py",
48
+ ]
49
+ run_command(cmd, fg=True, cwd=str(tcrdock))
50
+
51
+ tcrdock = str(tcrdock)
52
+
53
+ if not model_file.is_absolute():
54
+ model_file = Path(data_dir) / "params" / model_file
55
+
56
+ os.environ['TF_FORCE_UNIFIED_MEMORY'] = '1'
57
+ os.environ['XLA_PYTHON_CLIENT_MEM_FRACTION'] = '4.0'
58
+
59
+ logger.info("- Composing targets file ... ")
60
+ targets_file = outdir / "user_targets.tsv"
61
+ targets = pd.DataFrame(
62
+ [
63
+ dict(
64
+ organism=args['organism'],
65
+ mhc_class=args['mhc_class'],
66
+ mhc=args['mhc'],
67
+ peptide=args['peptide'],
68
+ va=args['va'],
69
+ ja=args['ja'],
70
+ cdr3a=args['cdr3a'],
71
+ vb=args['vb'],
72
+ jb=args['jb'],
73
+ cdr3b=args['cdr3b'],
74
+ )
75
+ ]
76
+ )
77
+ targets.to_csv(targets_file, sep="\t", index=False)
78
+
79
+ logger.info("- Generating inputs for AlphaFold modeling ... ")
80
+ cmd = [
81
+ *python,
82
+ tcrdock + "/setup_for_alphafold.py",
83
+ "--targets_tsvfile", targets_file,
84
+ "--output_dir", outdir / "user_output",
85
+ "--new_docking",
86
+ ]
87
+ run_command(cmd, fg=True)
88
+
89
+ logger.info("- Running AlphaFold modeling ... ")
90
+ cmd = [
91
+ *python,
92
+ tcrdock + "/run_prediction.py",
93
+ "--verbose",
94
+ "--targets", outdir / "user_output/targets.tsv",
95
+ "--outfile_prefix", f"{outdir}/{args['peptide']}",
96
+ "--model_names", model_name,
97
+ "--data_dir", data_dir,
98
+ "--model_params_files", model_file,
99
+ ]
100
+ run_command(cmd, fg=True, env={"XLA_FLAGS": "--xla_gpu_force_compilation_parallelism=1"})
101
+
102
+ logger.info("- Calculating the PAE ... ")
103
+ cmd = [
104
+ *python,
105
+ tcrdock + "/add_pmhc_tcr_pae_to_tsvfile.py",
106
+ "--infile", f"{outdir}/{args['peptide']}_final.tsv",
107
+ "--outfile", f"{outdir}/{args['peptide']}_w_pae.tsv",
108
+ ]
109
+
110
+ run_command(cmd, fg=True)