biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
@@ -1,467 +1,746 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
- source("{{biopipen_dir}}/utils/mutate_helpers.R")
3
-
4
1
  library(rlang)
5
2
  library(dplyr)
6
- library(tidyr)
7
- library(tibble)
8
3
  library(Seurat)
9
- library(enrichR)
10
- library(ggplot2)
11
- library(ggprism)
12
- library(ggrepel)
13
- library(future)
14
4
  library(tidyseurat)
15
- library(ggVennDiagram)
16
- library(UpSetR)
5
+ library(plotthis)
6
+ library(biopipen.utils)
7
+
8
+ log <- get_logger()
9
+ reporter <- get_reporter()
17
10
 
18
- log_info("Setting up EnrichR ...")
19
- setEnrichrSite("Enrichr")
11
+ srtfile <- {{ in.srtobj | r }}
12
+ outdir <- {{ out.outdir | r }}
13
+ joboutdir <- {{ job.outdir | r }}
20
14
 
21
- srtfile <- {{ in.srtobj | quote }}
22
- outdir <- {{ out.outdir | quote }}
23
15
  ncores <- {{ envs.ncores | int }}
24
16
  mutaters <- {{ envs.mutaters | r }}
25
- ident.1 <- {{ envs["ident-1"] | r }}
26
- ident.2 <- {{ envs["ident-2"] | r }}
27
- group.by <- {{ envs["group-by"] | r }}
17
+ group_by <- {{ envs.group_by | default: envs["group-by"] | default: None | r }}
18
+ ident_1 <- {{ envs.ident_1 | default: envs["ident-1"] | default: None | r }}
19
+ ident_2 <- {{ envs.ident_2 | default: envs["ident-2"] | default: None | r }}
28
20
  each <- {{ envs.each | r }}
29
- prefix_each <- {{ envs.prefix_each | r }}
30
- section <- {{ envs.section | r }}
31
21
  dbs <- {{ envs.dbs | r }}
32
- assay <- {{ envs.assay | r }}
33
22
  sigmarkers <- {{ envs.sigmarkers | r }}
34
- volcano_genes <- {{ envs.volcano_genes | r }}
23
+ enrich_style <- {{ envs.enrich_style | r }}
24
+ assay <- {{ envs.assay | r }}
25
+ error <- {{ envs.error | r }}
35
26
  subset <- {{ envs.subset | r }}
27
+ cache <- {{ envs.cache | r }}
36
28
  rest <- {{ envs.rest | r: todot="-" }}
37
- dotplot <- {{ envs.dotplot | r: todot="-" }}
38
- cases <- {{ envs.cases | r: todot="-" }}
39
- overlap <- {{ envs.overlap | r }}
40
-
41
- overlaps <- list()
29
+ allmarker_plots_defaults <- {{ envs.allmarker_plots_defaults | r }}
30
+ allmarker_plots <- {{ envs.allmarker_plots | r }}
31
+ allenrich_plots_defaults <- {{ envs.allenrich_plots_defaults | r }}
32
+ allenrich_plots <- {{ envs.allenrich_plots | r }}
33
+ marker_plots_defaults <- {{ envs.marker_plots_defaults | r }}
34
+ marker_plots <- {{ envs.marker_plots | r }}
35
+ enrich_plots_defaults <- {{ envs.enrich_plots_defaults | r }}
36
+ enrich_plots <- {{ envs.enrich_plots | r }}
37
+ overlaps_defaults <- {{ envs.overlaps_defaults | r }}
38
+ overlaps <- {{ envs.overlaps | r }}
39
+ cases <- {{ envs.cases | r: todot="-", skip=1 }}
42
40
 
43
- if (is.character(volcano_genes) && length(volcano_genes) == 1) {
44
- volcano_genes <- trimws(strsplit(volcano_genes, ",")[[1]])
45
- }
41
+ if (isTRUE(cache)) { cache <- joboutdir }
46
42
 
47
43
  set.seed(8525)
48
44
  if (ncores > 1) {
49
- options(future.globals.maxSize = 80000 * 1024^2)
45
+ options(future.globals.maxSize = Inf)
50
46
  plan(strategy = "multicore", workers = ncores)
51
47
  }
52
48
 
53
- log_info("Reading Seurat object ...")
54
- srtobj <- readRDS(srtfile)
49
+ log$info("Reading Seurat object ...")
50
+ srtobj <- read_obj(srtfile)
51
+
55
52
 
56
- log_info("Mutate meta data if needed ...")
57
53
  if (!is.null(mutaters) && length(mutaters) > 0) {
54
+ log$info("Mutating meta data ...")
58
55
  srtobj@meta.data <- srtobj@meta.data %>%
59
56
  mutate(!!!lapply(mutaters, parse_expr))
60
57
  }
61
58
 
62
- log_info("Expanding cases ...")
63
- if (is.null(cases) || length(cases) == 0) {
64
- cases <- list(
65
- DEFAULT = list(
66
- ident.1 = ident.1,
67
- ident.2 = ident.2,
68
- group.by = group.by,
69
- each = each,
70
- prefix_each = prefix_each,
71
- section = section,
72
- dbs = dbs,
73
- assay = assay,
74
- subset = subset,
75
- sigmarkers = sigmarkers,
76
- volcano_genes = volcano_genes,
77
- dotplot = dotplot,
78
- rest = rest
59
+ allmarker_plots <- lapply(allmarker_plots, function(x) {
60
+ list_update(allmarker_plots_defaults, x)
61
+ })
62
+ allenrich_plots <- lapply(allenrich_plots, function(x) {
63
+ list_update(allenrich_plots_defaults, x)
64
+ })
65
+ marker_plots <- lapply(marker_plots, function(x) {
66
+ list_update(marker_plots_defaults, x)
67
+ })
68
+ enrich_plots <- lapply(enrich_plots, function(x) {
69
+ list_update(enrich_plots_defaults, x)
70
+ })
71
+ overlaps <- lapply(overlaps, function(x) {
72
+ list_update(overlaps_defaults, x)
73
+ })
74
+
75
+ defaults <- list(
76
+ group_by = group_by,
77
+ ident_1 = ident_1,
78
+ ident_2 = ident_2,
79
+ dbs = dbs,
80
+ sigmarkers = sigmarkers,
81
+ enrich_style = enrich_style,
82
+ assay = assay %||% DefaultAssay(srtobj),
83
+ each = each,
84
+ error = error,
85
+ subset = subset,
86
+ allmarker_plots_defaults = allmarker_plots_defaults,
87
+ allmarker_plots = allmarker_plots,
88
+ allenrich_plots_defaults = allenrich_plots_defaults,
89
+ allenrich_plots = allenrich_plots,
90
+ marker_plots_defaults = marker_plots_defaults,
91
+ marker_plots = marker_plots,
92
+ enrich_plots_defaults = enrich_plots_defaults,
93
+ enrich_plots = enrich_plots,
94
+ overlaps_defaults = overlaps_defaults,
95
+ overlaps = overlaps,
96
+ cache = cache,
97
+ rest = rest
98
+ )
99
+
100
+ log$info("Expanding cases ...")
101
+
102
+ post_casing <- function(name, case) {
103
+ outcases <- list()
104
+
105
+ case$group_by <- case$group_by %||% GetIdentityColumn(srtobj)
106
+
107
+ if (is.null(case$each) || is.na(case$each) || nchar(case$each) == 0 || isFALSE(each)) {
108
+ # single cases, no need to expand
109
+ if (length(case$ident_1) > 0 && length(case$overlaps) > 0) {
110
+ stop("Cannot perform 'overlaps' with a single comparison (ident-1 is set) in case '", name, "'")
111
+ }
112
+ if (length(case$ident_1) > 0 && length(case$allmarker_plots) > 0) {
113
+ stop("Cannot perform 'allmarker_plots' with a single comparison (ident-1 is set) in case '", name, "'")
114
+ }
115
+ if (length(case$ident_1) > 0 && length(case$allenrich_plots) > 0) {
116
+ stop("Cannot perform 'allenrich_plots' with a single comparison (ident-1 is set) in case '", name, "'")
117
+ }
118
+
119
+ case$allmarker_plots <- lapply(
120
+ case$allmarker_plots,
121
+ function(x) { list_update(case$allmarker_plots_defaults, x) }
79
122
  )
80
- )
81
- } else {
82
- for (name in names(cases)) {
83
- case <- list_setdefault(
84
- cases[[name]],
85
- ident.1 = ident.1,
86
- ident.2 = ident.2,
87
- group.by = group.by,
88
- each = each,
89
- prefix_each = prefix_each,
90
- section = section,
91
- dbs = dbs,
92
- assay = assay,
93
- subset = subset,
94
- sigmarkers = sigmarkers,
95
- volcano_genes = volcano_genes,
96
- dotplot = dotplot,
97
- rest = rest
123
+ case$allmarker_plots_defaults <- NULL
124
+
125
+ case$allenrich_plots <- lapply(
126
+ case$allenrich_plots,
127
+ function(x) { list_update(case$allenrich_plots_defaults, x) }
98
128
  )
99
- case$rest <- list_update(rest, case$rest)
100
- case$dotplot$devpars <- list_update(dotplot$devpars, case$dotplot$devpars)
101
- cases[[name]] <- case
102
- }
103
- }
104
- # Expand each and with ident.1
105
- # list(Cluster0 = list(each = "Sample", group.by = "seurat_clusters", ident.1 = "0"))
106
- # to
107
- # list(
108
- # `Sample-Sample1:Cluster0` = list(...),
109
- # `Sample-Sample2:Cluster0` = list(...),
110
- # ...
111
- # )
112
- # Expand each and without ident.1
113
- # list(Cluster = list(each = "Sample", group.by = "seurat_clusters"))
114
- # to
115
- # list(
116
- # `Sample-Sample1-Cluster:0` = list(...),
117
- # `Sample-Sample1-Cluster:1` = list(...),
118
- # ...
119
- # `Sample2-Cluster:0` = list(...),
120
- # `Sample2-Cluster:1` = list(...),
121
- # ...
122
- # )
123
- # If no each, and not ident.1
124
- # list(Cluster = list(group.by = "seurat_clusters"))
125
- # to
126
- # list(
127
- # `Cluster:0` = list(...),
128
- # `Cluster:1` = list(...),
129
- # ...
130
- # )
131
- # Otherwise if section is specified, the case name will be changed to `section:case`
132
-
133
- newcases <- list()
134
- for (name in names(cases)) {
135
- case <- cases[[name]]
136
- if (is.null(case$each) && !is.null(case$ident.1)) {
137
- newcases[[paste0(case$section, ":", name)]] <- case
138
- } else if (is.null(case$each)) {
139
- # is.null(case$ident.1)
140
- idents <- srtobj@meta.data %>% pull(case$group.by) %>% unique() %>% na.omit()
141
- for (ident in idents) {
142
- newcases[[paste0(name, ":", ident)]] <- case
143
- newcases[[paste0(name, ":", ident)]]$ident.1 <- ident
129
+ case$allenrich_plots_defaults <- NULL
130
+
131
+ case$marker_plots <- lapply(
132
+ case$marker_plots,
133
+ function(x) { list_update(case$marker_plots_defaults, x) }
134
+ )
135
+ case$marker_plots_defaults <- NULL
136
+
137
+ case$enrich_plots <- lapply(
138
+ case$enrich_plots,
139
+ function(x) { list_update(case$enrich_plots_defaults, x) }
140
+ )
141
+ case$enrich_plots_defaults <- NULL
142
+
143
+ case$overlaps <- lapply(
144
+ case$overlaps,
145
+ function(x) { list_update(case$overlaps_defaults, x) }
146
+ )
147
+ case$overlaps_defaults <- NULL
148
+
149
+ outcases[[name]] <- case
150
+ } else { # !no_each
151
+ eachs <- if (!is.null(case$subset)) {
152
+ srtobj@meta.data %>%
153
+ filter(!!parse_expr(case$subset)) %>%
154
+ pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
155
+ } else {
156
+ srtobj@meta.data %>%
157
+ pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
144
158
  }
145
- } else {
146
- eachs <- srtobj@meta.data %>% pull(case$each) %>% unique() %>% na.omit()
159
+ if (length(case$overlaps) > 0 && is.null(case$ident_1)) {
160
+ stop("Cannot perform 'overlaps' analysis with 'each' and without 'ident_1' in case '", name, "'")
161
+ }
162
+
163
+ if (length(cases) == 0 && name == "Marker Discovery") {
164
+ name <- case$each
165
+ } else {
166
+ name <- paste0(name, " (", case$each, ")")
167
+ }
168
+
147
169
  for (each in eachs) {
148
- by = make.names(paste0(".", name, "_", each))
149
- srtobj@meta.data = srtobj@meta.data %>% mutate(
150
- !!sym(by) := if_else(
151
- !!sym(case$each) == each,
152
- !!sym(case$group.by),
153
- NA
154
- )
155
- )
156
- if (is.null(case$ident.1)) {
157
- idents <- srtobj@meta.data %>% pull(case$group.by) %>% unique() %>% na.omit()
158
- for (ident in idents) {
159
- kname <- if (name == "DEFAULT") "" else paste0("-", name)
160
- key <- paste0(each, kname, ":", ident)
161
- if (case$prefix_each) {
162
- key <- paste0(case$each, "-", key)
163
- }
164
- newcases[[key]] <- case
165
- newcases[[key]]$ident.1 <- ident
166
- newcases[[key]]$group.by <- by
167
- }
170
+ newname <- paste0(name, "::", each)
171
+ newcase <- case
172
+
173
+ newcase$original_case <- name
174
+ newcase$each_name <- case$each
175
+ newcase$each <- each
176
+ newcase$original_subset <- case$subset
177
+
178
+ if (!is.null(case$subset)) {
179
+ newcase$subset <- paste0(case$subset, " & ", bQuote(case$each), " == '", each, "'")
168
180
  } else {
169
- key <- paste0(case$each, ":", each)
170
- if (name != "DEFAULT") {
171
- key <- paste0(key, " - ", name)
172
- }
173
- newcases[[key]] <- case
174
- newcases[[key]]$group.by <- by
181
+ newcase$subset <- paste0(bQuote(case$each), " == '", each, "'")
175
182
  }
183
+
184
+ newcase$marker_plots <- lapply(
185
+ case$marker_plots,
186
+ function(x) { list_update(case$marker_plots_defaults, x) }
187
+ )
188
+ newcase$marker_plots_defaults <- NULL
189
+
190
+ newcase$enrich_plots <- lapply(
191
+ case$enrich_plots,
192
+ function(x) { list_update(case$enrich_plots_defaults, x) }
193
+ )
194
+ newcase$enrich_plots_defaults <- NULL
195
+
196
+ # Will be processed by the case itself, which collects the markers
197
+ newcase$allmarker_plots <- NULL
198
+ newcase$allmarker_plots_defaults <- NULL
199
+ newcase$allenrich_plots <- NULL
200
+ newcase$allenrich_plots_defaults <- NULL
201
+ newcase$overlaps <- NULL
202
+ newcase$overlaps_defaults <- NULL
203
+
204
+ outcases[[newname]] <- newcase
176
205
  }
177
- }
178
- }
179
- cases <- newcases
180
-
181
- plot_volcano = function(markers, volfile, sig, volgenes) {
182
- # markers
183
- # gene p_val avg_log2FC pct.1 pct.2 p_val_adj
184
- # 1 CCL5 1.883596e-11 -4.8282535 0.359 0.927 4.332270e-09
185
- # 2 HLA-DQB1 3.667713e-09 6.1543174 0.718 0.098 8.435740e-07
186
- # 3 HLA-DRB5 1.242993e-07 3.9032231 0.744 0.195 2.858885e-05
187
- # 4 CD79B 2.036731e-07 4.2748835 0.692 0.146 4.684482e-05
188
- log_info("- Plotting volcano plot ...")
189
- markers = markers %>%
190
- mutate(
191
- Significant = if_else(
192
- !!parse_expr(sig),
193
- if_else(avg_log2FC > 0, "Up", "Down"),
194
- "No"
195
- ),
196
- Label = if_else(
197
- Significant != "No" & (isTRUE(volgenes) | (gene %in% volgenes)),
198
- gene,
199
- ""
206
+
207
+ if (length(case$overlaps) > 0 || length(case$allmarker_plots) > 0 || length(case$allenrich_plots) > 0) {
208
+ ovcase <- case
209
+
210
+ ovcase$markers <- list()
211
+ ovcase$allmarker_plots <- lapply(
212
+ ovcase$allmarker_plots,
213
+ function(x) { list_update(ovcase$allmarker_plots_defaults, x) }
200
214
  )
201
- )
215
+ ovcase$allmarker_plots_defaults <- NULL
202
216
 
203
- p_vol = ggplot(markers, aes(x = avg_log2FC, y = -log10(p_val_adj))) +
204
- geom_point(aes(color = Significant), alpha = 0.75) +
205
- scale_color_manual(
206
- values = c(Up = "#FF3333", Down = "#3333FF", No = "#AAAAAA"),
207
- labels = c(Up = "Up", Down = "Down", No = "Non-Significant")
208
- ) +
209
- geom_text_repel(
210
- aes(label = Label),
211
- size = 3,
212
- color = "#000000",
213
- box.padding = unit(0.35, "lines"),
214
- point.padding = unit(0.5, "lines"),
215
- segment.color = "#000000"
216
- ) +
217
- theme_prism() +
218
- theme(legend.title=element_blank(), plot.margin=unit(c(1,1,1,1), "cm")) +
219
- labs(
220
- x = "log2 Fold Change",
221
- y = "-log10 Adjusted P-value"
222
- )
217
+ ovcase$enriches <- list()
218
+ ovcase$allenrich_plots <- lapply(
219
+ ovcase$allenrich_plots,
220
+ function(x) { list_update(ovcase$allenrich_plots_defaults, x) }
221
+ )
222
+ ovcase$allenrich_plots_defaults <- NULL
223
223
 
224
- png(volfile, res = 100, height = 800, width = 900)
225
- print(p_vol)
226
- dev.off()
224
+ ovcase$overlaps <- lapply(
225
+ ovcase$overlaps,
226
+ function(x) { list_update(ovcase$overlaps_defaults, x) }
227
+ )
228
+ ovcase$overlaps_defaults <- NULL
229
+ outcases[[name]] <- ovcase
230
+ }
231
+ }
232
+ outcases
227
233
  }
234
+ cases <- expand_cases(cases, defaults, post_casing, default_case = "Marker Discovery")
228
235
 
229
- # Do enrichment analysis for a case using Enrichr
230
- # Args:
231
- # case: case name
232
- # markers: markers dataframe
233
- # sig: The expression to filter significant markers
234
- do_enrich <- function(case, markers, sig, volgenes) {
235
- log_info("- Running enrichment for case: {case}")
236
- parts <- strsplit(case, ":")[[1]]
237
- sec <- parts[1]
238
- case <- paste0(parts[-1], collapse = ":")
239
- casedir <- file.path(outdir, sec, case)
240
- dir.create(casedir, showWarnings = FALSE, recursive = TRUE)
241
- if (nrow(markers) == 0) {
242
- log_warn(" No markers found for case: {case}")
243
- cat("No markers found.", file = file.path(casedir, "error.txt"))
244
- return()
245
- }
246
- plot_volcano(markers, file.path(casedir, "volcano.png"), sig, volgenes)
247
- markers_sig <- markers %>% filter(!!parse_expr(sig))
248
- if (nrow(markers_sig) == 0) {
249
- log_warn(" No significant markers found for case: {case}")
250
- cat("No significant markers.", file = file.path(casedir, "error.txt"))
251
- return()
252
- }
253
- write.table(
254
- markers_sig,
255
- file.path(casedir, "markers.txt"),
256
- sep = "\t",
257
- row.names = FALSE,
258
- col.names = TRUE,
259
- quote = FALSE
260
- )
261
- if (nrow(markers_sig) < 5) {
262
- for (db in dbs) {
263
- write.table(
264
- data.frame(Warning = "Not enough significant markers."),
265
- file.path(casedir, paste0("Enrichr-", db, ".txt")),
266
- sep = "\t",
267
- row.names = FALSE,
268
- col.names = TRUE,
269
- quote = FALSE
236
+ log$info("Running cases ...")
237
+
238
+ process_markers <- function(markers, info, case) {
239
+ ## Attributes lost
240
+ # markers <- markers %>%
241
+ # mutate(gene = as.character(gene)) %>%
242
+ # arrange(p_val_adj, desc(abs(avg_log2FC)))
243
+ markers$gene <- as.character(markers$gene)
244
+ markers <- markers[order(markers$p_val_adj, -abs(markers$avg_log2FC)), ]
245
+
246
+ # Save markers
247
+ write.table(markers, file.path(info$prefix, "markers.tsv"), sep = "\t", quote = FALSE, row.names = FALSE)
248
+
249
+ sigmarkers <- markers %>% filter(!!parse_expr(case$sigmarkers))
250
+ write.table(sigmarkers, file.path(info$prefix, "sigmarkers.tsv"), sep = "\t", quote = FALSE, row.names = FALSE)
251
+ reporter$add2(
252
+ list(
253
+ name = "Table",
254
+ contents = list(
255
+ list(kind = "descr", content = paste0(
256
+ "Showing top 100 markers ordered by p_val_adj ascendingly, then abs(avg_log2FC) descendingly. ",
257
+ "Use 'Download the entire data' button to download all significant markers by '",
258
+ html_escape(case$sigmarkers), "'."
259
+ )),
260
+ list(kind = "table", src = file.path(info$prefix, "sigmarkers.tsv"), data = list(nrows = 100))
270
261
  )
271
- png(
272
- file.path(casedir, paste0("Enrichr-", db, ".png")),
273
- res = 100, height = 200, width = 1000
262
+ ),
263
+ hs = c(info$section, info$name),
264
+ hs2 = ifelse(is.null(case$ident), "Markers", paste0("Markers (", case$ident, ")")),
265
+ ui = "tabs"
266
+ )
267
+
268
+ if (nrow(markers) > 0) {
269
+ for (plotname in names(case$marker_plots)) {
270
+ plotargs <- case$marker_plots[[plotname]]
271
+ plotargs$markers <- markers
272
+ plotargs$object <- case$object
273
+ plotargs$comparison_by <- case$group_by
274
+ plotargs$outprefix <- file.path(info$prefix, paste0("markers.", slugify(plotname)))
275
+ do_call(VizDEGs, plotargs)
276
+ reporter$add2(
277
+ list(
278
+ name = plotname,
279
+ contents = list(reporter$image(plotargs$outprefix, plotargs$more_formats, plotargs$save_code))),
280
+ hs = c(info$section, info$name),
281
+ hs2 = ifelse(is.null(case$ident), "Markers", paste0("Markers (", case$ident, ")")),
282
+ ui = "tabs"
274
283
  )
275
- print(
276
- ggplot() +
277
- annotate(
278
- "text",
279
- x = 1,
280
- y = 1,
281
- label = "Not enough significant markers."
282
- ) +
283
- theme_classic()
284
+ }
285
+ }
286
+
287
+ # Do enrichment analysis
288
+ significant_markers <- unique(sigmarkers$gene)
289
+ empty <- if (case$enrich_style == "enrichr") {
290
+ data.frame(
291
+ Database = character(0),
292
+ Term = character(0),
293
+ Overlap = character(0),
294
+ P.value = numeric(0),
295
+ Adjusted.P.value = numeric(0),
296
+ Odds.Ratio = numeric(0),
297
+ Combined.Score = numeric(0),
298
+ Genes = character(0),
299
+ Rank = numeric(0)
300
+ )
301
+ } else { # clusterProfiler
302
+ data.frame(
303
+ ID = character(0),
304
+ Description = character(0),
305
+ GeneRatio = character(0),
306
+ BgRatio = character(0),
307
+ Count = integer(0),
308
+ pvalue = numeric(0),
309
+ p.adjust = numeric(0),
310
+ qvalue = numeric(0),
311
+ geneID = character(0),
312
+ Database = character(0)
313
+ )
314
+ }
315
+
316
+ if (length(significant_markers) < 5) {
317
+ if (case$error) {
318
+ stop("Error: Not enough significant markers with '", case$sigmarkers, "' in case '", info$name, "' found (< 5) for enrichment analysis.")
319
+ } else {
320
+ message <- paste0("Not enough significant markers with '", case$sigmarkers, "' found (< 5) for enrichment analysis.")
321
+ log$warn(" ! Error: {message}")
322
+ reporter$add2(
323
+ list(
324
+ name = "Warning",
325
+ contents = list(list(kind = "error", content = message, kind_ = "warning"))),
326
+ hs = c(info$section, info$name),
327
+ hs2 = "Enrichment Analysis",
328
+ ui = "tabs"
284
329
  )
285
- dev.off()
286
330
  }
331
+ return(empty)
287
332
  } else {
288
- enriched <- enrichr(markers_sig$gene, dbs)
289
- for (db in dbs) {
290
- write.table(
291
- enriched[[db]],
292
- file.path(casedir, paste0("Enrichr-", db, ".txt")),
293
- sep = "\t",
294
- row.names = FALSE,
295
- col.names = TRUE,
296
- quote = FALSE
297
- )
298
- png(
299
- file.path(casedir, paste0("Enrichr-", db, ".png")),
300
- res = 100, height = 1000, width = 1000
333
+ tryCatch({
334
+ enrich <- RunEnrichment(
335
+ significant_markers,
336
+ dbs = case$dbs, style = case$enrich_style)
337
+
338
+ write.table(enrich, file.path(info$prefix, "enrich.tsv"), sep = "\t", quote = FALSE, row.names = FALSE)
339
+ reporter$add2(
340
+ list(
341
+ name = "Table",
342
+ contents = list(list(kind = "table", src = file.path(info$prefix, "enrich.tsv"), data = list(nrows = 100)))
343
+ ),
344
+ hs = c(info$section, info$name),
345
+ hs2 = "Enrichment Analysis",
346
+ ui = "tabs"
301
347
  )
302
- print(plotEnrich(enriched[[db]], showTerms = 20, title = db))
303
- dev.off()
304
- }
348
+
349
+ # Visualize enriched terms
350
+ if (length(case$enrich_plots) > 0) {
351
+ for (db in case$dbs) {
352
+ plots <- list()
353
+ for (plotname in names(case$enrich_plots)) {
354
+ plotargs <- extract_vars(case$enrich_plots[[plotname]], "descr", allow_nonexisting = TRUE)
355
+ plotargs$data <- enrich[enrich$Database == db, , drop = FALSE]
356
+
357
+ p <- tryCatch(
358
+ do_call(VizEnrichment, plotargs),
359
+ error = function(e) {
360
+ stop("Failed to plot enrichment for database '", db, "' with plot '", plotname, "': ", e$message)
361
+ }
362
+ )
363
+
364
+ if (plotargs$plot_type == "bar") {
365
+ attr(p, "height") <- attr(p, "height") / 1.5
366
+ descr <- descr %||% glue::glue(
367
+ "The bar plot shows the top enriched terms in database '{db}', ",
368
+ "the x-axis shows the -log10 of the adjusted p-values, ",
369
+ "and the y-axis shows the term names. The number next to each bar indicates the overlap gene count."
370
+ )
371
+ }
372
+ outprefix <- file.path(info$prefix, paste0("enrich.", slugify(db), ".", slugify(plotname)))
373
+ save_plot(p, outprefix, plotargs$devpars, formats = "png")
374
+ if (!is.null(descr)) {
375
+ plots[[length(plots) + 1]] <- list(kind = "descr", content = glue::glue(descr))
376
+ }
377
+ plots[[length(plots) + 1]] <- reporter$image(outprefix, c(), FALSE)
378
+ }
379
+ reporter$add2(
380
+ list(name = db, contents = plots),
381
+ hs = c(info$section, info$name),
382
+ hs2 = "Enrichment Analysis",
383
+ ui = "tabs"
384
+ )
385
+ }
386
+ }
387
+ return(enrich)
388
+ }, error = function(e) {
389
+ if (case$error) {
390
+ stop("Error: ", e$message)
391
+ } else {
392
+ log$warn(" ! Error: {e$message}")
393
+ reporter$add2(
394
+ list(
395
+ name = "Warning",
396
+ contents = list(list(kind = "error", content = e$message, kind_ = "warning"))),
397
+ hs = c(info$section, info$name),
398
+ hs2 = "Enrichment Analysis",
399
+ ui = "tabs"
400
+ )
401
+ }
402
+ return(empty)
403
+ })
305
404
  }
306
405
  }
307
406
 
407
+ process_allmarkers <- function(markers, object, comparison_by, plotcases, casename, groupname, subset_by_group = TRUE) {
408
+ name <- paste0(casename, "::", paste0(groupname, " (All Markers)"))
409
+ info <- case_info(name, outdir, create = TRUE)
308
410
 
309
- do_case <- function(casename) {
310
- log_info("Dealing with case: {casename}...")
311
- sec_case_names <- strsplit(casename, ":")[[1]]
312
- cname <- paste(sec_case_names[-1], collapse = ":")
313
- case <- cases[[casename]]
314
- # ident1
315
- # ident2
316
- # groupby
317
- # each # expanded
318
- # prefix_each
319
- # dbs
320
- # sigmarkers
321
- # rest
322
- args <- case$rest
323
- args$group.by <- case$group.by
324
- args$ident.1 <- case$ident.1
325
- args$ident.2 <- case$ident.2
326
- if (is.null(args$logfc.threshold)) {
327
- args$locfc.threshold <- 0
328
- }
329
- if (is.null(args$min.cells.group)) {
330
- args$min.cells.group <- 1
331
- }
332
- if (is.null(args$min.cells.feature)) {
333
- args$min.cells.feature <- 1
334
- }
335
- if (is.null(args$min.pct)) {
336
- args$min.pct <- 0
411
+ for (plotname in names(plotcases)) {
412
+ log$info(" {plotname} ...")
413
+ plotargs <- plotcases[[plotname]]
414
+ plotargs$markers <- markers
415
+ plotargs$object <- object
416
+ plotargs$comparison_by <- comparison_by
417
+ if (subset_by_group)
418
+ plotargs$subset_by <- groupname
419
+ plotargs$outprefix <- file.path(info$prefix, slugify(plotname))
420
+ do_call(VizDEGs, plotargs)
421
+ reporter$add2(
422
+ list(
423
+ name = plotname,
424
+ contents = list(reporter$image(plotargs$outprefix, plotargs$more_formats, plotargs$save_code))
425
+ ),
426
+ hs = c(info$section, info$name),
427
+ ui = "tabs"
428
+ )
337
429
  }
338
- if (!is.null(case$subset)) {
339
- args$object <- srtobj %>% filter(!!parse_expr(case$subset) & filter(!is.na(!!sym(case$group.by))))
340
- } else {
341
- args$object <- srtobj %>% filter(!is.na(!!sym(case$group.by)))
430
+ }
431
+
432
+ process_allenriches <- function(enriches, plotcases, casename, groupname) {
433
+ name <- paste0(casename, "::", paste0(groupname, " (All Enrichments)"))
434
+ info <- case_info(name, outdir, create = TRUE)
435
+ dbs <- unique(as.character(enriches$Database))
436
+
437
+ for (db in dbs) {
438
+ plots <- list()
439
+ for (plotname in names(plotcases)) {
440
+ log$info(" {plotname} ({db}) ...")
441
+ plotargs <- plotcases[[plotname]]
442
+ plotargs <- extract_vars(plotargs, "devpars")
443
+ plotargs$data <- enriches[enriches$Database == db, , drop = FALSE]
444
+ if (plotargs$plot_type == "heatmap") {
445
+ plotargs$group_by <- groupname
446
+ plotargs$show_row_names = plotargs$show_row_names %||% TRUE
447
+ plotargs$show_column_names = plotargs$show_column_names %||% TRUE
448
+ }
449
+
450
+ p <- do_call(VizEnrichment, plotargs)
451
+
452
+ if (plotargs$plot_type == "bar") {
453
+ attr(p, "height") <- attr(p, "height") / 1.5
454
+ }
455
+ outprefix <- file.path(info$prefix, paste0("allenrich.", slugify(db), ".", slugify(plotname)))
456
+ save_plot(p, outprefix, devpars, formats = "png")
457
+ plots[[length(plots) + 1]] <- reporter$image(outprefix, c(), FALSE)
458
+ }
459
+ reporter$add2(
460
+ list(name = db, contents = plots),
461
+ hs = c(info$section, info$name),
462
+ hs2 = plotname,
463
+ ui = "tabs"
464
+ )
342
465
  }
343
- markers <- tryCatch({
344
- do_call(FindMarkers, args) %>% rownames_to_column("gene")
345
- }, error = function(e) {
346
- warning(e$message, immediate. = TRUE)
347
- data.frame(
348
- gene = character(),
349
- p_val = numeric(),
350
- avg_log2FC = numeric(),
351
- pct.1 = numeric(),
352
- pct.2 = numeric(),
353
- p_val_adj=numeric()
466
+ }
467
+
468
+ process_overlaps <- function(markers, ovcases, casename, groupname) {
469
+ name <- paste0(casename, "::", paste0(groupname, " (Overlaps)"))
470
+ info <- case_info(name, outdir, create = TRUE)
471
+
472
+ for (plotname in names(ovcases)) {
473
+ log$info(" {plotname} ...")
474
+ args <- extract_vars(
475
+ ovcases[[plotname]],
476
+ sigm = "sigmarkers", "more_formats", "save_code", "devpars", "plot_type",
477
+ allow_nonexisting = TRUE
354
478
  )
355
- })
356
- do_enrich(casename, markers, case$sigmarkers, case$volcano_genes)
357
-
358
- siggenes <- markers %>%
359
- filter(!!parse_expr(case$sigmarkers)) %>%
360
- pull(gene) %>%
361
- unique()
362
-
363
- if (length(siggenes) > 0) {
364
- dotplot_devpars <- case$dotplot$devpars
365
- if (is.null(args$ident.2)) {
366
- case$dotplot$object <- args$object
367
- case$dotplot$object@meta.data <- case$dotplot$object@meta.data %>%
368
- mutate(
369
- !!sym(args$group.by) := if_else(
370
- !!sym(args$group.by) == args$ident.1,
371
- args$ident.1,
372
- ".Other"
373
- ),
374
- !!sym(args$group.by) := factor(
375
- !!sym(args$group.by),
376
- levels = c(args$ident.1, ".Other")
377
- )
479
+
480
+ sigm <- sigm %||% sigmarkers
481
+ ugroups <- unique(markers[[groupname]])
482
+ m <- lapply(ugroups, function(g) {
483
+ markers[markers[[groupname]] == g, , drop = FALSE] %>%
484
+ filter(!!parse_expr(sigm)) %>%
485
+ pull("gene") %>% unique()
486
+ })
487
+ names(m) <- ugroups
488
+
489
+ if (plot_type == "venn") {
490
+ args$data <- m
491
+ args$in_form <- "list"
492
+ prefix <- file.path(info$prefix, slugify(plotname))
493
+ p <- do_call(gglogger::register(VennDiagram), args)
494
+ save_plot(p, prefix, devpars, formats = c("png", more_formats))
495
+ if (save_code) {
496
+ save_plotcode(
497
+ p, prefix,
498
+ c("library(plotthis)", "load('data.RData')", "invisible(list2env(args, .GlobalEnv))"),
499
+ "args",
500
+ auto_data_setup = FALSE
378
501
  )
502
+ }
379
503
  } else {
380
- case$dotplot$object <- args$object %>%
381
- filter(!!sym(args$group.by) %in% c(args$ident.1, args$ident.2)) %>%
382
- mutate(!!sym(args$group.by) := factor(
383
- !!sym(args$group.by),
384
- levels = c(args$ident.1, args$ident.2)
385
- ))
504
+ args$data <- m
505
+ args$in_form <- "list"
506
+ prefix <- file.path(info$prefix, slugify(plotname))
507
+ p <- do_call(gglogger::register(UpsetPlot), args)
508
+ save_plot(p, prefix, devpars, formats = c("png", more_formats))
509
+ if (save_code) {
510
+ save_plotcode(
511
+ p, prefix,
512
+ c("library(plotthis)", "load('data.RData')", "invisible(list2env(args, .GlobalEnv))"),
513
+ "args",
514
+ auto_data_setup = FALSE
515
+ )
516
+ }
386
517
  }
387
- case$dotplot$devpars <- NULL
388
- case$dotplot$features <- siggenes
389
- case$dotplot$group.by <- args$group.by
390
- case$dotplot$assay <- case$assay
391
- dotplot_width = ifelse(
392
- is.null(dotplot_devpars$width),
393
- if (length(siggenes) <= 20) length(siggenes) * 60 else length(siggenes) * 30,
394
- dotplot_devpars$width
395
- )
396
- dotplot_height = ifelse(is.null(dotplot_devpars$height), 600, dotplot_devpars$height)
397
- dotplot_res = ifelse(is.null(dotplot_devpars$res), 100, dotplot_devpars$res)
398
- dotplot_file <- file.path(outdir, sec_case_names[1], cname, "dotplot.png")
399
- png(dotplot_file, res = dotplot_res, width = dotplot_height, height = dotplot_width)
400
- # rotate x axis labels
401
- print(
402
- do_call(DotPlot, case$dotplot) +
403
- theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
404
- coord_flip()
518
+
519
+ reporter$add2(
520
+ list(
521
+ name = plotname,
522
+ contents = list(reporter$image(prefix, more_formats, save_code))
523
+ ),
524
+ hs = c(info$section, info$name),
525
+ ui = "tabs"
405
526
  )
406
- dev.off()
407
527
  }
528
+ }
529
+
530
+ run_case <- function(name) {
531
+ case <- cases[[name]]
532
+ log$info("Case: {name} ...")
533
+
534
+ case <- extract_vars(
535
+ case,
536
+ "dbs", "sigmarkers", "allmarker_plots", "allenrich_plots", "marker_plots", "enrich_plots",
537
+ "overlaps", "original_case", "markers", "enriches", "each_name", "each", "enrich_style", "original_subset",
538
+ subset_ = "subset",
539
+ allow_nonexisting = TRUE
540
+ )
541
+
542
+ if (!is.null(markers) || !is.null(enriches)) {
543
+ if (!is.null(markers)) { # It is the overlap/allmarker case
544
+ log$info("- Summarizing markers in subcases (by each: {each}) ...")
545
+ # handle the overlaps / allmarkers analysis here
546
+ if (!is.data.frame(markers)) {
547
+ each_levels <- names(markers)
548
+ markers <- do_call(rbind, lapply(each_levels, function(x) {
549
+ markers_df <- markers[[x]]
550
+ if (nrow(markers_df) > 0) {
551
+ markers_df[[each]] <- x
552
+ } else {
553
+ markers_df[[each]] <- character(0) # Empty case
554
+ }
555
+ markers_df
556
+ }))
557
+ markers[[each]] <- factor(markers[[each]], levels = each_levels)
558
+ }
559
+ # gene, p_val, avg_log2FC, pct.1, pct.2, p_val_adj, diff_pct, <each>
560
+
561
+ if (length(allmarker_plots) > 0) {
562
+ log$info("- Visualizing all markers together ...")
563
+ if (is.null(original_subset)) {
564
+ attr(markers, "object") <- srtobj
565
+ } else {
566
+ attr(markers, "object") <- filter(srtobj, !!parse_expr(original_subset))
567
+ }
568
+ attr(markers, "group_by") <- each
569
+ attr(markers, "ident_1") <- NULL
570
+ attr(markers, "ident_2") <- NULL
571
+ if (!is.null(markers) && nrow(markers) > 0) {
572
+ process_allmarkers(
573
+ markers,
574
+ object = if (is.null(original_subset)) srtobj else filter(srtobj, !!parse_expr(original_subset)),
575
+ comparison_by = group_by,
576
+ allmarker_plots,
577
+ name,
578
+ each
579
+ )
580
+ }
581
+ }
582
+
583
+ if (length(overlaps) > 0) {
584
+ log$info("- Visualizing overlaps between subcases ...")
585
+ process_overlaps(markers, overlaps, name, each)
586
+ }
408
587
 
409
- if (sec_case_names[1] %in% overlap) {
410
- if (is.null(overlaps[[sec_case_names[1]]])) {
411
- overlaps[[sec_case_names[1]]] <<- list()
412
588
  }
413
- overlaps[[sec_case_names[1]]][[cname]] <<- siggenes
414
- }
415
- }
416
589
 
417
- do_overlap <- function(section) {
418
- log_info("Dealing with overlap: {section}...")
590
+ if (!is.null(enriches) && length(enriches) > 0) {
591
+ log$info("- Summarizing enrichments in subcases (by each: {each}) ...")
592
+ if (!is.data.frame(enriches)) {
593
+ each_levels <- names(enriches)
594
+ enriches <- do_call(rbind, lapply(each_levels, function(x) {
595
+ enrich_df <- enriches[[x]]
596
+ if (nrow(enrich_df) > 0) {
597
+ enrich_df[[each]] <- x
598
+ } else {
599
+ enrich_df[[each]] <- character(0) # Empty case
600
+ }
601
+ enrich_df
602
+ }))
603
+ enriches[[each]] <- factor(enriches[[each]], levels = each_levels)
604
+ }
419
605
 
420
- ov_dir <- file.path(outdir, "OVERLAPS", section)
421
- dir.create(ov_dir, showWarnings = FALSE, recursive = TRUE)
606
+ if (length(allenrich_plots) > 0 && !is.null(enriches) && nrow(enriches) > 0) {
607
+ log$info("- Visualizing all enrichments together ...")
608
+ # add other metadata columns if any by mapping groupname
609
+ # only add the metadata columns from object if there is a single value mapped
610
+ metacols <- srtobj@meta.data %>% group_by(!!sym(each)) %>%
611
+ summarize(across(everything(), ~ n_distinct(.) == 1), .groups = "keep") %>%
612
+ select(where(~ all(. == TRUE))) %>%
613
+ colnames()
422
614
 
423
- ov_cases <- overlaps[[section]]
424
- if (length(ov_cases) < 2) {
425
- stop(sprintf(" Not enough cases for overlap: %s", section))
426
- }
615
+ if (length(metacols) > 1) {
616
+ metadf <- srtobj@meta.data[, metacols, drop = FALSE] %>%
617
+ distinct(!!sym(each), .keep_all = TRUE)
618
+
619
+ for (col in setdiff(metacols, each)) {
620
+ if (col %in% colnames(enriches)) {
621
+ warning("Column name conflict: {col}, adding with suffix '_meta'", immediate. = TRUE)
622
+ metadf[[paste0(col, "_meta")]] <- metadf[[col]]
623
+ metadf[[col]] <- NULL
624
+ }
625
+ }
626
+
627
+ enriches <- left_join(enriches, metadf, by = each)
628
+ }
629
+
630
+ process_allenriches(enriches, allenrich_plots, name, each)
631
+ }
632
+ }
427
633
 
428
- if (length(ov_cases) <= 4) {
429
- venn_plot <- file.path(ov_dir, "venn.png")
430
- venn_p <- ggVennDiagram(ov_cases, label_percent_digit = 1) +
431
- scale_fill_distiller(palette = "Reds", direction = 1) +
432
- scale_x_continuous(expand = expansion(mult = .2))
433
- png(venn_plot, res = 100, width = 1000, height = 600)
434
- print(venn_p)
435
- dev.off()
634
+ return(invisible())
436
635
  }
437
636
 
438
- df_markers <- fromList(ov_cases)
439
- # A B MARKERS
440
- # 1 0 G1
441
- # 1 0 G2
442
- # 0 1 G3
443
- # 0 1 G4
444
- # 1 1 G5
445
- df_markers$MARKERS = Reduce(union, ov_cases)
446
- df_markers = df_markers %>%
447
- group_by(across(-MARKERS)) %>%
448
- summarise(MARKERS = paste0(MARKERS, collapse = ","), .groups = "drop")
449
-
450
- write.table(
451
- df_markers,
452
- file.path(ov_dir, "markers.txt"),
453
- sep = "\t",
454
- row.names = FALSE,
455
- col.names = TRUE,
456
- quote = FALSE
457
- )
637
+ # Let RunSeuratDEAnalysis handle the subset
638
+ case$subset <- subset_
639
+ case$object <- srtobj
640
+ markers <- do_call(RunSeuratDEAnalysis, case)
641
+ case$object <- NULL # Release memory
642
+ gc()
643
+
644
+ subobj <- if (is.null(subset_)) srtobj else filter(srtobj, !!parse_expr(subset_))
645
+
646
+ if (is.null(case$ident_1)) {
647
+ all_idents <- unique(as.character(markers[[case$group_by]]))
648
+ enriches <- list()
649
+ for (ident in all_idents) {
650
+ log$info("- {case$group_by}: {ident} ...")
651
+ ident_markers <- markers[markers[[case$group_by]] == ident, , drop = TRUE]
652
+ casename <- paste0(name, "::", paste0(case$group_by, ": ", ident))
653
+ info <- case_info(casename, outdir, create = TRUE)
654
+
655
+ attr(ident_markers, "ident_1") <- ident
656
+ enrich <- process_markers(ident_markers, info = info, case = list(
657
+ object = subobj,
658
+ dbs = dbs,
659
+ group_by = case$group_by,
660
+ sigmarkers = sigmarkers,
661
+ enrich_style = enrich_style,
662
+ marker_plots = marker_plots,
663
+ enrich_plots = enrich_plots,
664
+ error = case$error,
665
+ ident = NULL
666
+ ))
667
+ enriches[[ident]] <- enrich
668
+ }
458
669
 
459
- upset_plot <- file.path(ov_dir, "upset.png")
460
- upset_p <- upset(fromList(ov_cases))
461
- png(upset_plot, res = 100, width = 800, height = 600)
462
- print(upset_p)
463
- dev.off()
670
+ if (length(allmarker_plots) > 0) {
671
+ log$info("- Visualizing all markers together ...")
672
+ process_allmarkers(
673
+ markers,
674
+ object = subobj,
675
+ comparison_by = case$group_by,
676
+ plotcases = allmarker_plots,
677
+ casename = name,
678
+ groupname = case$group_by,
679
+ subset_by_group = FALSE)
680
+ }
681
+
682
+ if (length(overlaps) > 0) {
683
+ log$info("- Visualizing overlaps between subcases ...")
684
+ process_overlaps(markers, overlaps, name, case$group_by)
685
+ }
686
+
687
+ if (length(allenrich_plots) > 0) {
688
+ log$info("- Visualizing all enrichments together ...")
689
+ # add other metadata columns if any by mapping groupname
690
+ # only add the metadata columns from object if there is a single value mapped
691
+ metacols <- subobj@meta.data %>% group_by(!!sym(case$group_by)) %>%
692
+ summarize(across(everything(), ~ n_distinct(.) == 1), .groups = "keep") %>%
693
+ select(where(~ all(. == TRUE))) %>%
694
+ colnames()
695
+
696
+ if (length(metacols) > 1) {
697
+ metadf <- subobj@meta.data[, metacols, drop = FALSE] %>%
698
+ distinct(!!sym(case$group_by), .keep_all = TRUE)
699
+
700
+ for (col in setdiff(metacols, case$group_by)) {
701
+ if (col %in% colnames(enriches[[1]])) {
702
+ warning("Column name conflict: {col}, adding with suffix '_meta'", immediate. = TRUE)
703
+ metadf[[paste0(col, "_meta")]] <- metadf[[col]]
704
+ metadf[[col]] <- NULL
705
+ }
706
+ }
707
+
708
+ for (ne in names(enriches)) {
709
+ if (!case$group_by %in% colnames(enriches[[ne]])) {
710
+ enriches[[ne]][[case$group_by]] <- ne
711
+ }
712
+ enriches[[ne]] <- left_join(enriches[[ne]], metadf, by = case$group_by)
713
+ }
714
+ }
715
+ enriches <- do_call(rbind, enriches)
716
+ process_allenriches(enriches, allenrich_plots, name, case$group_by)
717
+ }
718
+ } else {
719
+ info <- case_info(name, outdir, create = TRUE)
720
+ enrich <- process_markers(markers, info = info, case = list(
721
+ object = subobj,
722
+ dbs = dbs,
723
+ group_by = case$group_by,
724
+ sigmarkers = sigmarkers,
725
+ enrich_style = enrich_style,
726
+ marker_plots = marker_plots,
727
+ enrich_plots = enrich_plots,
728
+ error = case$error,
729
+ ident = if (is.null(case$ident_2)) case$ident_1 else paste0(case$ident_1, " vs ", case$ident_2)
730
+ ))
731
+
732
+ if (!is.null(original_case) && !is.null(cases[[original_case]])) {
733
+ if (nrow(markers) > 0) {
734
+ markers[[each_name]] <- each
735
+ }
736
+ cases[[original_case]]$markers[[each]] <<- markers
737
+ cases[[original_case]]$enriches[[each]] <<- enrich
738
+ }
739
+ }
740
+
741
+ invisible()
464
742
  }
465
743
 
466
- sapply(sort(names(cases)), do_case)
467
- sapply(sort(names(overlaps)), do_overlap)
744
+ sapply(names(cases), run_case)
745
+
746
+ reporter$save(joboutdir)