biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
@@ -1,5 +1,5 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
- source("{{biopipen_dir}}/utils/mutate_helpers.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+ {{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
3
3
 
4
4
  library(rlang)
5
5
  library(dplyr)
@@ -15,13 +15,15 @@ library(tidyseurat)
15
15
 
16
16
  setEnrichrSite("Enrichr")
17
17
 
18
- srtfile <- {{ in.srtobj | quote }}
19
- outdir <- {{ out.outdir | quote }}
18
+ srtfile <- {{ in.srtobj | r }}
19
+ outdir <- {{ out.outdir | r }}
20
+ joboutdir <- {{ job.outdir | r }}
20
21
  ncores <- {{ envs.ncores | int }}
21
22
  mutaters <- {{ envs.mutaters | r }}
22
23
  idents <- {{ envs.idents | r }}
23
24
  group_by <- {{ envs["group-by"] | r }}
24
25
  each <- {{ envs.each | r }}
26
+ subset <- {{ envs.subset | r }}
25
27
  prefix_each <- {{ envs.prefix_each | r }}
26
28
  p_adjust <- {{ envs.p_adjust | r }}
27
29
  section <- {{ envs.section | r }}
@@ -32,65 +34,72 @@ cases <- {{ envs.cases | r: todot = "-" }}
32
34
 
33
35
  set.seed(8525)
34
36
 
35
- print("- Reading Seurat object ...")
37
+ log_info("- Reading Seurat object ...")
36
38
  srtobj <- readRDS(srtfile)
39
+ if (DefaultAssay(srtobj) == "SCT" && !"PrepSCTFindMarkers" %in% names(srtobj@commands)) {
40
+ log_warn("- SCTransform used but PrepSCTFindMarkers not applied, running ...")
37
41
 
38
- print("- Mutate meta data if needed ...")
42
+ srtobj <- PrepSCTFindMarkers(srtobj)
43
+ # compose a new SeuratCommand to record it to srtobj@commands
44
+ commands <- names(pbmc_small@commands)
45
+ scommand <- pbmc_small@commands[[commands[length(commands)]]]
46
+ scommand@name <- "PrepSCTFindMarkers"
47
+ scommand@time.stamp <- Sys.time()
48
+ scommand@assay.used <- "SCT"
49
+ scommand@call.string <- "PrepSCTFindMarkers(object = srtobj)"
50
+ scommand@params <- list()
51
+ srtobj@commands$PrepSCTFindMarkers <- scommand
52
+ }
53
+
54
+ log_info("- Mutate meta data if needed ...")
39
55
  if (!is.null(mutaters) && length(mutaters)) {
40
56
  srtobj@meta.data <- srtobj@meta.data %>% mutate(!!!lapply(mutaters, parse_expr))
41
57
  }
42
58
 
43
- print("- Expanding cases ...")
44
- if (is.null(cases) || length(cases) == 0) {
45
- cases <- list(
46
- DEFAULT = list(
47
- idents = idents,
48
- group_by = group_by,
49
- each = each,
50
- prefix_each = prefix_each,
51
- p_adjust = p_adjust,
52
- section = section,
53
- dbs = dbs,
54
- sigmarkers = sigmarkers,
55
- method = method
56
- )
57
- )
58
- } else {
59
- for (name in names(cases)) {
60
- case <- list_setdefault(
61
- cases[[name]],
62
- idents = idents,
63
- group_by = group_by,
64
- each = each,
65
- prefix_each = prefix_each,
66
- p_adjust = p_adjust,
67
- section = section,
68
- dbs = dbs,
69
- sigmarkers = sigmarkers,
70
- method = method
71
- )
72
- cases[[name]] <- case
73
- }
74
- }
59
+ defaults <- list(
60
+ idents = idents,
61
+ group_by = group_by,
62
+ each = each,
63
+ prefix_each = prefix_each,
64
+ p_adjust = p_adjust,
65
+ subset = subset,
66
+ section = section,
67
+ dbs = dbs,
68
+ sigmarkers = sigmarkers,
69
+ method = method
70
+ )
75
71
 
76
- newcases <- list()
77
- for (name in names(cases)) {
78
- case <- cases[[name]]
79
- if (is.null(case$each)) {
80
- newcases[[paste0(case$section, ":", name)]] <- case
72
+ expand_each <- function(name, case) {
73
+ outcases <- list()
74
+ if (is.null(case$each) || nchar(case$each) == 0) {
75
+ if (is.null(case$section) || case$section == "DEFAULT") {
76
+ outcases[[name]] <- case
77
+ } else {
78
+ outcases[[paste0(case$section, "::", name)]] <- case
79
+ }
81
80
  } else {
82
- eachs <- srtobj@meta.data %>% pull(case$each) %>% unique() %>% na.omit()
81
+ if (!is.null(case$section) && case$section != "DEFAULT") {
82
+ log_warn(" Ignoring `section` in case `{name}` when `each` is set.")
83
+ case$section <- NULL
84
+ }
85
+ if (is.null(case$subset)) {
86
+ eachs <- srtobj@meta.data %>%
87
+ pull(case$each) %>% unique() %>% na.omit() %>% as.vector()
88
+ } else {
89
+ eachs <- srtobj@meta.data %>% filter(!!parse_expr(case$subset)) %>%
90
+ pull(case$each) %>% unique() %>% na.omit()
91
+ }
83
92
  for (each in eachs) {
84
- by = make.names(paste0(".", name, "_", case$each, "_", each))
93
+ by = make.names(paste0("..", name, "_", case$each, "_", each))
85
94
  idents <- case$idents
86
95
  if (is.null(idents) || length(idents) == 0) {
87
- srtobj@meta.data = srtobj@meta.data %>%
96
+ srtobj@meta.data <<- srtobj@meta.data %>%
88
97
  mutate(
89
98
  !!sym(by) := if_else(!!sym(case$each) == each, !!sym(case$group_by), NA)
90
99
  )
91
100
  idents <- srtobj@meta.data %>% pull(case$group_by) %>% unique() %>% na.omit()
92
101
  } else {
93
- srtobj@meta.data = srtobj@meta.data %>%
102
+ srtobj@meta.data <<- srtobj@meta.data %>%
94
103
  mutate(
95
104
  !!sym(by) := if_else(
96
105
  !!sym(case$each) == each & !!sym(case$group_by) %in% case$idents,
@@ -100,158 +109,253 @@ for (name in names(cases)) {
100
109
  )
101
110
  }
102
111
 
103
- key <- paste0(case$each, ":", each)
104
- if (name != "DEFAULT") {
105
- key <- paste0(key, " - ", name)
112
+ if (isTRUE(case$prefix_each)) {
113
+ key <- paste0(name, "::", case$each, " - ", each)
114
+ } else {
115
+ key <- paste0(name, "::", each)
106
116
  }
107
- newcases[[key]] <- case
108
- newcases[[key]]$group_by <- by
109
- newcases[[key]]$idents <- idents
117
+ outcases[[key]] <- case
118
+ outcases[[key]]$section <- name
119
+ outcases[[key]]$group_by <- by
110
120
  }
111
121
  }
122
+ outcases
112
123
  }
113
- cases <- newcases
114
124
 
125
+ log_info("- Expanding cases ...")
126
+ cases <- expand_cases(cases, defaults, expand_each)
115
127
 
116
128
  # Do enrichment analysis for a case using Enrichr
117
129
  # Args:
118
130
  # case: case name
119
131
  # markers: markers dataframe
120
132
  # sig: The expression to filter significant markers
121
- do_enrich <- function(case, markers, sig) {
122
- print(paste(" Running enrichment for case:", case))
123
- parts <- strsplit(case, ":")[[1]]
124
- sec <- parts[1]
125
- case <- paste0(parts[-1], collapse = ":")
126
- casedir <- file.path(outdir, sec, case)
127
- dir.create(casedir, showWarnings = FALSE, recursive = TRUE)
133
+ do_enrich <- function(info, markers, sig) {
134
+ log_info(" Running enrichment for case: {info$casename}")
128
135
  if (nrow(markers) == 0) {
129
- print(paste(" No markers found for case:", case))
130
- cat("No markers found.", file = file.path(casedir, "error.txt"))
131
- return()
136
+ msg <- paste0("No markers found for case: ", info$casename)
137
+ return(msg)
132
138
  }
133
139
  markers_sig <- markers %>% filter(!!parse_expr(sig))
134
140
  if (nrow(markers_sig) == 0) {
135
- print(paste(" No significant markers found for case:", case))
136
- cat("No significant markers.", file = file.path(casedir, "error.txt"))
137
- return()
141
+ msg <- paste0("No significant markers found for case: ", info$casename)
142
+ return(msg)
138
143
  }
139
144
  write.table(
140
145
  markers_sig,
141
- file.path(casedir, "markers.txt"),
146
+ file.path(info$casedir, "markers.txt"),
142
147
  sep = "\t",
143
148
  row.names = FALSE,
144
149
  col.names = TRUE,
145
150
  quote = FALSE
146
151
  )
152
+
147
153
  if (nrow(markers_sig) < 5) {
148
- for (db in dbs) {
149
- write.table(
150
- data.frame(Warning = "Not enough significant markers."),
151
- file.path(casedir, paste0("Enrichr-", db, ".txt")),
152
- sep = "\t",
153
- row.names = FALSE,
154
- col.names = TRUE,
155
- quote = FALSE
156
- )
157
- png(
158
- file.path(casedir, paste0("Enrichr-", db, ".png")),
159
- res = 100, height = 200, width = 1000
160
- )
161
- print(
162
- ggplot() +
163
- annotate(
164
- "text",
165
- x = 1,
166
- y = 1,
167
- label = "Not enough significant markers."
168
- ) +
169
- theme_classic()
170
- )
171
- dev.off()
172
- }
173
- } else {
174
- enriched <- enrichr(markers_sig$gene, dbs)
175
- for (db in dbs) {
176
- write.table(
177
- enriched[[db]],
178
- file.path(casedir, paste0("Enrichr-", db, ".txt")),
179
- sep = "\t",
180
- row.names = FALSE,
181
- col.names = TRUE,
182
- quote = FALSE
183
- )
184
- png(
185
- file.path(casedir, paste0("Enrichr-", db, ".png")),
186
- res = 100, height = 1000, width = 1000
187
- )
188
- print(plotEnrich(enriched[[db]], showTerms = 20, title = db))
189
- dev.off()
154
+ msg <- paste0("Too few significant markers found for case: ", info$casename)
155
+ return(msg)
156
+ }
157
+
158
+ enriched <- enrichr(markers_sig$gene, dbs)
159
+ for (db in dbs) {
160
+ write.table(
161
+ enriched[[db]],
162
+ file.path(info$casedir, paste0("Enrichr-", db, ".txt")),
163
+ sep = "\t",
164
+ row.names = FALSE,
165
+ col.names = TRUE,
166
+ quote = FALSE
167
+ )
168
+
169
+ if (nrow(enriched[[db]]) == 0) {
170
+ log_info(paste0(" No enriched terms for ", db))
171
+ next
190
172
  }
173
+
174
+ p <- plotEnrich(enriched[[db]], showTerms = 20, title = db) +
175
+ theme_prism()
176
+
177
+ plotfile <- file.path(info$casedir, paste0("Enrichr-", db, ".png"))
178
+ png(plotfile, res = 100, height = 600, width = 800)
179
+ print(p)
180
+ dev.off()
181
+
182
+ plotfile_pdf <- gsub(".png$", ".pdf", plotfile)
183
+ pdf(plotfile_pdf, height = 6, width = 8)
184
+ print(p)
185
+ dev.off()
191
186
  }
192
187
  }
193
188
 
189
+ ensure_sobj <- function(expr, allow_empty) {
190
+ tryCatch({ expr }, error = function(e) {
191
+ if (allow_empty) {
192
+ log_warn(" Ignoring this case: {e$message}")
193
+ return(NULL)
194
+ } else {
195
+ stop(e)
196
+ }
197
+ })
198
+ }
194
199
 
195
200
  do_case <- function(casename) {
196
- cat(paste("- Dealing with case:", casename, "...\n"))
201
+ log_info("- Dealing with case: {casename} ...")
202
+ info <- casename_info(casename, cases, outdir, create = TRUE)
197
203
  case <- cases[[casename]]
198
- sobj <- srtobj %>% filter(!is.na(!!sym(case$group_by)))
199
- df <- GetAssayData(sobj, slot = "data", assay = "RNA")
200
- genes <- rownames(df)
201
- # rows: cells, cols: genes
202
- df <- cbind(as.data.frame(scale(Matrix::t(df))), sobj@meta.data[, case$group_by])
203
- colnames(df)[ncol(df)] <- "GROUP"
204
+ allow_empty = startsWith(case$group_by, "..")
204
205
 
205
- cat(paste(" Running tests for case...\n"))
206
- test_result <- mclapply(genes, function(gene) {
207
- fm <- as.formula(paste(bQuote(gene), "~ GROUP"))
208
- res <- tryCatch({
209
- if (case$method == "anova") {
210
- r <- summary(aov(fm, data = df))[[1]]
211
- data.frame(
212
- statistic = r[1, "F value"],
213
- p.value = r[1, "Pr(>F)"],
214
- sumsq = r[1, "Sum Sq"],
215
- meansq = r[1, "Mean Sq"]
216
- )
206
+ if (sum(!is.na(srtobj@meta.data[[case$group_by]])) == 0) {
207
+ msg = "Not enough cells to run tests."
208
+ } else {
209
+ sobj <- ensure_sobj({ srtobj %>% filter(!is.na(!!sym(case$group_by))) }, allow_empty)
210
+ if (is.null(sobj)) { return() }
211
+ if (!is.null(case$subset)) {
212
+ sobj <- ensure_sobj({ sobj %>% filter(!!parse_expr(case$subset)) }, allow_empty)
213
+ if (is.null(sobj)) { return() }
214
+ }
215
+ df <- tryCatch({
216
+ GetAssayData(sobj, layer = "data")
217
+ }, error = function(e) {
218
+ log_warn(" Error when fetching assay data: {e}")
219
+ NULL
220
+ })
221
+ if (is.null(df)) {
222
+ msg <- "No markers found. May be due to too few cells or features."
223
+ } else {
224
+ df <- df[
225
+ apply(df, 1, function(x) !all(is.na(x)) && !all(x == x[1])), ,
226
+ drop = FALSE
227
+ ]
228
+ genes <- rownames(df)
229
+ # rows: cells, cols: genes
230
+ df <- cbind(as.data.frame(scale(Matrix::t(df))), sobj@meta.data[, case$group_by])
231
+ colnames(df)[ncol(df)] <- "GROUP"
232
+
233
+ log_info(" Running tests for case...")
234
+ warn_count <- 0
235
+ test_result <- mclapply(genes, function(gene) {
236
+ fm <- as.formula(paste(bQuote(gene), "~ GROUP"))
237
+ res <- tryCatch({
238
+ if (case$method == "anova") {
239
+ r <- summary(aov(fm, data = df))[[1]]
240
+ data.frame(
241
+ statistic = r[1, "F value"],
242
+ p.value = r[1, "Pr(>F)"],
243
+ sumsq = r[1, "Sum Sq"],
244
+ meansq = r[1, "Mean Sq"]
245
+ )
246
+ } else {
247
+ r <- kruskal.test(fm, data = df)
248
+ data.frame(statistic = r$statistic, p.value = r$p.value)
249
+ }
250
+ }, error = function(e) {
251
+ warn_count <<- warn_count + 1
252
+ if (warn_count < 10) {
253
+ log_warn(" Error when testing gene: {gene}")
254
+ log_warn(" {e}")
255
+ } else if (warn_count == 10) {
256
+ log_warn(" Too many errors, will not print more.")
257
+ }
258
+ NULL
259
+ })
260
+ if (is.null(res)) {
261
+ return(NULL)
262
+ }
263
+ res$gene <- gene
264
+ res$method <- case$method
265
+ rownames(res) <- NULL
266
+ res
267
+ }, mc.cores = ncores)
268
+ markers <- do_call(rbind, test_result)
269
+ if (is.null(markers)) {
270
+ msg <- "No markers found. May be due to too few cells."
217
271
  } else {
218
- r <- kruskal.test(fm, data = df)
219
- data.frame(statistic = r$statistic, p.value = r$p.value)
272
+ markers$p_adjust <- p.adjust(markers$p.value, method = case$p_adjust)
273
+ markers <- markers %>% arrange(p_adjust)
274
+
275
+ msg <- do_enrich(info, markers, case$sigmarkers)
220
276
  }
221
- }, error = function(e) NULL)
222
- if (is.null(res)) {
223
- return(NULL)
224
277
  }
225
- res$gene <- gene
226
- res$method <- case$method
227
- rownames(res) <- NULL
228
- res
229
- }, mc.cores = ncores)
230
- markers <- do_call(rbind, test_result)
231
- markers$p_adjust <- p.adjust(markers$p.value, method = case$p_adjust)
232
- markers <- markers %>% arrange(p_adjust)
233
- do_enrich(casename, markers, case$sigmarkers)
278
+ }
279
+ if (is.null(msg)) {
280
+ log_info(" Plotting top 10 genes ...")
281
+ markers <- markers %>% head(10)
282
+ plotdir <- file.path(info$casedir, "expr_plots")
283
+ dir.create(plotdir, showWarnings = FALSE, recursive = TRUE)
234
284
 
235
- print(paste(" Plotting top 10 genes ...\n"))
236
- markers <- markers %>% head(10)
237
- parts <- strsplit(casename, ":")[[1]]
238
- sec <- parts[1]
239
- casename <- paste0(parts[-1], collapse = ":")
240
- plotdir <- file.path(outdir, sec, casename, "plots")
241
- dir.create(plotdir, showWarnings = FALSE, recursive = TRUE)
285
+ # Plot the top 10 genes in each group with violin plots
286
+ geneplots <- list()
287
+ for (gene in markers$gene) {
288
+ outfile <- file.path(plotdir, paste0(slugify(gene), ".png"))
289
+ p <- ggplot(df, aes_string(x="GROUP", y=bQuote(gene), fill="GROUP")) +
290
+ geom_violin(alpha = .8) +
291
+ geom_boxplot(width=0.1, fill="white") +
292
+ theme_prism() +
293
+ ylab(paste0("Expression of ", gene))
294
+ png(outfile, res = 100, height = 600, width = 800)
295
+ print(p)
296
+ dev.off()
242
297
 
243
- # Plot the top 10 genes in each group with violin plots
244
- for (gene in markers$gene) {
245
- outfile = file.path(plotdir, paste0(gene, ".png"))
246
- p = ggplot(df, aes_string(x="GROUP", y=bQuote(gene), fill="GROUP")) +
247
- geom_violin(alpha = .8) +
248
- geom_boxplot(width=0.1, fill="white") +
249
- theme_prism() +
250
- ylab(paste0("Expression of ", gene))
251
- png(outfile, res = 100, height = 800, width = 1000)
252
- print(p)
253
- dev.off()
298
+ outfile_pdf <- gsub(".png$", ".pdf", outfile)
299
+ pdf(outfile_pdf, height = 6, width = 8)
300
+ print(p)
301
+ dev.off()
302
+
303
+ geneplots[[length(geneplots) + 1]] <- list(
304
+ kind = "table_image",
305
+ src = outfile,
306
+ download = outfile_pdf,
307
+ name = gene
308
+ )
309
+ }
310
+
311
+ add_report(
312
+ list(
313
+ kind = "descr",
314
+ content = paste0(
315
+ "Top 100 genes selected by ",
316
+ "<code>", case$method, "</code> across ",
317
+ "<code>", case$group_by, "</code> and filtered by ",
318
+ "<code>", html_escape(case$sigmarkers), "</code>"
319
+ )
320
+ ),
321
+ h1 = info$h1,
322
+ h2 = ifelse(info$h2 == "#", "Meta-Markers", info$h2),
323
+ h3 = ifelse(info$h2 == "#", "#", "Meta-Markers")
324
+ )
325
+ add_report(
326
+ list(
327
+ name = "Meta-Markers",
328
+ contents = list(list(
329
+ kind = "table",
330
+ src = file.path(info$casedir, "markers.txt"),
331
+ data = list(nrows = 100)
332
+ ))
333
+ ),
334
+ list(
335
+ name = "Volin Plots (Top 10)",
336
+ ui = "table_of_images:4",
337
+ contents = geneplots
338
+ ),
339
+ h1 = info$h1,
340
+ h2 = ifelse(info$h2 == "#", "Meta-Markers", info$h2),
341
+ h3 = ifelse(info$h2 == "#", "#", "Meta-Markers"),
342
+ ui = "tabs"
343
+ )
344
+ add_report(
345
+ list(kind = "enrichr", dir = info$casedir),
346
+ h1 = info$h1,
347
+ h2 = ifelse(info$h2 == "#", "Enrichment Analysis", info$h2),
348
+ h3 = ifelse(info$h2 == "#", "#", "Enrichment Analysis")
349
+ )
350
+ } else {
351
+ log_warn(" {msg}")
352
+ add_report(
353
+ list(kind = "error", content = msg),
354
+ h1 = info$h1,
355
+ h2 = info$h2
356
+ )
254
357
  }
255
358
  }
256
359
 
257
360
  sapply(sort(names(cases)), do_case)
361
+ save_report(joboutdir)
@@ -1,15 +1,19 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
- library(Seurat)
1
+ library(rlang)
3
2
  library(dplyr)
3
+ library(Seurat)
4
+ library(biopipen.utils)
4
5
 
5
6
  sobjfile <- {{in.srtobj | r}}
6
7
  outfile <- {{out.rdsfile | r}}
7
8
  defaults <- {{envs.defaults | r}}
8
9
  modules <- {{envs.modules | r}}
10
+ post_mutaters <- {{envs.post_mutaters | r}}
11
+
12
+ log <- get_logger()
9
13
 
10
14
  # load seurat object
11
- print("Loading Seurat object ...")
12
- sobj <- readRDS(sobjfile)
15
+ log$info("Loading Seurat object ...")
16
+ sobj <- read_obj(sobjfile)
13
17
 
14
18
  aggs <- list(
15
19
  mean = mean,
@@ -27,7 +31,6 @@ for (key in names(modules)) {
27
31
  }
28
32
 
29
33
  module <- list_update(defaults, modules[[key]])
30
- module$object <- sobj
31
34
  if (is.null(module$features) || length(module$features) == 0) {
32
35
  stop(paste0("Module '", key, "' has no features"))
33
36
  }
@@ -36,21 +39,67 @@ for (key in names(modules)) {
36
39
  agg <- aggs[[module$agg]]
37
40
  module$keep <- NULL
38
41
  module$agg <- NULL
39
- print(paste0("Calculating module '", key, "' ..."))
42
+ log$info("Calculating module '{key}' ...")
40
43
  is_cc <- FALSE
41
- if (module$features == "cc.genes") {
44
+ if (!is.null(module$kind) && module$kind %in% c("diffmap", "diffusion_map")) {
45
+ library(destiny)
46
+ features <- module$features
47
+ if (is.null(features)) { features <- 2 }
48
+ if (is.null(module$verbose)) { module$verbose <- TRUE }
49
+ module$features <- NULL
50
+ module$kind <- NULL
51
+
52
+ if (!is.null(module$n_pcs)) {
53
+ log$info("- Using cell embeddings from PCA reduction ...")
54
+ module$data <- Embeddings(sobj, reduction = "pca")
55
+ if (module$n_pcs > ncol(module$data)) {
56
+ log$warn("- `n_pcs` ({module$n_pcs}) is larger than the number of PCs, using all {ncol(module$data)} PCs ...")
57
+ }
58
+ module$data <- module$data[, 1:min(module$n_pcs, ncol(module$data))]
59
+ module$n_pcs <- NULL
60
+ } else {
61
+ log$info("- Using assay data ...")
62
+ module$data <- GetAssayData(sobj, layer = "data")
63
+ }
64
+
65
+ log$info("- Calculating diffusion map ...")
66
+ dm <- do_call(DiffusionMap, module)
67
+ ev <- eigenvectors(dm)
68
+
69
+ log$info("- Creating DimReduc object ...")
70
+ sobj[[key]] <- CreateDimReducObject(
71
+ embeddings = data.matrix(as.data.frame(ev[, 1:features])),
72
+ key = paste0(key, "_")
73
+ )
74
+
75
+ # add to meta.data
76
+ log$info("- Adding to meta.data ...")
77
+ sobj <- AddMetaData(
78
+ sobj,
79
+ sobj[[key]]@cell.embeddings,
80
+ col.name = colnames(sobj[[key]]@cell.embeddings)
81
+ )
82
+
83
+ next
84
+ }
85
+
86
+ module$object <- sobj
87
+ if (length(module$features) == 1 && module$features == "cc.genes") {
42
88
  is_cc <- TRUE
43
89
  module$features <- NULL
44
90
  module$s.features <- cc.genes$s.genes
45
91
  module$g2m.features <- cc.genes$g2m.genes
46
- } else if (module$features == "cc.genes.updated.2019") {
92
+ } else if (length(module$features) == 1 && module$features == "cc.genes.updated.2019") {
47
93
  is_cc <- TRUE
48
94
  module$features <- NULL
49
95
  module$s.features <- cc.genes.updated.2019$s.genes
50
96
  module$g2m.features <- cc.genes.updated.2019$g2m.genes
51
97
  } else {
52
98
  module$name <- key
53
- module$features <- trimws(strsplit(module$features, ",")[[1]])
99
+ if (length(module$features) == 1) {
100
+ module$features <- trimws(strsplit(module$features, ",")[[1]])
101
+ }
102
+ module$features <- list(module$features)
54
103
  }
55
104
  if (isTRUE(is_cc)) {
56
105
  sobj <- do_call(CellCycleScoring, module)
@@ -87,6 +136,12 @@ for (key in names(modules)) {
87
136
  }
88
137
  }
89
138
 
139
+ if (!is.null(post_mutaters) && length(post_mutaters) > 0) {
140
+ log$info("Applying post mutaters ...")
141
+ sobj@meta.data <- sobj@meta.data %>%
142
+ mutate(!!!lapply(post_mutaters, parse_expr))
143
+ }
144
+
90
145
  # save seurat object
91
- print("Saving Seurat object ...")
92
- saveRDS(sobj, outfile)
146
+ log$info("Saving Seurat object ...")
147
+ save_obj(sobj, outfile)