biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
@@ -1,9 +1,10 @@
1
1
  """Metabolic landscape analysis for scRNA-seq data"""
2
+
2
3
  from __future__ import annotations
3
4
  from pathlib import Path
4
5
  from typing import Type
5
6
 
6
- from diot import Diot
7
+ from diot import Diot # type: ignore
7
8
  from datar.tibble import tibble
8
9
  from pipen.utils import mark
9
10
  from pipen_args import ProcGroup
@@ -13,7 +14,7 @@ from ..core.config import config
13
14
  from ..core.proc import Proc
14
15
 
15
16
 
16
- class _MetabolicPathwayActivity(Proc):
17
+ class MetabolicPathwayActivity(Proc):
17
18
  """This process calculates the pathway activities in different groups and subsets.
18
19
 
19
20
  The cells are first grouped by subsets and then the metabolic activities are
@@ -22,102 +23,108 @@ class _MetabolicPathwayActivity(Proc):
22
23
  For each subset, a heatmap and a violin plot will be generated.
23
24
  The heatmap shows the pathway activities for each group and each metabolic pathway
24
25
 
25
- ![MetabolicPathwayActivity_heatmap](https://pwwang.github.io/immunopipe/processes/images/MetabolicPathwayActivity_heatmap.png){: width="80%"}
26
+ ![MetabolicPathwayActivity_heatmap](https://pwwang.github.io/immunopipe/latest/processes/images/MetabolicPathwayActivity_heatmap.png){: width="80%"}
26
27
 
27
28
  The violin plot shows the distribution of the pathway activities for each group
28
29
 
29
- ![MetabolicPathwayActivity_violin](https://pwwang.github.io/immunopipe/processes/images/MetabolicPathwayActivity_violin.png){: width="45%"}
30
+ ![MetabolicPathwayActivity_violin](https://pwwang.github.io/immunopipe/latest/processes/images/MetabolicPathwayActivity_violin.png){: width="45%"}
31
+
32
+ You may also have a merged heatmap to show all subsets in one plot.
33
+
34
+ ![MetabolicPathwayActivity_merged_heatmap](https://pwwang.github.io/immunopipe/latest/processes/images/MetabolicPathwayActivity_merged_heatmap.png){: width="80%"}
35
+
36
+ Input:
37
+ sobjfile: The Seurat object file.
38
+ It should be loaded as a Seurat object
39
+
40
+ Output:
41
+ outdir: The output directory.
42
+ It will contain the pathway activity score files and plots.
30
43
 
31
44
  Envs:
32
- ntimes (type=int): Number of times to do the permutation
45
+ ntimes (type=int): Number of permutations to estimate the p-values
33
46
  ncores (type=int;pgarg): Number of cores to use for parallelization
34
47
  Defaults to `ScrnaMetabolicLandscape.ncores`
35
- heatmap_devpars (ns): Device parameters for the heatmap
36
- - width (type=int): Width of the heatmap
37
- - height (type=int): Height of the heatmap
38
- - res (type=int): Resolution of the heatmap
39
- violin_devpars (ns): Device parameters for the violin plot
40
- - width (type=int): Width of the violin plot
41
- - height (type=int): Height of the violin plot
42
- - res (type=int): Resolution of the violin plot
43
48
  gmtfile (pgarg): The GMT file with the metabolic pathways.
44
49
  Defaults to `ScrnaMetabolicLandscape.gmtfile`
45
- grouping (type=auto;pgarg;readonly): Defines the basic groups to
46
- investigate the metabolic activity, typically the clusters.
47
- Defaults to `ScrnaMetabolicLandscape.grouping`
48
- grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to group
49
- names. For example, if we have `grouping_prefix = "cluster"` and
50
- we have `1` and `2` in the `grouping` column, the groups
51
- will be named as `cluster_1` and `cluster_2`.
52
- Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
53
- subsetting (type=auto;pgarg;readonly): How do we subset the data. Other
54
- columns in the metadata to do comparisons. For example,
55
- `"TimePoint"` or `["TimePoint", "Response"]`.
56
- Defaults to `ScrnaMetabolicLandscape.subsetting`
57
- subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
58
- subset names.
59
- For example, if we have `subsetting_prefix = "timepoint"` and
60
- we have `pre` and `post` in the `subsetting` column, the subsets
61
- will be named as `timepoint_pre` and `timepoint_post`.
62
- If `subsetting` is a list, then this should also be a
63
- same-length list. If a single string is given, it will be
64
- repeated to a list with the same length as `subsetting`.
65
- Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`
66
-
67
- Requires:
68
- r-scater:
69
- - check: {{proc.lang}} <(echo "library(scater)")
70
- r-reshape2:
71
- - check: {{proc.lang}} <(echo "library(reshape2)")
72
- r-rcolorbrewer:
73
- - check: {{proc.lang}} <(echo "library(RColorBrewer)")
74
- r-ggplot2:
75
- - check: {{proc.lang}} <(echo "library(ggplot2)")
76
- r-ggprism:
77
- - check: {{proc.lang}} <(echo "library(ggprism)")
78
- r-complexheatmap:
79
- - check: {{proc.lang}} <(echo "library(ComplexHeatmap)")
80
- r-parallel:
81
- - check: {{proc.lang}} <(echo "library(parallel)")
50
+ subset_by (pgarg;readonly): Subset the data by the given column in the
51
+ metadata. For example, `Response`.
52
+ `NA` values will be removed in this column.
53
+ Defaults to `ScrnaMetabolicLandscape.subset_by`
54
+ If None, the data will not be subsetted.
55
+ group_by (pgarg;readonly): Group the data by the given column in the
56
+ metadata. For example, `cluster`.
57
+ Defaults to `ScrnaMetabolicLandscape.group_by`
58
+ plots (type=json): The plots to generate.
59
+ Names will be used as the prefix for the output files. Each value is
60
+ a dictionary with the following keys:
61
+ * `plot_type` is the type of plot to generate. One of `heatmap`,
62
+ `box`, `violin` or `merged_heatmap` (all subsets in one plot).
63
+ * `devpars` is a dictionary with the device parameters for the plot.
64
+ * Other arguments for `plotthis::Heatmap()`, `plotthis::BoxPlot()`
65
+ or `plotthis::ViolinPlot()`, depending on the `plot_type`.
66
+ cases (type=json): Multiple cases for the analysis.
67
+ If you only have one case, you can specify the parameters directly to
68
+ `envs.ntimes`, `envs.subset_by`, `envs.group_by`, `envs.group1`,
69
+ `envs.group2`, and `envs.plots`. The name of the case will be
70
+ `envs.subset_by`.
71
+ If you have multiple cases, you can specify the parameters for each case
72
+ in a dictionary. The keys will be the names of the cases and the values
73
+ will be dictionaries with the parameters for each case, where the values
74
+ will be inherited from `envs.ntimes`, `envs.subset_by`, `envs.group_by`,
75
+ `envs.group1`, `envs.group2`, and `envs.plots`.
82
76
  """ # noqa: E501
77
+
83
78
  input = "sobjfile:file"
84
79
  output = "outdir:dir:{{in.sobjfile | stem}}.pathwayactivity"
85
80
  envs = {
86
81
  "ntimes": 5000,
87
82
  "ncores": config.misc.ncores,
88
- "heatmap_devpars": {},
89
- "violin_devpars": {},
90
83
  "gmtfile": None,
91
- "grouping": None,
92
- "grouping_prefix": "",
93
- "subsetting": None,
94
- "subsetting_prefix": "",
84
+ "subset_by": None,
85
+ "group_by": None,
86
+ "plots": {
87
+ "Pathway Activity (violin plot)": {
88
+ "plot_type": "violin",
89
+ "add_box": True,
90
+ "devpars": {"res": 100},
91
+ },
92
+ "Pathway Activity (heatmap)": {
93
+ "plot_type": "heatmap",
94
+ "devpars": {"res": 100},
95
+ },
96
+ },
97
+ "cases": {},
95
98
  }
96
99
  lang = config.lang.rscript
97
100
  script = (
98
- "file://../scripts/"
99
- "scrna_metabolic_landscape/MetabolicPathwayActivity.R"
101
+ "file://../scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R"
100
102
  )
101
103
  plugin_opts = {
102
- "report": (
103
- "file://../reports/"
104
- "scrna_metabolic_landscape/MetabolicPathwayActivity.svelte"
105
- )
104
+ "report":
105
+ "file://../reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte"
106
106
  }
107
107
 
108
108
 
109
- class _MetabolicFeatures(Proc):
109
+ class MetabolicFeatures(Proc):
110
110
  """This process performs enrichment analysis for the metabolic pathways
111
111
  for each group in each subset.
112
112
 
113
113
  The enrichment analysis is done with [`fgsea`](https://bioconductor.org/packages/release/bioc/html/fgsea.html)
114
114
  package or the [`GSEA_R`](https://github.com/GSEA-MSigDB/GSEA_R) package.
115
115
 
116
+ Input:
117
+ sobjfile: The Seurat object file in rds.
118
+ It should be loaded as a Seurat object
119
+
120
+ Output:
121
+ outdir: The output directory.
122
+ It will contain the GSEA results and plots.
123
+
116
124
  Envs:
117
- ncores (type=int;pgarg): Number of cores to use for parallelization.
118
- Defaults to `ScrnaMetabolicLandscape.ncores`
119
- fgsea (flag): Whether to do fast gsea analysis using `fgsea` package.
120
- If `False`, the `GSEA_R` package will be used.
125
+ ncores (type=int;pgarg): Number of cores to use for parallelization for
126
+ the comparisons for each subset and group.
127
+ Defaults to `ScrnaMetabolicLandscape.ncores`.
121
128
  prerank_method (choice): Method to use for gene preranking.
122
129
  Signal to noise: the larger the differences of the means
123
130
  (scaled by the standard deviations); that is, the more distinct
@@ -143,148 +150,85 @@ class _MetabolicFeatures(Proc):
143
150
  - ratio_of_classes: Also referred to as fold change
144
151
  - diff_of_classes: Difference of class means
145
152
  - log2_ratio_of_classes: Log2 ratio of class means
146
- top (type=int): N top of enriched pathways to show
147
153
  gmtfile (pgarg): The GMT file with the metabolic pathways.
148
154
  Defaults to `ScrnaMetabolicLandscape.gmtfile`
149
- grouping (type=auto;pgarg;readonly): Defines the basic groups to
150
- investigate the metabolic activity.
151
- Defaults to `ScrnaMetabolicLandscape.grouping`
152
- grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to
153
- group names.
154
- Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
155
- subsetting (type=auto;pgarg;readonly): How do we subset the data.
156
- Another column(s) in the metadata.
157
- Defaults to `ScrnaMetabolicLandscape.subsetting`
158
- subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
159
- subset names.
160
- Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`
161
-
162
- Requires:
163
- r-parallel:
164
- - check: {{proc.lang}} <(echo "library(parallel)")
165
- r-fgsea:
166
- - check: {{proc.lang}} <(echo "library(fgsea)")
155
+ subset_by (pgarg;readonly): Subset the data by the given column in the
156
+ metadata. For example, `Response`.
157
+ `NA` values will be removed in this column.
158
+ Defaults to `ScrnaMetabolicLandscape.subset_by`
159
+ If None, the data will not be subsetted.
160
+ group_by (pgarg;readonly): Group the data by the given column in the
161
+ metadata. For example, `cluster`.
162
+ Defaults to `ScrnaMetabolicLandscape.group_by`
163
+ comparisons (type=list): The comparison groups to use for the analysis.
164
+ If not provided, each group in the `group_by` column will be used
165
+ to compare with the other groups.
166
+ If a single group is provided as an element, it will be used to
167
+ compare with all the other groups.
168
+ For example, if we have `group_by = "cluster"` and we have
169
+ `1`, `2` and `3` in the `group_by` column, we could have
170
+ `comparisons = ["1", "2"]`, which will compare the group `1` with groups
171
+ `2` and `3`, and the group `2` with groups `1` and `3`. We could also
172
+ have `comparisons = ["1:2", "1:3"]`, which will compare the group `1` with
173
+ group `2` and group `1` with group `3`.
174
+ fgsea_args (type=json): Other arguments for the `fgsea::fgsea()` function.
175
+ For example, `{"minSize": 15, "maxSize": 500}`.
176
+ See <https://rdrr.io/bioc/fgsea/man/fgsea.html> for more details.
177
+ plots (type=json): The plots to generate.
178
+ Names will be used as the title for the plot. Values will be the arguments
179
+ passed to `biopipen.utils::VizGSEA()` function.
180
+ See <https://pwwang.github.io/biopipen.utils.R/reference/VizGSEA.html>.
181
+ A key `level` is supported to specify the level of the plot.
182
+ Possible values are `case`, which includes all subsets and groups in the
183
+ case; `subset`, which includes all groups in the subset; otherwise, it
184
+ will plot for the groups.
185
+ For `case`/`subset` level plots, only the "dot" `plot_type` is currently
186
+ supported; the values will then be passed to `plotthis::DotPlot()`.
187
+ cases (type=json): Multiple cases for the analysis.
188
+ If you only have one case, you can specify the parameters directly to
189
+ `envs.prerank_method`, `envs.subset_by`, `envs.group_by`,
190
+ `envs.comparisons`, `envs.fgsea_args` and `envs.plots`.
191
+ The name of this default case will be `envs.subset_by`.
192
+ If you have multiple cases, you can specify the parameters for each case
193
+ in a dictionary. The keys will be the names of the cases and the values
194
+ will be dictionaries with the parameters for each case, where the values
195
+ will be inherited from `envs.prerank_method`,
196
+ `envs.subset_by`, `envs.group_by`, `envs.comparisons`, `envs.fgsea_args`
197
+ and `envs.plots`.
167
198
  """ # noqa: E501
199
+
168
200
  input = "sobjfile:file"
169
201
  output = "outdir:dir:{{in.sobjfile | stem}}.pathwayfeatures"
170
202
  lang = config.lang.rscript
171
203
  envs = {
172
204
  "ncores": config.misc.ncores,
173
- "fgsea": True,
174
205
  "prerank_method": "signal_to_noise",
175
- "top": 10,
176
- "gmtfile": None,
177
- "grouping": None,
178
- "grouping_prefix": "",
179
- "subsetting": None,
180
- "subsetting_prefix": "",
181
- }
182
- script = (
183
- "file://../scripts/scrna_metabolic_landscape/MetabolicFeatures.R"
184
- )
185
- plugin_opts = {
186
- "report": (
187
- "file://../reports/"
188
- "scrna_metabolic_landscape/MetabolicFeatures.svelte"
189
- )
190
- }
191
-
192
-
193
- class _MetabolicFeaturesIntraSubset(Proc):
194
- """Intra-subset metabolic features - Enrichment analysis in details
195
-
196
- Similar to the [`MetabolicFeatures`](./MetabolicFeatures.md) process,
197
- this process performs enrichment analysis for the metabolic pathways for
198
- each subset in each group, instead of each group in each subset.
199
-
200
- See also: [`MetabolicFeatures`](./MetabolicFeatures.md)
201
-
202
- Envs:
203
- ncores (type=int; pgarg): Number of cores to use for parallelization
204
- Defaults to `ScrnaMetabolicLandscape.ncores`
205
- fgsea (flag): Whether to do fast gsea analysis
206
- prerank_method (choice): Method to use for gene preranking
207
- Signal to noise: the larger the differences of the means
208
- (scaled by the standard deviations); that is, the more distinct
209
- the gene expression is in each phenotype and the more the gene
210
- acts as a “class marker.”.
211
- Absolute signal to noise: the absolute value of the signal to
212
- noise.
213
- T test: Uses the difference of means scaled by the standard
214
- deviation and number of samples.
215
- Ratio of classes: Uses the ratio of class means to calculate
216
- fold change for natural scale data.
217
- Diff of classes: Uses the difference of class means to calculate
218
- fold change for nature scale data
219
- Log2 ratio of classes: Uses the log2 ratio of class means to
220
- calculate fold change for natural scale data. This is the
221
- recommended statistic for calculating fold change for log scale
222
- data.
223
- - signal_to_noise: Signal to noise
224
- - s2n: Alias of signal_to_noise
225
- - abs_signal_to_noise: absolute signal to noise
226
- - abs_s2n: Alias of abs_signal_to_noise
227
- - t_test: T test
228
- - ratio_of_classes: Also referred to as fold change
229
- - diff_of_classes: Difference of class means
230
- - log2_ratio_of_classes: Log2 ratio of class means
231
- top (type=int): N top of enriched pathways to show
232
- gmtfile (pgarg): The GMT file with the metabolic pathways.
233
- Defaults to `ScrnaMetabolicLandscape.gmtfile`
234
- grouping (type=auto;pgarg;readonly): Defines the basic groups to
235
- investigate the metabolic activity.
236
- Defaults to `ScrnaMetabolicLandscape.grouping`
237
- grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to group
238
- names.
239
- Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
240
- subsetting (type=auto;pgarg;readonly): How do we subset the data.
241
- Another column(s) in the metadata.
242
- Defaults to `ScrnaMetabolicLandscape.subsetting`
243
- subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
244
- subset names.
245
- Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`
246
- subsetting_comparison (type=json;pgarg;readonly): How do we compare the
247
- subsets.
248
- Defaults to `ScrnaMetabolicLandscape.subsetting_comparison`
249
-
250
- Requires:
251
- r-parallel:
252
- - check: {{proc.lang}} <(echo "library(parallel)")
253
- r-scater:
254
- - check: {{proc.lang}} <(echo "library(scater)")
255
- r-fgsea:
256
- - check: {{proc.lang}} <(echo "library(fgsea)")
257
- """
258
- input = "sobjfile:file"
259
- output = (
260
- "outdir:dir:{{in.sobjfile | stem}}.intra-subset-pathwayfeatures"
261
- )
262
- lang = config.lang.rscript
263
- envs = {
264
- "ncores": config.misc.ncores,
265
206
  "gmtfile": None,
266
- "fgsea": True,
267
- "prerank_method": "signal_to_noise",
268
- "top": 10,
269
- "grouping": None,
270
- "grouping_prefix": "",
271
- "subsetting": None,
272
- "subsetting_prefix": "",
273
- "subsetting_comparison": {},
207
+ "subset_by": None,
208
+ "group_by": None,
209
+ "comparisons": [],
210
+ "fgsea_args": {},
211
+ "plots": {
212
+ "Summary Plot": {
213
+ "plot_type": "summary",
214
+ "top_term": 10,
215
+ "devpars": {"res": 100},
216
+ },
217
+ "Enrichment Plots": {
218
+ "plot_type": "gsea",
219
+ "top_term": 10,
220
+ "devpars": {"res": 100},
221
+ },
222
+ },
223
+ "cases": {},
274
224
  }
275
- script = (
276
- "file://../scripts/scrna_metabolic_landscape/"
277
- "MetabolicFeaturesIntraSubset.R"
278
- )
225
+ script = "file://../scripts/scrna_metabolic_landscape/MetabolicFeatures.R"
279
226
  plugin_opts = {
280
- "report": (
281
- "file://../reports/scrna_metabolic_landscape/"
282
- "MetabolicFeaturesIntraSubset.svelte"
283
- )
227
+ "report": "file://../reports/scrna_metabolic_landscape/MetabolicFeatures.svelte"
284
228
  }
285
229
 
286
230
 
287
- class _MetabolicPathwayHeterogeneity(Proc):
231
+ class MetabolicPathwayHeterogeneity(Proc):
288
232
  """Calculate Metabolic Pathway heterogeneity.
289
233
 
290
234
  For each subset, the normalized enrichment score (NES) of each metabolic pathway
@@ -296,8 +240,7 @@ class _MetabolicPathwayHeterogeneity(Proc):
296
240
  The heterogeneity can be reflected by the NES values and the p-values in
297
241
  different groups for the metabolic pathways.
298
242
 
299
- ![MetabolicPathwayHeterogeneity](https://pwwang.github.io/immunopipe/processes/images/MetabolicPathwayHeterogeneity.png)
300
-
243
+ ![MetabolicPathwayHeterogeneity](https://pwwang.github.io/immunopipe/latest/processes/images/MetabolicPathwayHeterogeneity.png)
301
244
 
302
245
  Envs:
303
246
  gmtfile (pgarg): The GMT file with the metabolic pathways.
@@ -307,43 +250,33 @@ class _MetabolicPathwayHeterogeneity(Proc):
307
250
  the enriched pathways
308
251
  ncores (type=int;pgarg): Number of cores to use for parallelization
309
252
  Defaults to `ScrnaMetabolicLandscape.ncores`
310
- bubble_devpars (ns): The devpars for the bubble plot
311
- - width (type=int): The width of the plot
312
- - height (type=int): The height of the plot
313
- - res (type=int): The resolution of the plot
314
- grouping (type=auto;pgarg;readonly): Defines the basic groups to
315
- investigate the metabolic activity.
316
- Defaults to `ScrnaMetabolicLandscape.grouping`
317
- grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to group
318
- names.
319
- Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
320
- subsetting (type=auto;pgarg;readonly): How do we subset the data.
321
- Another column(s) in the metadata.
322
- Defaults to `ScrnaMetabolicLandscape.subsetting`
323
- subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
324
- subset names.
325
- Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`
326
-
327
- Requires:
328
- r-gtools:
329
- - check: {{proc.lang}} <(echo "library(gtools)")
330
- r-ggplot2:
331
- - check: {{proc.lang}} <(echo "library(ggplot2)")
332
- r-ggprism:
333
- - check: {{proc.lang}} <(echo "library(ggprism)")
334
- r-parallel:
335
- - check: {{proc.lang}} <(echo "library(parallel)")
336
- r-dplyr:
337
- - check: {{proc.lang}} <(echo "library(dplyr)")
338
- r-tibble:
339
- - check: {{proc.lang}} <(echo "library(tibble)")
340
- r-enrichr:
341
- - check: {{proc.lang}} <(echo "library(enrichR)")
342
- r-data.table:
343
- - check: {{proc.lang}} <(echo "library(data.table)")
344
- r-fgsea:
345
- - check: {{proc.lang}} <(echo "library(fgsea)")
253
+ subset_by (pgarg;readonly): Subset the data by the given column in the
254
+ metadata. For example, `Response`.
255
+ `NA` values will be removed in this column.
256
+ Defaults to `ScrnaMetabolicLandscape.subset_by`
257
+ If None, the data will not be subsetted.
258
+ group_by (pgarg;readonly): Group the data by the given column in the
259
+ metadata. For example, `cluster`.
260
+ Defaults to `ScrnaMetabolicLandscape.group_by`
261
+ fgsea_args (type=json): Other arguments for the `fgsea::fgsea()` function.
262
+ For example, `{"minSize": 15, "maxSize": 500}`.
263
+ See <https://rdrr.io/bioc/fgsea/man/fgsea.html> for more details.
264
+ plots (type=json): The plots to generate.
265
+ Names will be used as the title for the plot. Values will be the arguments
266
+ passed to `biopipen.utils::VizGSEA()` function.
267
+ See <https://pwwang.github.io/biopipen.utils.R/reference/VizGSEA.html>.
268
+ cases (type=json): Multiple cases for the analysis.
269
+ If you only have one case, you can specify the parameters directly to
270
+ `envs.subset_by`, `envs.group_by`, `envs.fgsea_args`, `envs.plots`,
271
+ `envs.select_pcs`, and `envs.pathway_pval_cutoff`.
272
+ The name of this default case will be `envs.subset_by`.
273
+ If you have multiple cases, you can specify the parameters for each case
274
+ in a dictionary. The keys will be the names of the cases and the values
275
+ will be dictionaries with the parameters for each case, where the values
276
+ will be inherited from `envs.subset_by`, `envs.group_by`, `envs.fgsea_args`,
277
+ `envs.plots`, `envs.select_pcs`, and `envs.pathway_pval_cutoff`.
346
278
  """ # noqa: E501
279
+
347
280
  input = "sobjfile:file"
348
281
  output = "outdir:dir:{{in.sobjfile | stem}}.pathwayhetero"
349
282
  lang = config.lang.rscript
@@ -352,11 +285,16 @@ class _MetabolicPathwayHeterogeneity(Proc):
352
285
  "select_pcs": 0.8,
353
286
  "pathway_pval_cutoff": 0.01,
354
287
  "ncores": config.misc.ncores,
355
- "bubble_devpars": {},
356
- "grouping": None,
357
- "grouping_prefix": "",
358
- "subsetting": None,
359
- "subsetting_prefix": "",
288
+ "subset_by": None,
289
+ "group_by": None,
290
+ "fgsea_args": {"scoreType": "std", "nproc": 1},
291
+ "plots": {
292
+ "Pathway Heterogeneity": {
293
+ "plot_type": "dot",
294
+ "devpars": {"res": 100},
295
+ },
296
+ },
297
+ "cases": {},
360
298
  }
361
299
  script = (
362
300
  "file://../scripts/scrna_metabolic_landscape/"
@@ -401,46 +339,19 @@ class ScrnaMetabolicLandscape(ProcGroup):
401
339
  dependent on other processes, this option will be used to determine
402
340
  whether the input is a seurat object or not.
403
341
  noimpute (flag): Whether to skip imputation for the dropouts.
404
- If False, the values will be left as is.
342
+ If True, the values will be left as is.
405
343
  gmtfile: The GMT file with the metabolic pathways. The gene names should
406
344
  match the gene names in the gene list in RNAData or
407
- the Seurat object
408
- grouping: defines the basic groups to investigate the metabolic activity
409
- Typically the clusters.
410
- grouping_prefix: Working as a prefix to group names
411
- For example, if we have `grouping_prefix = "cluster"` and
412
- we have `1` and `2` in the `grouping` column, the groups
413
- will be named as `cluster_1` and `cluster_2`
414
- subsetting (type=auto): How do we subset the data. Other columns in the
415
- metadata to do comparisons. For example, `"TimePoint"` or
416
- `["TimePoint", "Response"]`
417
- subsetting_prefix (type=auto): Working as a prefix to subset names
418
- For example, if we have `subsetting_prefix = "timepoint"` and
419
- we have `pre` and `post` in the `subsetting` column, the subsets
420
- will be named as `timepoint_pre` and `timepoint_post`
421
- If `subsetting` is a list, then this should also be a same-length
422
- list. If a single string is given, it will be repeated to a list
423
- with the same length as `subsetting`
424
- subsetting_comparison (type=json): What kind of comparisons are we
425
- doing to compare cells from different subsets.
426
- It should be dict with keys as the names of the comparisons and
427
- values as the 2 comparison groups from the `subsetting` column.
428
- For example, if we have `pre` and `post` in the `subsetting` column,
429
- we could have
430
- `subsetting_comparison = {"pre_vs_post": ["post", "pre"]}`
431
- The second group will be the control group in the comparison.
432
- If we also have `1`, `2` and `3` in the `grouping` column,
433
- by default, the comparisons are done within each subset for
434
- each group. For example, for group `1`, groups `2` and `3`
435
- will be used as control, and for group `2`, groups `1` and `3`
436
- will be used as control, and for group `3`, groups `1` and `2`
437
- will be used as control. It is similar to `Seurat::FindMarkers`
438
- procedure. With this option, the comparisons are also done to
439
- compare cells from different subsets within each group. With the
440
- example above, we will have `pre_vs_post` comparisons within
441
- each group.
442
- If `subsetting` is a list, this must be a list of dicts with the
443
- same length.
345
+ the Seurat object.
346
+ You can also provide a URL to the GMT file.
347
+ For example, from
348
+ <https://download.baderlab.org/EM_Genesets/current_release/Human/symbol/>.
349
+ subset_by (pgarg;readonly): Subset the data by the given column in the
350
+ metadata. For example, `Response`.
351
+ `NA` values will be removed in this column.
352
+ If None, the data will not be subsetted.
353
+ group_by (pgarg;readonly): Group the data by the given column in the
354
+ metadata. For example, `cluster`.
444
355
  mutaters (type=json): Add new columns to the metadata for
445
356
  grouping/subsetting.
446
357
  They are passed to `sobj@meta.data |> mutate(...)`. For example,
@@ -450,65 +361,25 @@ class ScrnaMetabolicLandscape(ProcGroup):
450
361
  ncores (type=int): Number of cores to use for parallelization for
451
362
  each process
452
363
  """
364
+
453
365
  DEFAULTS = Diot(
454
366
  metafile=None,
455
367
  is_seurat=None,
456
368
  gmtfile=None,
457
- grouping=None,
458
- grouping_prefix="",
459
- subsetting=None,
460
- subsetting_prefix=None,
461
- subsetting_comparison={},
462
369
  mutaters=None,
463
- noimpute=False,
370
+ noimpute=True,
464
371
  ncores=config.misc.ncores,
372
+ subset_by=None,
373
+ group_by=None,
465
374
  )
466
375
 
467
376
  def post_init(self):
468
377
  """Load runtime processes"""
469
378
  if self.opts.metafile:
470
379
  suffix = Path(self.opts.metafile).suffix
471
- self.opts.is_seurat = suffix in (".rds", ".RDS")
472
-
473
- # Make sure the grouping is a list
474
- if self.opts.subsetting and not isinstance(self.opts.subsetting, list):
475
- self.opts.subsetting = [self.opts.subsetting]
476
-
477
- # Make sure the grouping is a list with the same length as subsetting
478
- if (
479
- self.opts.subsetting
480
- and not isinstance(self.opts.subsetting_prefix, list)
481
- ):
482
- self.opts.subsetting_prefix = [
483
- self.opts.subsetting_prefix
484
- ] * len(self.opts.subsetting)
485
-
486
- # Make sure the lengths of subsetting and subsetting_comparison the same
487
- if self.opts.subsetting:
488
- if len(self.opts.subsetting) == 1 and isinstance(
489
- self.opts.subsetting_comparison, dict
490
- ):
491
- self.opts.subsetting_comparison = [
492
- self.opts.subsetting_comparison
493
- ]
494
-
495
- if len(self.opts.subsetting) > 1 and not isinstance(
496
- self.opts.subsetting_comparison, list
497
- ):
498
- raise ValueError(
499
- "The length of `subsetting` is larger than 1, "
500
- "but `subsetting_comparison` is not a list of dicts."
501
- )
502
-
503
- if len(self.opts.subsetting) != len(
504
- self.opts.subsetting_comparison
505
- ):
506
- raise ValueError(
507
- "The length of `subsetting` and `subsetting_comparison` "
508
- "are not the same"
509
- )
510
-
511
- @ProcGroup.add_proc
380
+ self.opts.is_seurat = suffix in (".rds", ".RDS", ".qs", ".qs2")
381
+
382
+ @ProcGroup.add_proc # type: ignore
512
383
  def p_input(self) -> Type[Proc]:
513
384
  """Build MetabolicInput process"""
514
385
  from .misc import File2Proc
@@ -526,8 +397,8 @@ class ScrnaMetabolicLandscape(ProcGroup):
526
397
 
527
398
  return MetabolicInput
528
399
 
529
- @ProcGroup.add_proc
530
- def p_preparing(self) -> Type[Proc]:
400
+ @ProcGroup.add_proc # type: ignore
401
+ def p_preparing(self) -> Type[Proc] | None:
531
402
  """Build SeuratPreparing process"""
532
403
  if self.opts.is_seurat:
533
404
  return None
@@ -539,11 +410,11 @@ class ScrnaMetabolicLandscape(ProcGroup):
539
410
 
540
411
  return MetabolicSeuratPreparing
541
412
 
542
- @ProcGroup.add_proc
413
+ @ProcGroup.add_proc # type: ignore
543
414
  def p_clustering(self) -> Type[Proc]:
544
415
  """Build SeuratClustering process"""
545
416
  if self.opts.is_seurat:
546
- return self.p_input
417
+ return self.p_input # type: ignore
547
418
 
548
419
  from .scrna import SeuratClustering
549
420
 
@@ -552,11 +423,11 @@ class ScrnaMetabolicLandscape(ProcGroup):
552
423
 
553
424
  return MetabolicSeuratClustering
554
425
 
555
- @ProcGroup.add_proc
426
+ @ProcGroup.add_proc # type: ignore
556
427
  def p_mutater(self) -> Type[Proc]:
557
428
  """Build SeuratMetadataMutater process"""
558
429
  if not self.opts.mutaters:
559
- return self.p_clustering
430
+ return self.p_clustering # type: ignore
560
431
 
561
432
  from .scrna import SeuratMetadataMutater
562
433
 
@@ -565,102 +436,77 @@ class ScrnaMetabolicLandscape(ProcGroup):
565
436
  input_data = lambda ch: tibble(
566
437
  srtobj=ch.iloc[:, 0],
567
438
  metafile=[None],
568
- mutaters=[self.opts.mutaters],
569
439
  )
440
+ envs = {"mutaters": self.opts.mutaters}
570
441
 
571
442
  return MetabolicSeuratMetadataMutater
572
443
 
573
- @ProcGroup.add_proc
444
+ @ProcGroup.add_proc # type: ignore
574
445
  def p_expr_impute(self) -> Type[Proc]:
575
446
  """Build the expression imputation process"""
576
447
  if self.opts.noimpute:
577
- return self.p_mutater
448
+ return self.p_mutater # type: ignore
578
449
 
579
- from .scrna import ExprImpution
450
+ from .scrna import ExprImputation
580
451
 
581
- @annotate.format_doc(indent=3)
582
- class MetabolicExprImpution(ExprImpution):
452
+ @annotate.format_doc(indent=3) # type: ignore
453
+ class MetabolicExprImputation(ExprImputation):
583
454
  """{{Summary}}
584
455
 
585
456
  You can turn off the imputation by setting the `noimpute` option
586
457
  of the process group to `True`.
587
458
  """
459
+
588
460
  requires = self.p_mutater
589
461
 
590
- return MetabolicExprImpution
462
+ return MetabolicExprImputation
591
463
 
592
- @ProcGroup.add_proc
464
+ @ProcGroup.add_proc # type: ignore
593
465
  def p_pathway_activity(self) -> Type[Proc]:
594
466
  """Build MetabolicPathwayActivity process"""
595
- return Proc.from_proc(
596
- _MetabolicPathwayActivity,
467
+ return Proc.from_proc( # type: ignore
468
+ MetabolicPathwayActivity,
597
469
  "MetabolicPathwayActivity",
598
- requires=self.p_expr_impute,
470
+ requires=self.p_expr_impute, # type: ignore
599
471
  order=-1,
472
+ envs_depth=5,
600
473
  envs={
601
474
  "ncores": self.opts.ncores,
602
475
  "gmtfile": self.opts.gmtfile,
603
- "grouping": self.opts.grouping,
604
- "grouping_prefix": self.opts.grouping_prefix,
605
- "subsetting": self.opts.subsetting,
606
- "subsetting_prefix": self.opts.subsetting_prefix,
476
+ "group_by": self.opts.group_by,
477
+ "subset_by": self.opts.subset_by,
607
478
  },
608
479
  )
609
480
 
610
- @ProcGroup.add_proc
481
+ @ProcGroup.add_proc # type: ignore
611
482
  def p_pathway_heterogeneity(self) -> Type[Proc]:
612
483
  """Build MetabolicPathwayHeterogeneity process"""
613
- return Proc.from_proc(
614
- _MetabolicPathwayHeterogeneity,
484
+ return Proc.from_proc( # type: ignore
485
+ MetabolicPathwayHeterogeneity,
615
486
  "MetabolicPathwayHeterogeneity",
616
- requires=self.p_expr_impute,
487
+ requires=self.p_mutater, # type: ignore
488
+ envs_depth=5,
617
489
  envs={
618
490
  "ncores": self.opts.ncores,
619
491
  "gmtfile": self.opts.gmtfile,
620
- "grouping": self.opts.grouping,
621
- "grouping_prefix": self.opts.grouping_prefix,
622
- "subsetting": self.opts.subsetting,
623
- "subsetting_prefix": self.opts.subsetting_prefix,
492
+ "group_by": self.opts.group_by,
493
+ "subset_by": self.opts.subset_by,
624
494
  },
625
495
  )
626
496
 
627
- @ProcGroup.add_proc
497
+ @ProcGroup.add_proc # type: ignore
628
498
  def p_features(self) -> Type[Proc]:
629
499
  """Build MetabolicFeatures process"""
630
- return Proc.from_proc(
631
- _MetabolicFeatures,
500
+ return Proc.from_proc( # type: ignore
501
+ MetabolicFeatures,
632
502
  "MetabolicFeatures",
633
- requires=self.p_expr_impute,
634
- envs={
635
- "ncores": self.opts.ncores,
636
- "gmtfile": self.opts.gmtfile,
637
- "grouping": self.opts.grouping,
638
- "grouping_prefix": self.opts.grouping_prefix,
639
- "subsetting": self.opts.subsetting,
640
- "subsetting_prefix": self.opts.subsetting_prefix,
641
- },
642
- )
643
-
644
- @ProcGroup.add_proc
645
- def p_features_intra_subset(self) -> Type[Proc]:
646
- """Build MetabolicFeaturesIntraSubset process"""
647
- if self.opts.subsetting_comparison and not self.opts.subsetting:
648
- raise ValueError(
649
- "Cannot use `subsetting_comparison` without `subsetting`."
650
- )
651
-
652
- return Proc.from_proc(
653
- _MetabolicFeaturesIntraSubset,
654
- "MetabolicFeaturesIntraSubset",
655
- requires=self.p_expr_impute,
503
+ requires=self.p_expr_impute, # type: ignore
504
+ envs_depth=5,
656
505
  envs={
657
506
  "ncores": self.opts.ncores,
658
507
  "gmtfile": self.opts.gmtfile,
659
- "grouping": self.opts.grouping,
660
- "grouping_prefix": self.opts.grouping_prefix,
661
- "subsetting": self.opts.subsetting,
662
- "subsetting_prefix": self.opts.subsetting_prefix,
663
- "subsetting_comparison": self.opts.subsetting_comparison,
508
+ "group_by": self.opts.group_by,
509
+ "subset_by": self.opts.subset_by,
664
510
  },
665
511
  )
666
512