biopipen 0.33.1__py3-none-any.whl → 0.34.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (150) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +328 -292
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +4 -1
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/MarkersFinder.R +481 -215
  73. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  74. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  75. biopipen/scripts/scrna/RadarPlots.R +1 -1
  76. biopipen/scripts/scrna/ScFGSEA.R +231 -76
  77. biopipen/scripts/scrna/ScSimulation.R +11 -10
  78. biopipen/scripts/scrna/ScVelo.py +605 -0
  79. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  80. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  81. biopipen/scripts/scrna/SeuratClusterStats-features.R +43 -30
  82. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  83. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  84. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  85. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  86. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  87. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  88. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  89. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  90. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  91. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  92. biopipen/scripts/scrna/Subset10X.R +2 -2
  93. biopipen/scripts/scrna/TopExpressingGenes.R +144 -185
  94. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  95. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  96. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  99. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  100. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  101. biopipen/scripts/snp/PlinkFreq.R +34 -41
  102. biopipen/scripts/snp/PlinkHWE.R +23 -18
  103. biopipen/scripts/snp/PlinkHet.R +26 -22
  104. biopipen/scripts/snp/PlinkIBD.R +30 -34
  105. biopipen/scripts/stats/ChowTest.R +9 -8
  106. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  107. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  108. biopipen/scripts/stats/Mediation.R +8 -8
  109. biopipen/scripts/stats/MetaPvalue.R +11 -13
  110. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  111. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  112. biopipen/scripts/tcr/ClonalStats.R +6 -5
  113. biopipen/scripts/tcr/CloneResidency.R +3 -3
  114. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  115. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  116. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  117. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  118. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  119. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  120. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  121. biopipen/scripts/tcr/TCRClustering.R +86 -97
  122. biopipen/scripts/tcr/TESSA.R +65 -115
  123. biopipen/scripts/tcr/VJUsage.R +5 -5
  124. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  125. biopipen/utils/common_docstrs.py +66 -63
  126. biopipen/utils/reporter.py +177 -0
  127. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/METADATA +2 -1
  128. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/RECORD +130 -145
  129. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/WHEEL +1 -1
  130. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  131. biopipen/reports/scrna/ScFGSEA.svelte +0 -16
  132. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  133. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  134. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  135. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  136. biopipen/reports/utils/gsea.liq +0 -110
  137. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  138. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  139. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  140. biopipen/utils/caching.R +0 -44
  141. biopipen/utils/gene.R +0 -95
  142. biopipen/utils/gsea.R +0 -329
  143. biopipen/utils/io.R +0 -20
  144. biopipen/utils/misc.R +0 -602
  145. biopipen/utils/mutate_helpers.R +0 -581
  146. biopipen/utils/plot.R +0 -209
  147. biopipen/utils/repr.R +0 -146
  148. biopipen/utils/rnaseq.R +0 -48
  149. biopipen/utils/single_cell.R +0 -207
  150. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/entry_points.txt +0 -0
@@ -1,9 +1,10 @@
1
1
  """Metabolic landscape analysis for scRNA-seq data"""
2
+
2
3
  from __future__ import annotations
3
4
  from pathlib import Path
4
5
  from typing import Type
5
6
 
6
- from diot import Diot
7
+ from diot import Diot # type: ignore
7
8
  from datar.tibble import tibble
8
9
  from pipen.utils import mark
9
10
  from pipen_args import ProcGroup
@@ -28,81 +29,76 @@ class MetabolicPathwayActivity(Proc):
28
29
 
29
30
  ![MetabolicPathwayActivity_violin](https://pwwang.github.io/immunopipe/latest/processes/images/MetabolicPathwayActivity_violin.png){: width="45%"}
30
31
 
32
+ Input:
33
+ sobjfile: The Seurat object file.
34
+ It should be loaded as a Seurat object
35
+
36
+ Output:
37
+ outdir: The output directory.
38
+ It will contain the pathway activity score files and plots.
39
+
31
40
  Envs:
32
- ntimes (type=int): Number of times to do the permutation
41
+ ntimes (type=int): Number of permutations to estimate the p-values
33
42
  ncores (type=int;pgarg): Number of cores to use for parallelization
34
43
  Defaults to `ScrnaMetabolicLandscape.ncores`
35
- heatmap_devpars (ns): Device parameters for the heatmap
36
- - width (type=int): Width of the heatmap
37
- - height (type=int): Height of the heatmap
38
- - res (type=int): Resolution of the heatmap
39
- violin_devpars (ns): Device parameters for the violin plot
40
- - width (type=int): Width of the violin plot
41
- - height (type=int): Height of the violin plot
42
- - res (type=int): Resolution of the violin plot
43
44
  gmtfile (pgarg): The GMT file with the metabolic pathways.
44
45
  Defaults to `ScrnaMetabolicLandscape.gmtfile`
45
- grouping (type=auto;pgarg;readonly): Defines the basic groups to
46
- investigate the metabolic activity, typically the clusters.
47
- Defaults to `ScrnaMetabolicLandscape.grouping`
48
- grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to group
49
- names. For example, if we have `grouping_prefix = "cluster"` and
50
- we have `1` and `2` in the `grouping` column, the groups
51
- will be named as `cluster_1` and `cluster_2`.
52
- Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
53
- subsetting (type=auto;pgarg;readonly): How do we subset the data. Other
54
- columns in the metadata to do comparisons. For example,
55
- `"TimePoint"` or `["TimePoint", "Response"]`.
56
- Defaults to `ScrnaMetabolicLandscape.subsetting`
57
- subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
58
- subset names.
59
- For example, if we have `subsetting_prefix = "timepoint"` and
60
- we have `pre` and `post` in the `subsetting` column, the subsets
61
- will be named as `timepoint_pre` and `timepoint_post`.
62
- If `subsetting` is a list, then this should also be a
63
- same-length list. If a single string is given, it will be
64
- repeated to a list with the same length as `subsetting`.
65
- Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`
66
-
67
- Requires:
68
- r-scater:
69
- - check: {{proc.lang}} <(echo "library(scater)")
70
- r-reshape2:
71
- - check: {{proc.lang}} <(echo "library(reshape2)")
72
- r-rcolorbrewer:
73
- - check: {{proc.lang}} <(echo "library(RColorBrewer)")
74
- r-ggplot2:
75
- - check: {{proc.lang}} <(echo "library(ggplot2)")
76
- r-ggprism:
77
- - check: {{proc.lang}} <(echo "library(ggprism)")
78
- r-complexheatmap:
79
- - check: {{proc.lang}} <(echo "library(ComplexHeatmap)")
80
- r-parallel:
81
- - check: {{proc.lang}} <(echo "library(parallel)")
46
+ subset_by (pgarg;readonly): Subset the data by the given column in the
47
+ metadata. For example, `Response`.
48
+ `NA` values will be removed in this column.
49
+ Defaults to `ScrnaMetabolicLandscape.subset_by`
50
+ If None, the data will not be subsetted.
51
+ group_by (pgarg;readonly): Group the data by the given column in the
52
+ metadata. For example, `cluster`.
53
+ Defaults to `ScrnaMetabolicLandscape.group_by`
54
+ plots (type=json): The plots to generate.
55
+ Names will be used as the prefix for the output files. Values will be
56
+ a dictionary with the following keys:
57
+ * `plot_type` is the type of plot to generate. One of `heatmap`,
58
+ `box`, `violin` or `merged_heatmap` (all subsets in one plot).
59
+ * `devpars` is a dictionary with the device parameters for the plot.
60
+ * Other arguments for `plotthis::Heatmap()`, `plotthis::BoxPlot()`
61
+ or `plotthis::ViolinPlot()`, depending on the `plot_type`.
62
+ cases (type=json): Multiple cases for the analysis.
63
+ If you only have one case, you can specify the parameters directly to
64
+ `envs.ntimes`, `envs.subset_by`, `envs.group_by`, `envs.group1`,
65
+ `envs.group2`, and `envs.plots`. The name of the case will be
66
+ `envs.subset_by`.
67
+ If you have multiple cases, you can specify the parameters for each case
68
+ in a dictionary. The keys will be the names of the cases and the values
69
+ will be dictionaries with the parameters for each case, where the values
70
+ will be inherited from `envs.ntimes`, `envs.subset_by`, `envs.group_by`,
71
+ `envs.group1`, `envs.group2`, and `envs.plots`.
82
72
  """ # noqa: E501
73
+
83
74
  input = "sobjfile:file"
84
75
  output = "outdir:dir:{{in.sobjfile | stem}}.pathwayactivity"
85
76
  envs = {
86
77
  "ntimes": 5000,
87
78
  "ncores": config.misc.ncores,
88
- "heatmap_devpars": {},
89
- "violin_devpars": {},
90
79
  "gmtfile": None,
91
- "grouping": None,
92
- "grouping_prefix": "",
93
- "subsetting": None,
94
- "subsetting_prefix": "",
80
+ "subset_by": None,
81
+ "group_by": None,
82
+ "plots": {
83
+ "Pathway Activity (violin plot)": {
84
+ "plot_type": "violin",
85
+ "add_box": True,
86
+ "devpars": {"res": 100},
87
+ },
88
+ "Pathway Activity (heatmap)": {
89
+ "plot_type": "heatmap",
90
+ "devpars": {"res": 100},
91
+ },
92
+ },
93
+ "cases": {},
95
94
  }
96
95
  lang = config.lang.rscript
97
96
  script = (
98
- "file://../scripts/"
99
- "scrna_metabolic_landscape/MetabolicPathwayActivity.R"
97
+ "file://../scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R"
100
98
  )
101
99
  plugin_opts = {
102
- "report": (
103
- "file://../reports/"
104
- "scrna_metabolic_landscape/MetabolicPathwayActivity.svelte"
105
- )
100
+ "report":
101
+ "file://../reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte"
106
102
  }
107
103
 
108
104
 
@@ -113,11 +109,18 @@ class MetabolicFeatures(Proc):
113
109
  The enrichment analysis is done with [`fgsea`](https://bioconductor.org/packages/release/bioc/html/fgsea.html)
114
110
  package or the [`GSEA_R`](https://github.com/GSEA-MSigDB/GSEA_R) package.
115
111
 
112
+ Input:
113
+ sobjfile: The Seurat object file in rds.
114
+ It should be loaded as a Seurat object
115
+
116
+ Output:
117
+ outdir: The output directory.
118
+ It will contain the GSEA results and plots.
119
+
116
120
  Envs:
117
- ncores (type=int;pgarg): Number of cores to use for parallelization.
118
- Defaults to `ScrnaMetabolicLandscape.ncores`
119
- fgsea (flag): Whether to do fast gsea analysis using `fgsea` package.
120
- If `False`, the `GSEA_R` package will be used.
121
+ ncores (type=int;pgarg): Number of cores to use for parallelization for
122
+ the comparisons for each subset and group.
123
+ Defaults to `ScrnaMetabolicLandscape.ncores`.
121
124
  prerank_method (choice): Method to use for gene preranking.
122
125
  Signal to noise: the larger the differences of the means
123
126
  (scaled by the standard deviations); that is, the more distinct
@@ -143,142 +146,81 @@ class MetabolicFeatures(Proc):
143
146
  - ratio_of_classes: Also referred to as fold change
144
147
  - diff_of_classes: Difference of class means
145
148
  - log2_ratio_of_classes: Log2 ratio of class means
146
- top (type=int): N top of enriched pathways to show
147
149
  gmtfile (pgarg): The GMT file with the metabolic pathways.
148
150
  Defaults to `ScrnaMetabolicLandscape.gmtfile`
149
- grouping (type=auto;pgarg;readonly): Defines the basic groups to
150
- investigate the metabolic activity.
151
- Defaults to `ScrnaMetabolicLandscape.grouping`
152
- grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to
153
- group names.
154
- Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
155
- subsetting (type=auto;pgarg;readonly): How do we subset the data.
156
- Another column(s) in the metadata.
157
- Defaults to `ScrnaMetabolicLandscape.subsetting`
158
- subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
159
- subset names.
160
- Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`
161
-
162
- Requires:
163
- r-parallel:
164
- - check: {{proc.lang}} <(echo "library(parallel)")
165
- r-fgsea:
166
- - check: {{proc.lang}} <(echo "library(fgsea)")
151
+ subset_by (pgarg;readonly): Subset the data by the given column in the
152
+ metadata. For example, `Response`.
153
+ `NA` values will be removed in this column.
154
+ Defaults to `ScrnaMetabolicLandscape.subset_by`
155
+ If None, the data will not be subsetted.
156
+ group_by (pgarg;readonly): Group the data by the given column in the
157
+ metadata. For example, `cluster`.
158
+ Defaults to `ScrnaMetabolicLandscape.group_by`
159
+ comparisons (type=list): The comparison groups to use for the analysis.
160
+ If not provided, each group in the `group_by` column will be used
161
+ to compare with the other groups.
162
+ If a single group is provided as an element, it will be used to
163
+ compare with all the other groups.
164
+ For example, if we have `group_by = "cluster"` and we have
165
+ `1`, `2` and `3` in the `group_by` column, we could have
166
+ `comparisons = ["1", "2"]`, which will compare the group `1` with groups
167
+ `2` and `3`, and the group `2` with groups `1` and `3`. We could also
168
+ have `comparisons = ["1,2", "1,3"]`, which will compare the group `1` with
169
+ group `2` and group `1` with group `3`.
170
+ fgsea_args (type=json): Other arguments for the `fgsea::fgsea()` function.
171
+ For example, `{"minSize": 15, "maxSize": 500}`.
172
+ See <https://rdrr.io/bioc/fgsea/man/fgsea.html> for more details.
173
+ plots (type=json): The plots to generate.
174
+ Names will be used as the title for the plot. Values will be the arguments
175
+ passed to `biopipen.utils::VizGSEA()` function.
176
+ See <https://pwwang.github.io/biopipen.utils.R/reference/VizGSEA.html>.
177
+ A key `level` is supported to specify the level of the plot.
178
+ Possible values are `case`, which includes all subsets and groups in the
179
+ case; `subset`, which includes all groups in the subset; otherwise, it
180
+ will plot for the groups.
181
+ For `case`/`subset` level plots, only the "dot" `plot_type` is supported
182
+ for now; the values will be passed to `plotthis::DotPlot()`.
183
+ cases (type=json): Multiple cases for the analysis.
184
+ If you only have one case, you can specify the parameters directly to
185
+ `envs.prerank_method`, `envs.subset_by`, `envs.group_by`,
186
+ `envs.comparisons`, `envs.fgsea_args` and `envs.plots`.
187
+ The name of this default case will be `envs.subset_by`.
188
+ If you have multiple cases, you can specify the parameters for each case
189
+ in a dictionary. The keys will be the names of the cases and the values
190
+ will be dictionaries with the parameters for each case, where the values
191
+ will be inherited from `envs.prerank_method`,
192
+ `envs.subset_by`, `envs.group_by`, `envs.comparisons`, `envs.fgsea_args`
193
+ and `envs.plots`.
167
194
  """ # noqa: E501
195
+
168
196
  input = "sobjfile:file"
169
197
  output = "outdir:dir:{{in.sobjfile | stem}}.pathwayfeatures"
170
198
  lang = config.lang.rscript
171
199
  envs = {
172
200
  "ncores": config.misc.ncores,
173
- "fgsea": True,
174
201
  "prerank_method": "signal_to_noise",
175
- "top": 10,
176
202
  "gmtfile": None,
177
- "grouping": None,
178
- "grouping_prefix": "",
179
- "subsetting": None,
180
- "subsetting_prefix": "",
181
- }
182
- script = (
183
- "file://../scripts/scrna_metabolic_landscape/MetabolicFeatures.R"
184
- )
185
- plugin_opts = {
186
- "report": (
187
- "file://../reports/"
188
- "scrna_metabolic_landscape/MetabolicFeatures.svelte"
189
- )
190
- }
191
-
192
-
193
- class MetabolicFeaturesIntraSubset(Proc):
194
- """Intra-subset metabolic features - Enrichment analysis in details
195
-
196
- Similar to the [`MetabolicFeatures`](!!#biopipennsscrna_metabolic_landscapemetabolicfeatures)
197
- process, this process performs enrichment analysis for the metabolic pathways for
198
- each subset in each group, instead of each group in each subset.
199
-
200
- Envs:
201
- ncores (type=int; pgarg): Number of cores to use for parallelization
202
- Defaults to `ScrnaMetabolicLandscape.ncores`
203
- fgsea (flag): Whether to do fast gsea analysis
204
- prerank_method (choice): Method to use for gene preranking
205
- Signal to noise: the larger the differences of the means
206
- (scaled by the standard deviations); that is, the more distinct
207
- the gene expression is in each phenotype and the more the gene
208
- acts as a “class marker.”.
209
- Absolute signal to noise: the absolute value of the signal to
210
- noise.
211
- T test: Uses the difference of means scaled by the standard
212
- deviation and number of samples.
213
- Ratio of classes: Uses the ratio of class means to calculate
214
- fold change for natural scale data.
215
- Diff of classes: Uses the difference of class means to calculate
216
- fold change for nature scale data
217
- Log2 ratio of classes: Uses the log2 ratio of class means to
218
- calculate fold change for natural scale data. This is the
219
- recommended statistic for calculating fold change for log scale
220
- data.
221
- - signal_to_noise: Signal to noise
222
- - s2n: Alias of signal_to_noise
223
- - abs_signal_to_noise: absolute signal to noise
224
- - abs_s2n: Alias of abs_signal_to_noise
225
- - t_test: T test
226
- - ratio_of_classes: Also referred to as fold change
227
- - diff_of_classes: Difference of class means
228
- - log2_ratio_of_classes: Log2 ratio of class means
229
- top (type=int): N top of enriched pathways to show
230
- gmtfile (pgarg): The GMT file with the metabolic pathways.
231
- Defaults to `ScrnaMetabolicLandscape.gmtfile`
232
- grouping (type=auto;pgarg;readonly): Defines the basic groups to
233
- investigate the metabolic activity.
234
- Defaults to `ScrnaMetabolicLandscape.grouping`
235
- grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to group
236
- names.
237
- Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
238
- subsetting (type=auto;pgarg;readonly): How do we subset the data.
239
- Another column(s) in the metadata.
240
- Defaults to `ScrnaMetabolicLandscape.subsetting`
241
- subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
242
- subset names.
243
- Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`
244
- subsetting_comparison (type=json;pgarg;readonly): How do we compare the
245
- subsets.
246
- Defaults to `ScrnaMetabolicLandscape.subsetting_comparison`
247
-
248
- Requires:
249
- r-parallel:
250
- - check: {{proc.lang}} <(echo "library(parallel)")
251
- r-scater:
252
- - check: {{proc.lang}} <(echo "library(scater)")
253
- r-fgsea:
254
- - check: {{proc.lang}} <(echo "library(fgsea)")
255
- """ # noqa: E501
256
- input = "sobjfile:file"
257
- output = (
258
- "outdir:dir:{{in.sobjfile | stem}}.intra-subset-pathwayfeatures"
259
- )
260
- lang = config.lang.rscript
261
- envs = {
262
- "ncores": config.misc.ncores,
263
- "gmtfile": None,
264
- "fgsea": True,
265
- "prerank_method": "signal_to_noise",
266
- "top": 10,
267
- "grouping": None,
268
- "grouping_prefix": "",
269
- "subsetting": None,
270
- "subsetting_prefix": "",
271
- "subsetting_comparison": {},
203
+ "subset_by": None,
204
+ "group_by": None,
205
+ "comparisons": [],
206
+ "fgsea_args": {},
207
+ "plots": {
208
+ "Summary Plot": {
209
+ "plot_type": "summary",
210
+ "top_term": 10,
211
+ "devpars": {"res": 100},
212
+ },
213
+ "Enrichment Plots": {
214
+ "plot_type": "gsea",
215
+ "top_term": 10,
216
+ "devpars": {"res": 100},
217
+ },
218
+ },
219
+ "cases": {},
272
220
  }
273
- script = (
274
- "file://../scripts/scrna_metabolic_landscape/"
275
- "MetabolicFeaturesIntraSubset.R"
276
- )
221
+ script = "file://../scripts/scrna_metabolic_landscape/MetabolicFeatures.R"
277
222
  plugin_opts = {
278
- "report": (
279
- "file://../reports/scrna_metabolic_landscape/"
280
- "MetabolicFeaturesIntraSubset.svelte"
281
- )
223
+ "report": "file://../reports/scrna_metabolic_landscape/MetabolicFeatures.svelte"
282
224
  }
283
225
 
284
226
 
@@ -296,7 +238,6 @@ class MetabolicPathwayHeterogeneity(Proc):
296
238
 
297
239
  ![MetabolicPathwayHeterogeneity](https://pwwang.github.io/immunopipe/latest/processes/images/MetabolicPathwayHeterogeneity.png)
298
240
 
299
-
300
241
  Envs:
301
242
  gmtfile (pgarg): The GMT file with the metabolic pathways.
302
243
  Defaults to `ScrnaMetabolicLandscape.gmtfile`
@@ -305,43 +246,33 @@ class MetabolicPathwayHeterogeneity(Proc):
305
246
  the enriched pathways
306
247
  ncores (type=int;pgarg): Number of cores to use for parallelization
307
248
  Defaults to `ScrnaMetabolicLandscape.ncores`
308
- bubble_devpars (ns): The devpars for the bubble plot
309
- - width (type=int): The width of the plot
310
- - height (type=int): The height of the plot
311
- - res (type=int): The resolution of the plot
312
- grouping (type=auto;pgarg;readonly): Defines the basic groups to
313
- investigate the metabolic activity.
314
- Defaults to `ScrnaMetabolicLandscape.grouping`
315
- grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to group
316
- names.
317
- Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
318
- subsetting (type=auto;pgarg;readonly): How do we subset the data.
319
- Another column(s) in the metadata.
320
- Defaults to `ScrnaMetabolicLandscape.subsetting`
321
- subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
322
- subset names.
323
- Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`
324
-
325
- Requires:
326
- r-gtools:
327
- - check: {{proc.lang}} <(echo "library(gtools)")
328
- r-ggplot2:
329
- - check: {{proc.lang}} <(echo "library(ggplot2)")
330
- r-ggprism:
331
- - check: {{proc.lang}} <(echo "library(ggprism)")
332
- r-parallel:
333
- - check: {{proc.lang}} <(echo "library(parallel)")
334
- r-dplyr:
335
- - check: {{proc.lang}} <(echo "library(dplyr)")
336
- r-tibble:
337
- - check: {{proc.lang}} <(echo "library(tibble)")
338
- r-enrichr:
339
- - check: {{proc.lang}} <(echo "library(enrichR)")
340
- r-data.table:
341
- - check: {{proc.lang}} <(echo "library(data.table)")
342
- r-fgsea:
343
- - check: {{proc.lang}} <(echo "library(fgsea)")
249
+ subset_by (pgarg;readonly): Subset the data by the given column in the
250
+ metadata. For example, `Response`.
251
+ `NA` values will be removed in this column.
252
+ Defaults to `ScrnaMetabolicLandscape.subset_by`
253
+ If None, the data will not be subsetted.
254
+ group_by (pgarg;readonly): Group the data by the given column in the
255
+ metadata. For example, `cluster`.
256
+ Defaults to `ScrnaMetabolicLandscape.group_by`
257
+ fgsea_args (type=json): Other arguments for the `fgsea::fgsea()` function.
258
+ For example, `{"minSize": 15, "maxSize": 500}`.
259
+ See <https://rdrr.io/bioc/fgsea/man/fgsea.html> for more details.
260
+ plots (type=json): The plots to generate.
261
+ Names will be used as the title for the plot. Values will be the arguments
262
+ passed to `biopipen.utils::VizGSEA()` function.
263
+ See <https://pwwang.github.io/biopipen.utils.R/reference/VizGSEA.html>.
264
+ cases (type=json): Multiple cases for the analysis.
265
+ If you only have one case, you can specify the parameters directly to
266
+ `envs.subset_by`, `envs.group_by`, `envs.fgsea_args`, `envs.plots`,
267
+ `envs.select_pcs`, and `envs.pathway_pval_cutoff`.
268
+ The name of this default case will be `envs.subset_by`.
269
+ If you have multiple cases, you can specify the parameters for each case
270
+ in a dictionary. The keys will be the names of the cases and the values
271
+ will be dictionaries with the parameters for each case, where the values
272
+ will be inherited from `envs.subset_by`, `envs.group_by`, `envs.fgsea_args`,
273
+ `envs.plots`, `envs.select_pcs`, and `envs.pathway_pval_cutoff`.
344
274
  """ # noqa: E501
275
+
345
276
  input = "sobjfile:file"
346
277
  output = "outdir:dir:{{in.sobjfile | stem}}.pathwayhetero"
347
278
  lang = config.lang.rscript
@@ -350,11 +281,16 @@ class MetabolicPathwayHeterogeneity(Proc):
350
281
  "select_pcs": 0.8,
351
282
  "pathway_pval_cutoff": 0.01,
352
283
  "ncores": config.misc.ncores,
353
- "bubble_devpars": {},
354
- "grouping": None,
355
- "grouping_prefix": "",
356
- "subsetting": None,
357
- "subsetting_prefix": "",
284
+ "subset_by": None,
285
+ "group_by": None,
286
+ "fgsea_args": {"scoreType": "std", "nproc": 1},
287
+ "plots": {
288
+ "Pathway Heterogeneity": {
289
+ "plot_type": "dot",
290
+ "devpars": {"res": 100},
291
+ },
292
+ },
293
+ "cases": {},
358
294
  }
359
295
  script = (
360
296
  "file://../scripts/scrna_metabolic_landscape/"
@@ -399,49 +335,19 @@ class ScrnaMetabolicLandscape(ProcGroup):
399
335
  dependent on other processes, this option will be used to determine
400
336
  whether the input is a seurat object or not.
401
337
  noimpute (flag): Whether to skip imputation for the dropouts.
402
- If False, the values will be left as is.
338
+ If True, the values will be left as is.
403
339
  gmtfile: The GMT file with the metabolic pathways. The gene names should
404
340
  match the gene names in the gene list in RNAData or
405
341
  the Seurat object.
406
342
  You can also provide a URL to the GMT file.
407
343
  For example, from
408
344
  <https://download.baderlab.org/EM_Genesets/current_release/Human/symbol/>.
409
- grouping: defines the basic groups to investigate the metabolic activity
410
- Typically the clusters.
411
- grouping_prefix: Working as a prefix to group names
412
- For example, if we have `grouping_prefix = "cluster"` and
413
- we have `1` and `2` in the `grouping` column, the groups
414
- will be named as `cluster_1` and `cluster_2`
415
- subsetting (type=auto): How do we subset the data. Other columns in the
416
- metadata to do comparisons. For example, `"TimePoint"` or
417
- `["TimePoint", "Response"]`
418
- subsetting_prefix (type=auto): Working as a prefix to subset names
419
- For example, if we have `subsetting_prefix = "timepoint"` and
420
- we have `pre` and `post` in the `subsetting` column, the subsets
421
- will be named as `timepoint_pre` and `timepoint_post`
422
- If `subsetting` is a list, then this should also be a same-length
423
- list. If a single string is given, it will be repeated to a list
424
- with the same length as `subsetting`
425
- subsetting_comparison (type=json): What kind of comparisons are we
426
- doing to compare cells from different subsets.
427
- It should be dict with keys as the names of the comparisons and
428
- values as the 2 comparison groups from the `subsetting` column.
429
- For example, if we have `pre` and `post` in the `subsetting` column,
430
- we could have
431
- `subsetting_comparison = {"pre_vs_post": ["post", "pre"]}`
432
- The second group will be the control group in the comparison.
433
- If we also have `1`, `2` and `3` in the `grouping` column,
434
- by default, the comparisons are done within each subset for
435
- each group. For example, for group `1`, groups `2` and `3`
436
- will be used as control, and for group `2`, groups `1` and `3`
437
- will be used as control, and for group `3`, groups `1` and `2`
438
- will be used as control. It is similar to `Seurat::FindMarkers`
439
- procedure. With this option, the comparisons are also done to
440
- compare cells from different subsets within each group. With the
441
- example above, we will have `pre_vs_post` comparisons within
442
- each group.
443
- If `subsetting` is a list, this must be a list of dicts with the
444
- same length.
345
+ subset_by (pgarg;readonly): Subset the data by the given column in the
346
+ metadata. For example, `Response`.
347
+ `NA` values will be removed in this column.
348
+ If None, the data will not be subsetted.
349
+ group_by (pgarg;readonly): Group the data by the given column in the
350
+ metadata. For example, `cluster`.
445
351
  mutaters (type=json): Add new columns to the metadata for
446
352
  grouping/subsetting.
447
353
  They are passed to `sobj@meta.data |> mutate(...)`. For example,
@@ -451,65 +357,25 @@ class ScrnaMetabolicLandscape(ProcGroup):
451
357
  ncores (type=int): Number of cores to use for parallelization for
452
358
  each process
453
359
  """
360
+
454
361
  DEFAULTS = Diot(
455
362
  metafile=None,
456
363
  is_seurat=None,
457
364
  gmtfile=None,
458
- grouping=None,
459
- grouping_prefix="",
460
- subsetting=None,
461
- subsetting_prefix=None,
462
- subsetting_comparison={},
463
365
  mutaters=None,
464
- noimpute=False,
366
+ noimpute=True,
465
367
  ncores=config.misc.ncores,
368
+ subset_by=None,
369
+ group_by=None,
466
370
  )
467
371
 
468
372
  def post_init(self):
469
373
  """Load runtime processes"""
470
374
  if self.opts.metafile:
471
375
  suffix = Path(self.opts.metafile).suffix
472
- self.opts.is_seurat = suffix in (".rds", ".RDS")
473
-
474
- # Make sure the grouping is a list
475
- if self.opts.subsetting and not isinstance(self.opts.subsetting, list):
476
- self.opts.subsetting = [self.opts.subsetting]
477
-
478
- # Make sure the grouping is a list with the same length as subsetting
479
- if (
480
- self.opts.subsetting
481
- and not isinstance(self.opts.subsetting_prefix, list)
482
- ):
483
- self.opts.subsetting_prefix = [
484
- self.opts.subsetting_prefix
485
- ] * len(self.opts.subsetting)
486
-
487
- # Make sure the lengths of subsetting and subsetting_comparison the same
488
- if self.opts.subsetting:
489
- if len(self.opts.subsetting) == 1 and isinstance(
490
- self.opts.subsetting_comparison, dict
491
- ):
492
- self.opts.subsetting_comparison = [
493
- self.opts.subsetting_comparison
494
- ]
495
-
496
- if len(self.opts.subsetting) > 1 and not isinstance(
497
- self.opts.subsetting_comparison, list
498
- ):
499
- raise ValueError(
500
- "The length of `subsetting` is larger than 1, "
501
- "but `subsetting_comparison` is not a list of dicts."
502
- )
503
-
504
- if len(self.opts.subsetting) != len(
505
- self.opts.subsetting_comparison
506
- ):
507
- raise ValueError(
508
- "The length of `subsetting` and `subsetting_comparison` "
509
- "are not the same"
510
- )
511
-
512
- @ProcGroup.add_proc
376
+ self.opts.is_seurat = suffix in (".rds", ".RDS", ".qs", ".qs2")
377
+
378
+ @ProcGroup.add_proc # type: ignore
513
379
  def p_input(self) -> Type[Proc]:
514
380
  """Build MetabolicInputs process"""
515
381
  from .misc import File2Proc
@@ -527,8 +393,8 @@ class ScrnaMetabolicLandscape(ProcGroup):
527
393
 
528
394
  return MetabolicInput
529
395
 
530
- @ProcGroup.add_proc
531
- def p_preparing(self) -> Type[Proc]:
396
+ @ProcGroup.add_proc # type: ignore
397
+ def p_preparing(self) -> Type[Proc] | None:
532
398
  """Build SeuratPreparing process"""
533
399
  if self.opts.is_seurat:
534
400
  return None
@@ -540,11 +406,11 @@ class ScrnaMetabolicLandscape(ProcGroup):
540
406
 
541
407
  return MetabolicSeuratPreparing
542
408
 
543
- @ProcGroup.add_proc
409
+ @ProcGroup.add_proc # type: ignore
544
410
  def p_clustering(self) -> Type[Proc]:
545
411
  """Build SeuratClustering process"""
546
412
  if self.opts.is_seurat:
547
- return self.p_input
413
+ return self.p_input # type: ignore
548
414
 
549
415
  from .scrna import SeuratClustering
550
416
 
@@ -553,11 +419,11 @@ class ScrnaMetabolicLandscape(ProcGroup):
553
419
 
554
420
  return MetabolicSeuratClustering
555
421
 
556
- @ProcGroup.add_proc
422
+ @ProcGroup.add_proc # type: ignore
557
423
  def p_mutater(self) -> Type[Proc]:
558
424
  """Build SeuratMetadataMutater process"""
559
425
  if not self.opts.mutaters:
560
- return self.p_clustering
426
+ return self.p_clustering # type: ignore
561
427
 
562
428
  from .scrna import SeuratMetadataMutater
563
429
 
@@ -571,97 +437,72 @@ class ScrnaMetabolicLandscape(ProcGroup):
571
437
 
572
438
  return MetabolicSeuratMetadataMutater
573
439
 
574
- @ProcGroup.add_proc
440
+ @ProcGroup.add_proc # type: ignore
575
441
  def p_expr_impute(self) -> Type[Proc]:
576
442
  """Build process"""
577
443
  if self.opts.noimpute:
578
- return self.p_mutater
444
+ return self.p_mutater # type: ignore
579
445
 
580
446
  from .scrna import ExprImputation
581
447
 
582
- @annotate.format_doc(indent=3)
448
+ @annotate.format_doc(indent=3) # type: ignore
583
449
  class MetabolicExprImputation(ExprImputation):
584
450
  """{{Summary}}
585
451
 
586
452
  You can turn off the imputation by setting the `noimpute` option
587
453
  of the process group to `True`.
588
454
  """
455
+
589
456
  requires = self.p_mutater
590
457
 
591
458
  return MetabolicExprImputation
592
459
 
593
- @ProcGroup.add_proc
460
+ @ProcGroup.add_proc # type: ignore
594
461
  def p_pathway_activity(self) -> Type[Proc]:
595
462
  """Build MetabolicPathwayActivity process"""
596
- return Proc.from_proc(
463
+ return Proc.from_proc( # type: ignore
597
464
  MetabolicPathwayActivity,
598
465
  "MetabolicPathwayActivity",
599
- requires=self.p_expr_impute,
466
+ requires=self.p_expr_impute, # type: ignore
600
467
  order=-1,
468
+ envs_depth=5,
601
469
  envs={
602
470
  "ncores": self.opts.ncores,
603
471
  "gmtfile": self.opts.gmtfile,
604
- "grouping": self.opts.grouping,
605
- "grouping_prefix": self.opts.grouping_prefix,
606
- "subsetting": self.opts.subsetting,
607
- "subsetting_prefix": self.opts.subsetting_prefix,
472
+ "group_by": self.opts.group_by,
473
+ "subset_by": self.opts.subset_by,
608
474
  },
609
475
  )
610
476
 
611
- @ProcGroup.add_proc
477
+ @ProcGroup.add_proc # type: ignore
612
478
  def p_pathway_heterogeneity(self) -> Type[Proc]:
613
479
  """Build MetabolicPathwayHeterogeneity process"""
614
- return Proc.from_proc(
480
+ return Proc.from_proc( # type: ignore
615
481
  MetabolicPathwayHeterogeneity,
616
482
  "MetabolicPathwayHeterogeneity",
617
- requires=self.p_expr_impute,
483
+ requires=self.p_mutater, # type: ignore
484
+ envs_depth=5,
618
485
  envs={
619
486
  "ncores": self.opts.ncores,
620
487
  "gmtfile": self.opts.gmtfile,
621
- "grouping": self.opts.grouping,
622
- "grouping_prefix": self.opts.grouping_prefix,
623
- "subsetting": self.opts.subsetting,
624
- "subsetting_prefix": self.opts.subsetting_prefix,
488
+ "group_by": self.opts.group_by,
489
+ "subset_by": self.opts.subset_by,
625
490
  },
626
491
  )
627
492
 
628
- @ProcGroup.add_proc
493
+ @ProcGroup.add_proc # type: ignore
629
494
  def p_features(self) -> Type[Proc]:
630
495
  """Build MetabolicFeatures process"""
631
- return Proc.from_proc(
496
+ return Proc.from_proc( # type: ignore
632
497
  MetabolicFeatures,
633
498
  "MetabolicFeatures",
634
- requires=self.p_expr_impute,
635
- envs={
636
- "ncores": self.opts.ncores,
637
- "gmtfile": self.opts.gmtfile,
638
- "grouping": self.opts.grouping,
639
- "grouping_prefix": self.opts.grouping_prefix,
640
- "subsetting": self.opts.subsetting,
641
- "subsetting_prefix": self.opts.subsetting_prefix,
642
- },
643
- )
644
-
645
- @ProcGroup.add_proc
646
- def p_features_intra_subset(self) -> Type[Proc]:
647
- """Build MetabolicFeaturesIntraSubset process"""
648
- if self.opts.subsetting_comparison and not self.opts.subsetting:
649
- raise ValueError(
650
- "Cannot use `subsetting_comparison` without `subsetting`."
651
- )
652
-
653
- return Proc.from_proc(
654
- MetabolicFeaturesIntraSubset,
655
- "MetabolicFeaturesIntraSubset",
656
- requires=self.p_expr_impute,
499
+ requires=self.p_expr_impute, # type: ignore
500
+ envs_depth=5,
657
501
  envs={
658
502
  "ncores": self.opts.ncores,
659
503
  "gmtfile": self.opts.gmtfile,
660
- "grouping": self.opts.grouping,
661
- "grouping_prefix": self.opts.grouping_prefix,
662
- "subsetting": self.opts.subsetting,
663
- "subsetting_prefix": self.opts.subsetting_prefix,
664
- "subsetting_comparison": self.opts.subsetting_comparison,
504
+ "group_by": self.opts.group_by,
505
+ "subset_by": self.opts.subset_by,
665
506
  },
666
507
  )
667
508