biopipen-0.33.1-py3-none-any.whl → biopipen-0.34.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +10 -183
- biopipen/core/proc.py +5 -3
- biopipen/core/testing.py +8 -1
- biopipen/ns/bam.py +40 -4
- biopipen/ns/cnv.py +1 -1
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/delim.py +1 -1
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +38 -0
- biopipen/ns/plot.py +8 -0
- biopipen/ns/scrna.py +290 -288
- biopipen/ns/scrna_metabolic_landscape.py +207 -366
- biopipen/ns/tcr.py +165 -97
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
- biopipen/reports/snp/PlinkCallRate.svelte +2 -2
- biopipen/reports/snp/PlinkFreq.svelte +1 -1
- biopipen/reports/snp/PlinkHWE.svelte +1 -1
- biopipen/reports/snp/PlinkHet.svelte +1 -1
- biopipen/reports/snp/PlinkIBD.svelte +1 -1
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
- biopipen/scripts/bam/CNAClinic.R +41 -6
- biopipen/scripts/bam/CNVpytor.py +2 -1
- biopipen/scripts/bam/ControlFREEC.py +2 -3
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/cnv/AneuploidyScore.R +25 -13
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
- biopipen/scripts/cnv/TMADScore.R +4 -4
- biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
- biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +4 -1
- biopipen/scripts/gene/GeneNameConversion.R +14 -12
- biopipen/scripts/gsea/Enrichr.R +2 -2
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/PreRank.R +3 -3
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/plot/VennDiagram.R +2 -2
- biopipen/scripts/protein/ProdigySummary.R +34 -27
- biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
- biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
- biopipen/scripts/regulatory/motifs-common.R +10 -9
- biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
- biopipen/scripts/rnaseq/Simulation.R +0 -2
- biopipen/scripts/rnaseq/UnitConversion.R +6 -5
- biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
- biopipen/scripts/scrna/CellCellCommunication.py +1 -1
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
- biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
- biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
- biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
- biopipen/scripts/scrna/MarkersFinder.R +348 -217
- biopipen/scripts/scrna/MetaMarkers.R +3 -3
- biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +157 -75
- biopipen/scripts/scrna/ScSimulation.R +11 -10
- biopipen/scripts/scrna/ScVelo.py +605 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
- biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
- biopipen/scripts/scrna/SeuratClustering.R +31 -48
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
- biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
- biopipen/scripts/scrna/SeuratPreparing.R +76 -24
- biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
- biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
- biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
- biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
- biopipen/scripts/snp/MatrixEQTL.R +39 -20
- biopipen/scripts/snp/PlinkCallRate.R +43 -34
- biopipen/scripts/snp/PlinkFreq.R +34 -41
- biopipen/scripts/snp/PlinkHWE.R +23 -18
- biopipen/scripts/snp/PlinkHet.R +26 -22
- biopipen/scripts/snp/PlinkIBD.R +30 -34
- biopipen/scripts/stats/ChowTest.R +9 -8
- biopipen/scripts/stats/DiffCoexpr.R +13 -11
- biopipen/scripts/stats/LiquidAssoc.R +7 -8
- biopipen/scripts/stats/Mediation.R +8 -8
- biopipen/scripts/stats/MetaPvalue.R +11 -13
- biopipen/scripts/stats/MetaPvalue1.R +6 -5
- biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
- biopipen/scripts/tcr/ClonalStats.R +5 -4
- biopipen/scripts/tcr/CloneResidency.R +3 -3
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
- biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
- biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
- biopipen/scripts/tcr/ScRepLoading.R +114 -92
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +86 -97
- biopipen/scripts/tcr/TESSA.R +65 -115
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
- biopipen/utils/common_docstrs.py +66 -63
- biopipen/utils/reporter.py +177 -0
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
- biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
- biopipen/utils/caching.R +0 -44
- biopipen/utils/gene.R +0 -95
- biopipen/utils/gsea.R +0 -329
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -602
- biopipen/utils/mutate_helpers.R +0 -581
- biopipen/utils/plot.R +0 -209
- biopipen/utils/repr.R +0 -146
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -207
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
"""Metabolic landscape analysis for scRNA-seq data"""
|
|
2
|
+
|
|
2
3
|
from __future__ import annotations
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
from typing import Type
|
|
5
6
|
|
|
6
|
-
from diot import Diot
|
|
7
|
+
from diot import Diot # type: ignore
|
|
7
8
|
from datar.tibble import tibble
|
|
8
9
|
from pipen.utils import mark
|
|
9
10
|
from pipen_args import ProcGroup
|
|
@@ -28,81 +29,76 @@ class MetabolicPathwayActivity(Proc):
|
|
|
28
29
|
|
|
29
30
|
{: width="45%"}
|
|
30
31
|
|
|
32
|
+
Input:
|
|
33
|
+
sobjfile: The Seurat object file.
|
|
34
|
+
It should be loaded as a Seurat object
|
|
35
|
+
|
|
36
|
+
Output:
|
|
37
|
+
outdir: The output directory.
|
|
38
|
+
It will contain the pathway activity score files and plots.
|
|
39
|
+
|
|
31
40
|
Envs:
|
|
32
|
-
ntimes (type=int): Number of
|
|
41
|
+
ntimes (type=int): Number of permutations to estimate the p-values
|
|
33
42
|
ncores (type=int;pgarg): Number of cores to use for parallelization
|
|
34
43
|
Defaults to `ScrnaMetabolicLandscape.ncores`
|
|
35
|
-
heatmap_devpars (ns): Device parameters for the heatmap
|
|
36
|
-
- width (type=int): Width of the heatmap
|
|
37
|
-
- height (type=int): Height of the heatmap
|
|
38
|
-
- res (type=int): Resolution of the heatmap
|
|
39
|
-
violin_devpars (ns): Device parameters for the violin plot
|
|
40
|
-
- width (type=int): Width of the violin plot
|
|
41
|
-
- height (type=int): Height of the violin plot
|
|
42
|
-
- res (type=int): Resolution of the violin plot
|
|
43
44
|
gmtfile (pgarg): The GMT file with the metabolic pathways.
|
|
44
45
|
Defaults to `ScrnaMetabolicLandscape.gmtfile`
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
Defaults to `ScrnaMetabolicLandscape.
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
If
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
- check: {{proc.lang}} <(echo "library(reshape2)")
|
|
72
|
-
r-rcolorbrewer:
|
|
73
|
-
- check: {{proc.lang}} <(echo "library(RColorBrewer)")
|
|
74
|
-
r-ggplot2:
|
|
75
|
-
- check: {{proc.lang}} <(echo "library(ggplot2)")
|
|
76
|
-
r-ggprism:
|
|
77
|
-
- check: {{proc.lang}} <(echo "library(ggprism)")
|
|
78
|
-
r-complexheatmap:
|
|
79
|
-
- check: {{proc.lang}} <(echo "library(ComplexHeatmap)")
|
|
80
|
-
r-parallel:
|
|
81
|
-
- check: {{proc.lang}} <(echo "library(parallel)")
|
|
46
|
+
subset_by (pgarg;readonly): Subset the data by the given column in the
|
|
47
|
+
metadata. For example, `Response`.
|
|
48
|
+
`NA` values will be removed in this column.
|
|
49
|
+
Defaults to `ScrnaMetabolicLandscape.subset_by`
|
|
50
|
+
If None, the data will not be subsetted.
|
|
51
|
+
group_by (pgarg;readonly): Group the data by the given column in the
|
|
52
|
+
metadata. For example, `cluster`.
|
|
53
|
+
Defaults to `ScrnaMetabolicLandscape.group_by`
|
|
54
|
+
plots (type=json): The plots to generate.
|
|
55
|
+
Names will be used as the prefix for the output files. Values will be
|
|
56
|
+
a dictionary with the following keys:
|
|
57
|
+
* `plot_type` is the type of plot to generate. One of `heatmap`,
|
|
58
|
+
`box`, `violin` or `merged_heatmap` (all subsets in one plot).
|
|
59
|
+
* `devpars` is a dictionary with the device parameters for the plot.
|
|
60
|
+
* Other arguments for `plotthis::Heatmap()`, `plotthis::BoxPlot()`
|
|
61
|
+
or `plotthis::ViolinPlot()`, depending on the `plot_type`.
|
|
62
|
+
cases (type=json): Multiple cases for the analysis.
|
|
63
|
+
If you only have one case, you can specify the parameters directly to
|
|
64
|
+
`envs.ntimes`, `envs.subset_by`, `envs.group_by`, `envs.group1`,
|
|
65
|
+
`envs.group2`, and `envs.plots`. The name of the case will be
|
|
66
|
+
`envs.subset_by`.
|
|
67
|
+
If you have multiple cases, you can specify the parameters for each case
|
|
68
|
+
in a dictionary. The keys will be the names of the cases and the values
|
|
69
|
+
will be dictionaries with the parameters for each case, where the values
|
|
70
|
+
will be inherited from `envs.ntimes`, `envs.subset_by`, `envs.group_by`,
|
|
71
|
+
`envs.group1`, `envs.group2`, and `envs.plots`.
|
|
82
72
|
""" # noqa: E501
|
|
73
|
+
|
|
83
74
|
input = "sobjfile:file"
|
|
84
75
|
output = "outdir:dir:{{in.sobjfile | stem}}.pathwayactivity"
|
|
85
76
|
envs = {
|
|
86
77
|
"ntimes": 5000,
|
|
87
78
|
"ncores": config.misc.ncores,
|
|
88
|
-
"heatmap_devpars": {},
|
|
89
|
-
"violin_devpars": {},
|
|
90
79
|
"gmtfile": None,
|
|
91
|
-
"
|
|
92
|
-
"
|
|
93
|
-
"
|
|
94
|
-
|
|
80
|
+
"subset_by": None,
|
|
81
|
+
"group_by": None,
|
|
82
|
+
"plots": {
|
|
83
|
+
"Pathway Activity (violin plot)": {
|
|
84
|
+
"plot_type": "violin",
|
|
85
|
+
"add_box": True,
|
|
86
|
+
"devpars": {"res": 100},
|
|
87
|
+
},
|
|
88
|
+
"Pathway Activity (heatmap)": {
|
|
89
|
+
"plot_type": "heatmap",
|
|
90
|
+
"devpars": {"res": 100},
|
|
91
|
+
},
|
|
92
|
+
},
|
|
93
|
+
"cases": {},
|
|
95
94
|
}
|
|
96
95
|
lang = config.lang.rscript
|
|
97
96
|
script = (
|
|
98
|
-
"file://../scripts/"
|
|
99
|
-
"scrna_metabolic_landscape/MetabolicPathwayActivity.R"
|
|
97
|
+
"file://../scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R"
|
|
100
98
|
)
|
|
101
99
|
plugin_opts = {
|
|
102
|
-
"report":
|
|
103
|
-
|
|
104
|
-
"scrna_metabolic_landscape/MetabolicPathwayActivity.svelte"
|
|
105
|
-
)
|
|
100
|
+
"report":
|
|
101
|
+
"file://../reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte"
|
|
106
102
|
}
|
|
107
103
|
|
|
108
104
|
|
|
@@ -113,11 +109,18 @@ class MetabolicFeatures(Proc):
|
|
|
113
109
|
The enrichment analysis is done with [`fgsea`](https://bioconductor.org/packages/release/bioc/html/fgsea.html)
|
|
114
110
|
package or the [`GSEA_R`](https://github.com/GSEA-MSigDB/GSEA_R) package.
|
|
115
111
|
|
|
112
|
+
Input:
|
|
113
|
+
sobjfile: The Seurat object file in rds.
|
|
114
|
+
It should be loaded as a Seurat object
|
|
115
|
+
|
|
116
|
+
Output:
|
|
117
|
+
outdir: The output directory.
|
|
118
|
+
It will contain the GSEA results and plots.
|
|
119
|
+
|
|
116
120
|
Envs:
|
|
117
|
-
ncores (type=int;pgarg): Number of cores to use for parallelization
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
If `False`, the `GSEA_R` package will be used.
|
|
121
|
+
ncores (type=int;pgarg): Number of cores to use for parallelization for
|
|
122
|
+
the comparisons for each subset and group.
|
|
123
|
+
Defaults to `ScrnaMetabolicLandscape.ncores`.
|
|
121
124
|
prerank_method (choice): Method to use for gene preranking.
|
|
122
125
|
Signal to noise: the larger the differences of the means
|
|
123
126
|
(scaled by the standard deviations); that is, the more distinct
|
|
@@ -143,142 +146,81 @@ class MetabolicFeatures(Proc):
|
|
|
143
146
|
- ratio_of_classes: Also referred to as fold change
|
|
144
147
|
- diff_of_classes: Difference of class means
|
|
145
148
|
- log2_ratio_of_classes: Log2 ratio of class means
|
|
146
|
-
top (type=int): N top of enriched pathways to show
|
|
147
149
|
gmtfile (pgarg): The GMT file with the metabolic pathways.
|
|
148
150
|
Defaults to `ScrnaMetabolicLandscape.gmtfile`
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
151
|
+
subset_by (pgarg;readonly): Subset the data by the given column in the
|
|
152
|
+
metadata. For example, `Response`.
|
|
153
|
+
`NA` values will be removed in this column.
|
|
154
|
+
Defaults to `ScrnaMetabolicLandscape.subset_by`
|
|
155
|
+
If None, the data will not be subsetted.
|
|
156
|
+
group_by (pgarg;readonly): Group the data by the given column in the
|
|
157
|
+
metadata. For example, `cluster`.
|
|
158
|
+
Defaults to `ScrnaMetabolicLandscape.group_by`
|
|
159
|
+
comparisons (type=list): The comparison groups to use for the analysis.
|
|
160
|
+
If not provided, each group in the `group_by` column will be used
|
|
161
|
+
to compare with the other groups.
|
|
162
|
+
If a single group is provided as an element, it will be used to
|
|
163
|
+
compare with all the other groups.
|
|
164
|
+
For example, if we have `group_by = "cluster"` and we have
|
|
165
|
+
`1`, `2` and `3` in the `group_by` column, we could have
|
|
166
|
+
`comparisons = ["1", "2"]`, which will compare the group `1` with groups
|
|
167
|
+
`2` and `3`, and the group `2` with groups `1` and `3`. We could also
|
|
168
|
+
have `comparisons = ["1,2", "1,3"]`, which will compare the group `1` with
|
|
169
|
+
group `2` and group `1` with group `3`.
|
|
170
|
+
fgsea_args (type=json): Other arguments for the `fgsea::fgsea()` function.
|
|
171
|
+
For example, `{"minSize": 15, "maxSize": 500}`.
|
|
172
|
+
See <https://rdrr.io/bioc/fgsea/man/fgsea.html> for more details.
|
|
173
|
+
plots (type=json): The plots to generate.
|
|
174
|
+
Names will be used as the title for the plot. Values will be the arguments
|
|
175
|
+
passed to `biopipen.utils::VizGSEA()` function.
|
|
176
|
+
See <https://pwwang.github.io/biopipen.utils.R/reference/VizGSEA.html>.
|
|
177
|
+
A key `level` is supported to specify the level of the plot.
|
|
178
|
+
Possible values are `case`, which includes all subsets and groups in the
|
|
179
|
+
case; `subset`, which includes all groups in the subset; otherwise, it
|
|
180
|
+
will plot for the groups.
|
|
181
|
+
For `case`/`subset` level plots, only the "dot" `plot_type` is supported
|
|
182
|
+
for now, then the values will be passed to `plotthis::DotPlot()`
|
|
183
|
+
cases (type=json): Multiple cases for the analysis.
|
|
184
|
+
If you only have one case, you can specify the parameters directly to
|
|
185
|
+
`envs.prerank_method`, `envs.subset_by`, `envs.group_by`,
|
|
186
|
+
`envs.comparisons`, `envs.fgsea_args` and `envs.plots`.
|
|
187
|
+
The name of this default case will be `envs.subset_by`.
|
|
188
|
+
If you have multiple cases, you can specify the parameters for each case
|
|
189
|
+
in a dictionary. The keys will be the names of the cases and the values
|
|
190
|
+
will be dictionaries with the parameters for each case, where the values
|
|
191
|
+
will be inherited from `envs.prerank_method`,
|
|
192
|
+
`envs.subset_by`, `envs.group_by`, `envs.comparisons`, `envs.fgsea_args`
|
|
193
|
+
and `envs.plots`.
|
|
167
194
|
""" # noqa: E501
|
|
195
|
+
|
|
168
196
|
input = "sobjfile:file"
|
|
169
197
|
output = "outdir:dir:{{in.sobjfile | stem}}.pathwayfeatures"
|
|
170
198
|
lang = config.lang.rscript
|
|
171
199
|
envs = {
|
|
172
200
|
"ncores": config.misc.ncores,
|
|
173
|
-
"fgsea": True,
|
|
174
201
|
"prerank_method": "signal_to_noise",
|
|
175
|
-
"top": 10,
|
|
176
202
|
"gmtfile": None,
|
|
177
|
-
"
|
|
178
|
-
"
|
|
179
|
-
"
|
|
180
|
-
"
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
"
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
"""Intra-subset metabolic features - Enrichment analysis in details
|
|
195
|
-
|
|
196
|
-
Similar to the [`MetabolicFeatures`](!!#biopipennsscrna_metabolic_landscapemetabolicfeatures)
|
|
197
|
-
process, this process performs enrichment analysis for the metabolic pathways for
|
|
198
|
-
each subset in each group, instead of each group in each subset.
|
|
199
|
-
|
|
200
|
-
Envs:
|
|
201
|
-
ncores (type=int; pgarg): Number of cores to use for parallelization
|
|
202
|
-
Defaults to `ScrnaMetabolicLandscape.ncores`
|
|
203
|
-
fgsea (flag): Whether to do fast gsea analysis
|
|
204
|
-
prerank_method (choice): Method to use for gene preranking
|
|
205
|
-
Signal to noise: the larger the differences of the means
|
|
206
|
-
(scaled by the standard deviations); that is, the more distinct
|
|
207
|
-
the gene expression is in each phenotype and the more the gene
|
|
208
|
-
acts as a “class marker.”.
|
|
209
|
-
Absolute signal to noise: the absolute value of the signal to
|
|
210
|
-
noise.
|
|
211
|
-
T test: Uses the difference of means scaled by the standard
|
|
212
|
-
deviation and number of samples.
|
|
213
|
-
Ratio of classes: Uses the ratio of class means to calculate
|
|
214
|
-
fold change for natural scale data.
|
|
215
|
-
Diff of classes: Uses the difference of class means to calculate
|
|
216
|
-
fold change for nature scale data
|
|
217
|
-
Log2 ratio of classes: Uses the log2 ratio of class means to
|
|
218
|
-
calculate fold change for natural scale data. This is the
|
|
219
|
-
recommended statistic for calculating fold change for log scale
|
|
220
|
-
data.
|
|
221
|
-
- signal_to_noise: Signal to noise
|
|
222
|
-
- s2n: Alias of signal_to_noise
|
|
223
|
-
- abs_signal_to_noise: absolute signal to noise
|
|
224
|
-
- abs_s2n: Alias of abs_signal_to_noise
|
|
225
|
-
- t_test: T test
|
|
226
|
-
- ratio_of_classes: Also referred to as fold change
|
|
227
|
-
- diff_of_classes: Difference of class means
|
|
228
|
-
- log2_ratio_of_classes: Log2 ratio of class means
|
|
229
|
-
top (type=int): N top of enriched pathways to show
|
|
230
|
-
gmtfile (pgarg): The GMT file with the metabolic pathways.
|
|
231
|
-
Defaults to `ScrnaMetabolicLandscape.gmtfile`
|
|
232
|
-
grouping (type=auto;pgarg;readonly): Defines the basic groups to
|
|
233
|
-
investigate the metabolic activity.
|
|
234
|
-
Defaults to `ScrnaMetabolicLandscape.grouping`
|
|
235
|
-
grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to group
|
|
236
|
-
names.
|
|
237
|
-
Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
|
|
238
|
-
subsetting (type=auto;pgarg;readonly): How do we subset the data.
|
|
239
|
-
Another column(s) in the metadata.
|
|
240
|
-
Defaults to `ScrnaMetabolicLandscape.subsetting`
|
|
241
|
-
subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
|
|
242
|
-
subset names.
|
|
243
|
-
Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`
|
|
244
|
-
subsetting_comparison (type=json;pgarg;readonly): How do we compare the
|
|
245
|
-
subsets.
|
|
246
|
-
Defaults to `ScrnaMetabolicLandscape.subsetting_comparison`
|
|
247
|
-
|
|
248
|
-
Requires:
|
|
249
|
-
r-parallel:
|
|
250
|
-
- check: {{proc.lang}} <(echo "library(parallel)")
|
|
251
|
-
r-scater:
|
|
252
|
-
- check: {{proc.lang}} <(echo "library(scater)")
|
|
253
|
-
r-fgsea:
|
|
254
|
-
- check: {{proc.lang}} <(echo "library(fgsea)")
|
|
255
|
-
""" # noqa: E501
|
|
256
|
-
input = "sobjfile:file"
|
|
257
|
-
output = (
|
|
258
|
-
"outdir:dir:{{in.sobjfile | stem}}.intra-subset-pathwayfeatures"
|
|
259
|
-
)
|
|
260
|
-
lang = config.lang.rscript
|
|
261
|
-
envs = {
|
|
262
|
-
"ncores": config.misc.ncores,
|
|
263
|
-
"gmtfile": None,
|
|
264
|
-
"fgsea": True,
|
|
265
|
-
"prerank_method": "signal_to_noise",
|
|
266
|
-
"top": 10,
|
|
267
|
-
"grouping": None,
|
|
268
|
-
"grouping_prefix": "",
|
|
269
|
-
"subsetting": None,
|
|
270
|
-
"subsetting_prefix": "",
|
|
271
|
-
"subsetting_comparison": {},
|
|
203
|
+
"subset_by": None,
|
|
204
|
+
"group_by": None,
|
|
205
|
+
"comparisons": [],
|
|
206
|
+
"fgsea_args": {},
|
|
207
|
+
"plots": {
|
|
208
|
+
"Summary Plot": {
|
|
209
|
+
"plot_type": "summary",
|
|
210
|
+
"top_term": 10,
|
|
211
|
+
"devpars": {"res": 100},
|
|
212
|
+
},
|
|
213
|
+
"Enrichment Plots": {
|
|
214
|
+
"plot_type": "gsea",
|
|
215
|
+
"top_term": 10,
|
|
216
|
+
"devpars": {"res": 100},
|
|
217
|
+
},
|
|
218
|
+
},
|
|
219
|
+
"cases": {},
|
|
272
220
|
}
|
|
273
|
-
script =
|
|
274
|
-
"file://../scripts/scrna_metabolic_landscape/"
|
|
275
|
-
"MetabolicFeaturesIntraSubset.R"
|
|
276
|
-
)
|
|
221
|
+
script = "file://../scripts/scrna_metabolic_landscape/MetabolicFeatures.R"
|
|
277
222
|
plugin_opts = {
|
|
278
|
-
"report":
|
|
279
|
-
"file://../reports/scrna_metabolic_landscape/"
|
|
280
|
-
"MetabolicFeaturesIntraSubset.svelte"
|
|
281
|
-
)
|
|
223
|
+
"report": "file://../reports/scrna_metabolic_landscape/MetabolicFeatures.svelte"
|
|
282
224
|
}
|
|
283
225
|
|
|
284
226
|
|
|
@@ -296,7 +238,6 @@ class MetabolicPathwayHeterogeneity(Proc):
|
|
|
296
238
|
|
|
297
239
|

|
|
298
240
|
|
|
299
|
-
|
|
300
241
|
Envs:
|
|
301
242
|
gmtfile (pgarg): The GMT file with the metabolic pathways.
|
|
302
243
|
Defaults to `ScrnaMetabolicLandscape.gmtfile`
|
|
@@ -305,43 +246,33 @@ class MetabolicPathwayHeterogeneity(Proc):
|
|
|
305
246
|
the enriched pathways
|
|
306
247
|
ncores (type=int;pgarg): Number of cores to use for parallelization
|
|
307
248
|
Defaults to `ScrnaMetabolicLandscape.ncores`
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
- check: {{proc.lang}} <(echo "library(parallel)")
|
|
334
|
-
r-dplyr:
|
|
335
|
-
- check: {{proc.lang}} <(echo "library(dplyr)")
|
|
336
|
-
r-tibble:
|
|
337
|
-
- check: {{proc.lang}} <(echo "library(tibble)")
|
|
338
|
-
r-enrichr:
|
|
339
|
-
- check: {{proc.lang}} <(echo "library(enrichR)")
|
|
340
|
-
r-data.table:
|
|
341
|
-
- check: {{proc.lang}} <(echo "library(data.table)")
|
|
342
|
-
r-fgsea:
|
|
343
|
-
- check: {{proc.lang}} <(echo "library(fgsea)")
|
|
249
|
+
subset_by (pgarg;readonly): Subset the data by the given column in the
|
|
250
|
+
metadata. For example, `Response`.
|
|
251
|
+
`NA` values will be removed in this column.
|
|
252
|
+
Defaults to `ScrnaMetabolicLandscape.subset_by`
|
|
253
|
+
If None, the data will not be subsetted.
|
|
254
|
+
group_by (pgarg;readonly): Group the data by the given column in the
|
|
255
|
+
metadata. For example, `cluster`.
|
|
256
|
+
Defaults to `ScrnaMetabolicLandscape.group_by`
|
|
257
|
+
fgsea_args (type=json): Other arguments for the `fgsea::fgsea()` function.
|
|
258
|
+
For example, `{"minSize": 15, "maxSize": 500}`.
|
|
259
|
+
See <https://rdrr.io/bioc/fgsea/man/fgsea.html> for more details.
|
|
260
|
+
plots (type=json): The plots to generate.
|
|
261
|
+
Names will be used as the title for the plot. Values will be the arguments
|
|
262
|
+
passed to `biopipen.utils::VizGSEA()` function.
|
|
263
|
+
See <https://pwwang.github.io/biopipen.utils.R/reference/VizGSEA.html>.
|
|
264
|
+
cases (type=json): Multiple cases for the analysis.
|
|
265
|
+
If you only have one case, you can specify the parameters directly to
|
|
266
|
+
`envs.subset_by`, `envs.group_by`, `envs.fgsea_args`, `envs.plots`,
|
|
267
|
+
`envs.select_pcs`, and `envs.pathway_pval_cutoff`.
|
|
268
|
+
The name of this default case will be `envs.subset_by`.
|
|
269
|
+
If you have multiple cases, you can specify the parameters for each case
|
|
270
|
+
in a dictionary. The keys will be the names of the cases and the values
|
|
271
|
+
will be dictionaries with the parameters for each case, where the values
|
|
272
|
+
will be inherited from `envs.subset_by`, `envs.group_by`, `envs.fgsea_args`,
|
|
273
|
+
`envs.plots`, `envs.select_pcs`, and `envs.pathway_pval_cutoff`.
|
|
344
274
|
""" # noqa: E501
|
|
275
|
+
|
|
345
276
|
input = "sobjfile:file"
|
|
346
277
|
output = "outdir:dir:{{in.sobjfile | stem}}.pathwayhetero"
|
|
347
278
|
lang = config.lang.rscript
|
|
@@ -350,11 +281,16 @@ class MetabolicPathwayHeterogeneity(Proc):
|
|
|
350
281
|
"select_pcs": 0.8,
|
|
351
282
|
"pathway_pval_cutoff": 0.01,
|
|
352
283
|
"ncores": config.misc.ncores,
|
|
353
|
-
"
|
|
354
|
-
"
|
|
355
|
-
"
|
|
356
|
-
"
|
|
357
|
-
|
|
284
|
+
"subset_by": None,
|
|
285
|
+
"group_by": None,
|
|
286
|
+
"fgsea_args": {"scoreType": "std", "nproc": 1},
|
|
287
|
+
"plots": {
|
|
288
|
+
"Pathway Heterogeneity": {
|
|
289
|
+
"plot_type": "dot",
|
|
290
|
+
"devpars": {"res": 100},
|
|
291
|
+
},
|
|
292
|
+
},
|
|
293
|
+
"cases": {},
|
|
358
294
|
}
|
|
359
295
|
script = (
|
|
360
296
|
"file://../scripts/scrna_metabolic_landscape/"
|
|
@@ -399,49 +335,19 @@ class ScrnaMetabolicLandscape(ProcGroup):
|
|
|
399
335
|
dependent on other processes, this option will be used to determine
|
|
400
336
|
whether the input is a seurat object or not.
|
|
401
337
|
noimpute (flag): Whether to do imputation for the dropouts.
|
|
402
|
-
If
|
|
338
|
+
If True, the values will be left as is.
|
|
403
339
|
gmtfile: The GMT file with the metabolic pathways. The gene names should
|
|
404
340
|
match the gene names in the gene list in RNAData or
|
|
405
341
|
the Seurat object.
|
|
406
342
|
You can also provide a URL to the GMT file.
|
|
407
343
|
For example, from
|
|
408
344
|
<https://download.baderlab.org/EM_Genesets/current_release/Human/symbol/>.
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
subsetting (type=auto): How do we subset the data. Other columns in the
|
|
416
|
-
metadata to do comparisons. For example, `"TimePoint"` or
|
|
417
|
-
`["TimePoint", "Response"]`
|
|
418
|
-
subsetting_prefix (type=auto): Working as a prefix to subset names
|
|
419
|
-
For example, if we have `subsetting_prefix = "timepoint"` and
|
|
420
|
-
we have `pre` and `post` in the `subsetting` column, the subsets
|
|
421
|
-
will be named as `timepoint_pre` and `timepoint_post`
|
|
422
|
-
If `subsetting` is a list, then this should also be a same-length
|
|
423
|
-
list. If a single string is given, it will be repeated to a list
|
|
424
|
-
with the same length as `subsetting`
|
|
425
|
-
subsetting_comparison (type=json): What kind of comparisons are we
|
|
426
|
-
doing to compare cells from different subsets.
|
|
427
|
-
It should be dict with keys as the names of the comparisons and
|
|
428
|
-
values as the 2 comparison groups from the `subsetting` column.
|
|
429
|
-
For example, if we have `pre` and `post` in the `subsetting` column,
|
|
430
|
-
we could have
|
|
431
|
-
`subsetting_comparison = {"pre_vs_post": ["post", "pre"]}`
|
|
432
|
-
The second group will be the control group in the comparison.
|
|
433
|
-
If we also have `1`, `2` and `3` in the `grouping` column,
|
|
434
|
-
by default, the comparisons are done within each subset for
|
|
435
|
-
each group. For example, for group `1`, groups `2` and `3`
|
|
436
|
-
will be used as control, and for group `2`, groups `1` and `3`
|
|
437
|
-
will be used as control, and for group `3`, groups `1` and `2`
|
|
438
|
-
will be used as control. It is similar to `Seurat::FindMarkers`
|
|
439
|
-
procedure. With this option, the comparisons are also done to
|
|
440
|
-
compare cells from different subsets within each group. With the
|
|
441
|
-
example above, we will have `pre_vs_post` comparisons within
|
|
442
|
-
each group.
|
|
443
|
-
If `subsetting` is a list, this must be a list of dicts with the
|
|
444
|
-
same length.
|
|
345
|
+
subset_by (pgarg;readonly): Subset the data by the given column in the
|
|
346
|
+
metadata. For example, `Response`.
|
|
347
|
+
`NA` values will be removed in this column.
|
|
348
|
+
If None, the data will not be subsetted.
|
|
349
|
+
group_by (pgarg;readonly): Group the data by the given column in the
|
|
350
|
+
metadata. For example, `cluster`.
|
|
445
351
|
mutaters (type=json): Add new columns to the metadata for
|
|
446
352
|
grouping/subsetting.
|
|
447
353
|
They are passed to `sobj@meta.data |> mutate(...)`. For example,
|
|
@@ -451,65 +357,25 @@ class ScrnaMetabolicLandscape(ProcGroup):
|
|
|
451
357
|
ncores (type=int): Number of cores to use for parallelization for
|
|
452
358
|
each process
|
|
453
359
|
"""
|
|
360
|
+
|
|
454
361
|
DEFAULTS = Diot(
|
|
455
362
|
metafile=None,
|
|
456
363
|
is_seurat=None,
|
|
457
364
|
gmtfile=None,
|
|
458
|
-
grouping=None,
|
|
459
|
-
grouping_prefix="",
|
|
460
|
-
subsetting=None,
|
|
461
|
-
subsetting_prefix=None,
|
|
462
|
-
subsetting_comparison={},
|
|
463
365
|
mutaters=None,
|
|
464
|
-
noimpute=
|
|
366
|
+
noimpute=True,
|
|
465
367
|
ncores=config.misc.ncores,
|
|
368
|
+
subset_by=None,
|
|
369
|
+
group_by=None,
|
|
466
370
|
)
|
|
467
371
|
|
|
468
372
|
def post_init(self):
|
|
469
373
|
"""Load runtime processes"""
|
|
470
374
|
if self.opts.metafile:
|
|
471
375
|
suffix = Path(self.opts.metafile).suffix
|
|
472
|
-
self.opts.is_seurat = suffix in (".rds", ".RDS")
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
if self.opts.subsetting and not isinstance(self.opts.subsetting, list):
|
|
476
|
-
self.opts.subsetting = [self.opts.subsetting]
|
|
477
|
-
|
|
478
|
-
# Make sure the grouping is a list with the same length as subsetting
|
|
479
|
-
if (
|
|
480
|
-
self.opts.subsetting
|
|
481
|
-
and not isinstance(self.opts.subsetting_prefix, list)
|
|
482
|
-
):
|
|
483
|
-
self.opts.subsetting_prefix = [
|
|
484
|
-
self.opts.subsetting_prefix
|
|
485
|
-
] * len(self.opts.subsetting)
|
|
486
|
-
|
|
487
|
-
# Make sure the lengths of subsetting and subsetting_comparison the same
|
|
488
|
-
if self.opts.subsetting:
|
|
489
|
-
if len(self.opts.subsetting) == 1 and isinstance(
|
|
490
|
-
self.opts.subsetting_comparison, dict
|
|
491
|
-
):
|
|
492
|
-
self.opts.subsetting_comparison = [
|
|
493
|
-
self.opts.subsetting_comparison
|
|
494
|
-
]
|
|
495
|
-
|
|
496
|
-
if len(self.opts.subsetting) > 1 and not isinstance(
|
|
497
|
-
self.opts.subsetting_comparison, list
|
|
498
|
-
):
|
|
499
|
-
raise ValueError(
|
|
500
|
-
"The length of `subsetting` is larger than 1, "
|
|
501
|
-
"but `subsetting_comparison` is not a list of dicts."
|
|
502
|
-
)
|
|
503
|
-
|
|
504
|
-
if len(self.opts.subsetting) != len(
|
|
505
|
-
self.opts.subsetting_comparison
|
|
506
|
-
):
|
|
507
|
-
raise ValueError(
|
|
508
|
-
"The length of `subsetting` and `subsetting_comparison` "
|
|
509
|
-
"are not the same"
|
|
510
|
-
)
|
|
511
|
-
|
|
512
|
-
@ProcGroup.add_proc
|
|
376
|
+
self.opts.is_seurat = suffix in (".rds", ".RDS", ".qs", ".qs2")
|
|
377
|
+
|
|
378
|
+
@ProcGroup.add_proc # type: ignore
|
|
513
379
|
def p_input(self) -> Type[Proc]:
|
|
514
380
|
"""Build MetabolicInputs process"""
|
|
515
381
|
from .misc import File2Proc
|
|
@@ -527,8 +393,8 @@ class ScrnaMetabolicLandscape(ProcGroup):
|
|
|
527
393
|
|
|
528
394
|
return MetabolicInput
|
|
529
395
|
|
|
530
|
-
@ProcGroup.add_proc
|
|
531
|
-
def p_preparing(self) -> Type[Proc]:
|
|
396
|
+
@ProcGroup.add_proc # type: ignore
|
|
397
|
+
def p_preparing(self) -> Type[Proc] | None:
|
|
532
398
|
"""Build SeuratPreparing process"""
|
|
533
399
|
if self.opts.is_seurat:
|
|
534
400
|
return None
|
|
@@ -540,11 +406,11 @@ class ScrnaMetabolicLandscape(ProcGroup):
|
|
|
540
406
|
|
|
541
407
|
return MetabolicSeuratPreparing
|
|
542
408
|
|
|
543
|
-
@ProcGroup.add_proc
|
|
409
|
+
@ProcGroup.add_proc # type: ignore
|
|
544
410
|
def p_clustering(self) -> Type[Proc]:
|
|
545
411
|
"""Build SeuratClustering process"""
|
|
546
412
|
if self.opts.is_seurat:
|
|
547
|
-
return self.p_input
|
|
413
|
+
return self.p_input # type: ignore
|
|
548
414
|
|
|
549
415
|
from .scrna import SeuratClustering
|
|
550
416
|
|
|
@@ -553,11 +419,11 @@ class ScrnaMetabolicLandscape(ProcGroup):
|
|
|
553
419
|
|
|
554
420
|
return MetabolicSeuratClustering
|
|
555
421
|
|
|
556
|
-
@ProcGroup.add_proc
|
|
422
|
+
@ProcGroup.add_proc # type: ignore
|
|
557
423
|
def p_mutater(self) -> Type[Proc]:
|
|
558
424
|
"""Build SeuratMetadataMutater process"""
|
|
559
425
|
if not self.opts.mutaters:
|
|
560
|
-
return self.p_clustering
|
|
426
|
+
return self.p_clustering # type: ignore
|
|
561
427
|
|
|
562
428
|
from .scrna import SeuratMetadataMutater
|
|
563
429
|
|
|
@@ -571,97 +437,72 @@ class ScrnaMetabolicLandscape(ProcGroup):
|
|
|
571
437
|
|
|
572
438
|
return MetabolicSeuratMetadataMutater
|
|
573
439
|
|
|
574
|
-
@ProcGroup.add_proc
|
|
440
|
+
@ProcGroup.add_proc # type: ignore
|
|
575
441
|
def p_expr_impute(self) -> Type[Proc]:
|
|
576
442
|
"""Build process"""
|
|
577
443
|
if self.opts.noimpute:
|
|
578
|
-
return self.p_mutater
|
|
444
|
+
return self.p_mutater # type: ignore
|
|
579
445
|
|
|
580
446
|
from .scrna import ExprImputation
|
|
581
447
|
|
|
582
|
-
@annotate.format_doc(indent=3)
|
|
448
|
+
@annotate.format_doc(indent=3) # type: ignore
|
|
583
449
|
class MetabolicExprImputation(ExprImputation):
|
|
584
450
|
"""{{Summary}}
|
|
585
451
|
|
|
586
452
|
You can turn off the imputation by setting the `noimpute` option
|
|
587
453
|
of the process group to `True`.
|
|
588
454
|
"""
|
|
455
|
+
|
|
589
456
|
requires = self.p_mutater
|
|
590
457
|
|
|
591
458
|
return MetabolicExprImputation
|
|
592
459
|
|
|
593
|
-
@ProcGroup.add_proc
|
|
460
|
+
@ProcGroup.add_proc # type: ignore
|
|
594
461
|
def p_pathway_activity(self) -> Type[Proc]:
|
|
595
462
|
"""Build MetabolicPathwayActivity process"""
|
|
596
|
-
return Proc.from_proc(
|
|
463
|
+
return Proc.from_proc( # type: ignore
|
|
597
464
|
MetabolicPathwayActivity,
|
|
598
465
|
"MetabolicPathwayActivity",
|
|
599
|
-
requires=self.p_expr_impute,
|
|
466
|
+
requires=self.p_expr_impute, # type: ignore
|
|
600
467
|
order=-1,
|
|
468
|
+
envs_depth=5,
|
|
601
469
|
envs={
|
|
602
470
|
"ncores": self.opts.ncores,
|
|
603
471
|
"gmtfile": self.opts.gmtfile,
|
|
604
|
-
"grouping": self.opts.grouping,
|
|
605
|
-
"grouping_prefix": self.opts.grouping_prefix,
|
|
606
|
-
"subsetting": self.opts.subsetting,
|
|
607
|
-
"subsetting_prefix": self.opts.subsetting_prefix,
|
|
472
|
+
"group_by": self.opts.group_by,
|
|
473
|
+
"subset_by": self.opts.subset_by,
|
|
608
474
|
},
|
|
609
475
|
)
|
|
610
476
|
|
|
611
|
-
@ProcGroup.add_proc
|
|
477
|
+
@ProcGroup.add_proc # type: ignore
|
|
612
478
|
def p_pathway_heterogeneity(self) -> Type[Proc]:
|
|
613
479
|
"""Build MetabolicPathwayHeterogeneity process"""
|
|
614
|
-
return Proc.from_proc(
|
|
480
|
+
return Proc.from_proc( # type: ignore
|
|
615
481
|
MetabolicPathwayHeterogeneity,
|
|
616
482
|
"MetabolicPathwayHeterogeneity",
|
|
617
|
-
requires=self.p_expr_impute,
|
|
483
|
+
requires=self.p_mutater, # type: ignore
|
|
484
|
+
envs_depth=5,
|
|
618
485
|
envs={
|
|
619
486
|
"ncores": self.opts.ncores,
|
|
620
487
|
"gmtfile": self.opts.gmtfile,
|
|
621
|
-
"grouping": self.opts.grouping,
|
|
622
|
-
"grouping_prefix": self.opts.grouping_prefix,
|
|
623
|
-
"subsetting": self.opts.subsetting,
|
|
624
|
-
"subsetting_prefix": self.opts.subsetting_prefix,
|
|
488
|
+
"group_by": self.opts.group_by,
|
|
489
|
+
"subset_by": self.opts.subset_by,
|
|
625
490
|
},
|
|
626
491
|
)
|
|
627
492
|
|
|
628
|
-
@ProcGroup.add_proc
|
|
493
|
+
@ProcGroup.add_proc # type: ignore
|
|
629
494
|
def p_features(self) -> Type[Proc]:
|
|
630
495
|
"""Build MetabolicFeatures process"""
|
|
631
|
-
return Proc.from_proc(
|
|
496
|
+
return Proc.from_proc( # type: ignore
|
|
632
497
|
MetabolicFeatures,
|
|
633
498
|
"MetabolicFeatures",
|
|
634
|
-
requires=self.p_expr_impute,
|
|
635
|
-
|
|
636
|
-
"ncores": self.opts.ncores,
|
|
637
|
-
"gmtfile": self.opts.gmtfile,
|
|
638
|
-
"grouping": self.opts.grouping,
|
|
639
|
-
"grouping_prefix": self.opts.grouping_prefix,
|
|
640
|
-
"subsetting": self.opts.subsetting,
|
|
641
|
-
"subsetting_prefix": self.opts.subsetting_prefix,
|
|
642
|
-
},
|
|
643
|
-
)
|
|
644
|
-
|
|
645
|
-
@ProcGroup.add_proc
|
|
646
|
-
def p_features_intra_subset(self) -> Type[Proc]:
|
|
647
|
-
"""Build MetabolicFeaturesIntraSubset process"""
|
|
648
|
-
if self.opts.subsetting_comparison and not self.opts.subsetting:
|
|
649
|
-
raise ValueError(
|
|
650
|
-
"Cannot use `subsetting_comparison` without `subsetting`."
|
|
651
|
-
)
|
|
652
|
-
|
|
653
|
-
return Proc.from_proc(
|
|
654
|
-
MetabolicFeaturesIntraSubset,
|
|
655
|
-
"MetabolicFeaturesIntraSubset",
|
|
656
|
-
requires=self.p_expr_impute,
|
|
499
|
+
requires=self.p_expr_impute, # type: ignore
|
|
500
|
+
envs_depth=5,
|
|
657
501
|
envs={
|
|
658
502
|
"ncores": self.opts.ncores,
|
|
659
503
|
"gmtfile": self.opts.gmtfile,
|
|
660
|
-
"grouping": self.opts.grouping,
|
|
661
|
-
"grouping_prefix": self.opts.grouping_prefix,
|
|
662
|
-
"subsetting": self.opts.subsetting,
|
|
663
|
-
"subsetting_prefix": self.opts.subsetting_prefix,
|
|
664
|
-
"subsetting_comparison": self.opts.subsetting_comparison,
|
|
504
|
+
"group_by": self.opts.group_by,
|
|
505
|
+
"subset_by": self.opts.subset_by,
|
|
665
506
|
},
|
|
666
507
|
)
|
|
667
508
|
|