biopipen: biopipen-0.7.0-py3-none-any.whl → biopipen-0.8.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/config.py +0 -5
- biopipen/core/config.toml +4 -4
- biopipen/core/defaults.py +3 -3
- biopipen/core/filters.py +1 -0
- biopipen/core/proc.py +1 -3
- biopipen/core/testing.py +1 -2
- biopipen/ns/bam.py +10 -14
- biopipen/ns/bcftools.py +37 -7
- biopipen/ns/bed.py +9 -16
- biopipen/ns/cnv.py +8 -11
- biopipen/ns/cnvkit.py +32 -59
- biopipen/ns/cnvkit_pipeline.py +266 -310
- biopipen/ns/csv.py +0 -2
- biopipen/ns/gene.py +0 -1
- biopipen/ns/gsea.py +4 -10
- biopipen/ns/misc.py +0 -5
- biopipen/ns/plot.py +2 -4
- biopipen/ns/rnaseq.py +0 -1
- biopipen/ns/scrna.py +78 -120
- biopipen/ns/scrna_metabolic_landscape.py +306 -348
- biopipen/ns/tcgamaf.py +52 -0
- biopipen/ns/tcr.py +5 -15
- biopipen/ns/vcf.py +52 -34
- biopipen/ns/web.py +8 -19
- biopipen/reports/bam/CNAClinic.svelte +1 -1
- biopipen/reports/bam/CNVpytor.svelte +2 -2
- biopipen/reports/bam/ControlFREEC.svelte +1 -1
- biopipen/reports/cnv/AneuploidyScore.svelte +2 -2
- biopipen/reports/cnv/AneuploidyScoreSummary.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/gsea/FGSEA.svelte +1 -1
- biopipen/reports/gsea/GSEA.svelte +2 -2
- biopipen/reports/scrna/CellsDistribution.svelte +1 -1
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna/GeneExpressionInvistigation.svelte +1 -1
- biopipen/reports/scrna/MarkersFinder.svelte +42 -39
- biopipen/reports/scrna/ScFGSEA.svelte +3 -3
- biopipen/reports/scrna/SeuratClusterStats.svelte +3 -3
- biopipen/reports/scrna/SeuratPreparing.svelte +2 -2
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubsets.svelte +2 -2
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +1 -1
- biopipen/reports/tcr/CloneResidency.svelte +4 -4
- biopipen/reports/tcr/Immunarch.svelte +2 -2
- biopipen/reports/tcr/SampleDiversity.svelte +2 -2
- biopipen/reports/tcr/TCRClusteringStats.svelte +3 -3
- biopipen/reports/tcr/VJUsage.svelte +1 -1
- biopipen/reports/utils/gsea.liq +1 -1
- biopipen/reports/utils/misc.liq +1 -1
- biopipen/reports/vcf/TruvariBenchSummary.svelte +1 -1
- biopipen/reports/vcf/TruvariConsistency.svelte +3 -3
- biopipen/scripts/bcftools/BcftoolsSort.py +19 -0
- biopipen/scripts/scrna/MarkersFinder.R +73 -35
- biopipen/scripts/tcgamaf/Maf2Vcf.py +22 -0
- biopipen/scripts/tcgamaf/MafAddChr.py +14 -0
- biopipen/scripts/tcgamaf/maf2vcf.pl +427 -0
- biopipen/scripts/vcf/VcfAnno.py +26 -0
- biopipen/scripts/vcf/VcfFix_utils.py +3 -2
- {biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/METADATA +7 -8
- {biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/RECORD +65 -59
- {biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/WHEEL +1 -1
- {biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/entry_points.txt +2 -1
|
@@ -5,211 +5,13 @@ from typing import Type
|
|
|
5
5
|
|
|
6
6
|
from diot import Diot
|
|
7
7
|
from datar.tibble import tibble
|
|
8
|
-
from
|
|
8
|
+
from pipen_args import ProcGroup
|
|
9
9
|
|
|
10
10
|
from ..core.config import config
|
|
11
11
|
from ..core.proc import Proc
|
|
12
12
|
|
|
13
|
-
DEFAULT_OPTS = Diot(
|
|
14
|
-
metafile=None,
|
|
15
|
-
is_seurat=None,
|
|
16
|
-
gmtfile=None,
|
|
17
|
-
grouping=None,
|
|
18
|
-
grouping_prefix="",
|
|
19
|
-
subsetting=None,
|
|
20
|
-
subsetting_prefix=None,
|
|
21
|
-
subsetting_comparison={},
|
|
22
|
-
mutaters=None,
|
|
23
|
-
ncores=config.misc.ncores,
|
|
24
|
-
)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class MetabolicPathwayActivity(Proc):
|
|
28
|
-
"""Pathway activities for each group
|
|
29
|
-
|
|
30
|
-
Requires:
|
|
31
|
-
- name: r-scater
|
|
32
|
-
check: |
|
|
33
|
-
{{proc.lang}} <(echo "library(scater)")
|
|
34
|
-
- name: r-reshape2
|
|
35
|
-
check: |
|
|
36
|
-
{{proc.lang}} <(echo "library(reshape2)")
|
|
37
|
-
- name: r-rcolorbrewer
|
|
38
|
-
check: |
|
|
39
|
-
{{proc.lang}} <(echo "library(RColorBrewer)")
|
|
40
|
-
- name: r-ggplot2
|
|
41
|
-
check: |
|
|
42
|
-
{{proc.lang}} <(echo "library(ggplot2)")
|
|
43
|
-
- name: r-ggprism
|
|
44
|
-
check: |
|
|
45
|
-
{{proc.lang}} <(echo "library(ggprism)")
|
|
46
|
-
- name: r-complexheatmap
|
|
47
|
-
check: |
|
|
48
|
-
{{proc.lang}} <(echo "library(ComplexHeatmap)")
|
|
49
|
-
- name: r-parallel
|
|
50
|
-
check: |
|
|
51
|
-
{{proc.lang}} <(echo "library(parallel)")
|
|
52
|
-
"""
|
|
53
|
-
input = "sobjfile:file"
|
|
54
|
-
output = "outdir:dir:{{in.sobjfile | stem}}.pathwayactivity"
|
|
55
|
-
envs = {
|
|
56
|
-
"ntimes": 5000,
|
|
57
|
-
"ncores": config.misc.ncores,
|
|
58
|
-
"heatmap_devpars": {},
|
|
59
|
-
"violin_devpars": {},
|
|
60
|
-
"gmtfile": None,
|
|
61
|
-
"grouping": None,
|
|
62
|
-
"grouping_prefix": "",
|
|
63
|
-
"subsetting": None,
|
|
64
|
-
"subsetting_prefix": "",
|
|
65
|
-
}
|
|
66
|
-
lang = config.lang.rscript
|
|
67
|
-
script = (
|
|
68
|
-
"file://../scripts/"
|
|
69
|
-
"scrna_metabolic_landscape/MetabolicPathwayActivity.R"
|
|
70
|
-
)
|
|
71
|
-
plugin_opts = {
|
|
72
|
-
"report": (
|
|
73
|
-
"file://../reports/"
|
|
74
|
-
"scrna_metabolic_landscape/MetabolicPathwayActivity.svelte"
|
|
75
|
-
)
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
class MetabolicFeatures(Proc):
|
|
80
|
-
"""Inter-subset metabolic features - Enrichment analysis in details
|
|
81
|
-
|
|
82
|
-
Requires:
|
|
83
|
-
- name: r-parallel
|
|
84
|
-
check: |
|
|
85
|
-
{{proc.lang}} <(echo "library(parallel)")
|
|
86
|
-
- name: r-fgsea
|
|
87
|
-
check: |
|
|
88
|
-
{{proc.lang}} <(echo "library(fgsea)")
|
|
89
|
-
"""
|
|
90
|
-
input = "sobjfile:file"
|
|
91
|
-
output = "outdir:dir:{{in.sobjfile | stem}}.pathwayfeatures"
|
|
92
|
-
lang = config.lang.rscript
|
|
93
|
-
envs = {
|
|
94
|
-
"ncores": config.misc.ncores,
|
|
95
|
-
"fgsea": True,
|
|
96
|
-
"prerank_method": "signal_to_noise",
|
|
97
|
-
"top": 10,
|
|
98
|
-
"gmtfile": None,
|
|
99
|
-
"grouping": None,
|
|
100
|
-
"grouping_prefix": "",
|
|
101
|
-
"subsetting": None,
|
|
102
|
-
"subsetting_prefix": "",
|
|
103
|
-
}
|
|
104
|
-
script = "file://../scripts/scrna_metabolic_landscape/MetabolicFeatures.R"
|
|
105
|
-
plugin_opts = {
|
|
106
|
-
"report": (
|
|
107
|
-
"file://../reports/"
|
|
108
|
-
"scrna_metabolic_landscape/MetabolicFeatures.svelte"
|
|
109
|
-
)
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
class MetabolicFeaturesIntraSubset(Proc):
|
|
114
|
-
"""Intra-subset metabolic features - Enrichment analysis in details
|
|
115
|
-
|
|
116
|
-
Requires:
|
|
117
|
-
- name: r-parallel
|
|
118
|
-
check: |
|
|
119
|
-
{{proc.lang}} <(echo "library(parallel)")
|
|
120
|
-
- name: r-scater
|
|
121
|
-
check: |
|
|
122
|
-
{{proc.lang}} <(echo "library(scater)")
|
|
123
|
-
- name: r-fgsea
|
|
124
|
-
check: |
|
|
125
|
-
{{proc.lang}} <(echo "library(fgsea)")
|
|
126
|
-
"""
|
|
127
|
-
input = "sobjfile:file"
|
|
128
|
-
output = "outdir:dir:{{in.sobjfile | stem}}.intra-subset-pathwayfeatures"
|
|
129
|
-
lang = config.lang.rscript
|
|
130
|
-
envs = {
|
|
131
|
-
"ncores": config.misc.ncores,
|
|
132
|
-
"gmtfile": None,
|
|
133
|
-
"fgsea": True,
|
|
134
|
-
"prerank_method": "signal_to_noise",
|
|
135
|
-
"top": 10,
|
|
136
|
-
"grouping": None,
|
|
137
|
-
"grouping_prefix": "",
|
|
138
|
-
"subsetting": None,
|
|
139
|
-
"subsetting_prefix": "",
|
|
140
|
-
"subsetting_comparison": {},
|
|
141
|
-
}
|
|
142
|
-
script = (
|
|
143
|
-
"file://../scripts/scrna_metabolic_landscape/"
|
|
144
|
-
"MetabolicFeaturesIntraSubsets.R"
|
|
145
|
-
)
|
|
146
|
-
plugin_opts = {
|
|
147
|
-
"report": (
|
|
148
|
-
"file://../reports/scrna_metabolic_landscape/"
|
|
149
|
-
"MetabolicFeaturesIntraSubsets.svelte"
|
|
150
|
-
)
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
class MetabolicPathwayHeterogeneity(Proc):
|
|
155
|
-
"""Pathway heterogeneity
|
|
156
|
-
|
|
157
|
-
Requires:
|
|
158
|
-
- name: r-gtools
|
|
159
|
-
check: |
|
|
160
|
-
{{proc.lang}} <(echo "library(gtools)")
|
|
161
|
-
- name: r-ggplot2
|
|
162
|
-
check: |
|
|
163
|
-
{{proc.lang}} <(echo "library(ggplot2)")
|
|
164
|
-
- name: r-ggprism
|
|
165
|
-
check: |
|
|
166
|
-
{{proc.lang}} <(echo "library(ggprism)")
|
|
167
|
-
- name: r-parallel
|
|
168
|
-
check: |
|
|
169
|
-
{{proc.lang}} <(echo "library(parallel)")
|
|
170
|
-
- name: r-dplyr
|
|
171
|
-
check: |
|
|
172
|
-
{{proc.lang}} <(echo "library(dplyr)")
|
|
173
|
-
- name: r-tibble
|
|
174
|
-
check: |
|
|
175
|
-
{{proc.lang}} <(echo "library(tibble)")
|
|
176
|
-
- name: r-enrichr
|
|
177
|
-
check: |
|
|
178
|
-
{{proc.lang}} <(echo "library(enrichR)")
|
|
179
|
-
- name: r-data.table
|
|
180
|
-
check: |
|
|
181
|
-
{{proc.lang}} <(echo "library(data.table)")
|
|
182
|
-
- name: r-fgsea
|
|
183
|
-
check: |
|
|
184
|
-
{{proc.lang}} <(echo "library(fgsea)")
|
|
185
|
-
"""
|
|
186
|
-
input = "sobjfile:file"
|
|
187
|
-
output = "outdir:dir:{{in.sobjfile | stem}}.pathwayhetero"
|
|
188
|
-
lang = config.lang.rscript
|
|
189
|
-
envs = {
|
|
190
|
-
"gmtfile": None,
|
|
191
|
-
"select_pcs": 0.8,
|
|
192
|
-
"pathway_pval_cutoff": 0.01,
|
|
193
|
-
"ncores": config.misc.ncores,
|
|
194
|
-
"bubble_devpars": {},
|
|
195
|
-
"grouping": None,
|
|
196
|
-
"grouping_prefix": "",
|
|
197
|
-
"subsetting": None,
|
|
198
|
-
"subsetting_prefix": "",
|
|
199
|
-
}
|
|
200
|
-
script = (
|
|
201
|
-
"file://../scripts/scrna_metabolic_landscape/"
|
|
202
|
-
"MetabolicPathwayHeterogeneity.R"
|
|
203
|
-
)
|
|
204
|
-
plugin_opts = {
|
|
205
|
-
"report": (
|
|
206
|
-
"file://../reports/scrna_metabolic_landscape/"
|
|
207
|
-
"MetabolicPathwayHeterogeneity.svelte"
|
|
208
|
-
)
|
|
209
|
-
}
|
|
210
13
|
|
|
211
|
-
|
|
212
|
-
class ScrnaMetabolicLandscape(Pipeline):
|
|
14
|
+
class ScrnaMetabolicLandscape(ProcGroup):
|
|
213
15
|
"""Metabolic landscape analysis for scRNA-seq data
|
|
214
16
|
|
|
215
17
|
An abstract from
|
|
@@ -224,72 +26,248 @@ class ScrnaMetabolicLandscape(Pipeline):
|
|
|
224
26
|
"Metabolic landscape of the tumor microenvironment at
|
|
225
27
|
single cell resolution." Nature communications 10.1 (2019): 1-12.
|
|
226
28
|
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
29
|
+
Args:
|
|
30
|
+
metafile: Either a metafile or an rds file of a Seurat object.
|
|
31
|
+
If it is a metafile, it should have two columns: `Sample` and
|
|
32
|
+
`RNADir`. `Sample` should be the first column with unique
|
|
33
|
+
identifiers for the samples and `RNADir` indicates where the
|
|
34
|
+
barcodes, genes, expression matrices are. The data will be loaded
|
|
35
|
+
and an unsupervised clustering will be done.
|
|
36
|
+
Currently only 10X data is supported.
|
|
37
|
+
If it is an rds file, the seurat object will be used directly
|
|
38
|
+
is_seurat: Whether the input `metafile` is a seurat object.
|
|
39
|
+
If `metafile` is specified directly, this option will be ignored
|
|
40
|
+
and will be inferred from the file extension. If `metafile` is
|
|
41
|
+
not specified, meaning `<pipeline>.procs.MetabolicInput` is
|
|
42
|
+
dependent on other processes, this option will be used to determine
|
|
43
|
+
whether the input is a seurat object or not.
|
|
44
|
+
gmtfile: The GMT file with the metabolic pathways. The gene names should
|
|
45
|
+
match the gene names in the gene list in RNADir or the Seurat object
|
|
46
|
+
grouping: defines the basic groups to investigate the metabolic activity
|
|
47
|
+
Typically the clusters.
|
|
48
|
+
grouping_prefix: Working as a prefix to group names
|
|
49
|
+
For example, if we have `grouping_prefix = "cluster"` and
|
|
50
|
+
we have `1` and `2` in the `grouping` column, the groups
|
|
51
|
+
will be named as `cluster_1` and `cluster_2`
|
|
52
|
+
subsetting: How do we subset the data. Another column in the metadata
|
|
53
|
+
to do comparisons.
|
|
54
|
+
subsetting_prefix: Working as a prefix to subset names
|
|
55
|
+
For example, if we have `subsetting_prefix = "timepoint"` and
|
|
56
|
+
we have `pre` and `post` in the `subsetting` column, the subsets
|
|
57
|
+
will be named as `timepoint_pre` and `timepoint_post`
|
|
58
|
+
subsetting_comparison: What kind of comparisons are we doing to compare
|
|
59
|
+
cells from different subsets.
|
|
60
|
+
It should be dict with keys as the names of the comparisons and
|
|
61
|
+
values as the 2 comparison groups from the `subsetting` column.
|
|
62
|
+
For example, if we have `pre` and `post` in the `subsetting` column,
|
|
63
|
+
we could have
|
|
64
|
+
`subsetting_comparison = {"pre_vs_post": ["post", "pre"]}`
|
|
65
|
+
The second group will be the control group in the comparison.
|
|
66
|
+
If we also have `1`, `2` and `3` in the `grouping` column,
|
|
67
|
+
by default, the comparisons are done within each subset for
|
|
68
|
+
each group. For example, for group `1`, groups `2` and `3`
|
|
69
|
+
will be used as control, and for group `2`, groups `1` and `3`
|
|
70
|
+
will be used as control, and for group `3`, groups `1` and `2`
|
|
71
|
+
will be used as control. It is similar to `Seurat::FindMarkers`
|
|
72
|
+
procedure. With this option, the comparisons are also done to
|
|
73
|
+
compare cells from different subsets within each group. With the
|
|
74
|
+
example above, we will have `pre_vs_post` comparisons within
|
|
75
|
+
each group.
|
|
76
|
+
mutaters: Add new columns to the metadata for grouping/subsetting.
|
|
77
|
+
They are passed to `sobj@meta.data |> mutate(...)`. For example,
|
|
78
|
+
`{"timepoint": "if_else(treatment == 'control', 'pre', 'post')"}`
|
|
79
|
+
will add a new column `timepoint` to the metadata with values of
|
|
80
|
+
`pre` and `post` based on the `treatment` column.
|
|
81
|
+
ncores: Number of cores to use for parallelization for each process
|
|
280
82
|
"""
|
|
83
|
+
DEFAULTS = Diot(
|
|
84
|
+
metafile=None,
|
|
85
|
+
is_seurat=None,
|
|
86
|
+
gmtfile=None,
|
|
87
|
+
grouping=None,
|
|
88
|
+
grouping_prefix="",
|
|
89
|
+
subsetting=None,
|
|
90
|
+
subsetting_prefix=None,
|
|
91
|
+
subsetting_comparison={},
|
|
92
|
+
mutaters=None,
|
|
93
|
+
ncores=config.misc.ncores,
|
|
94
|
+
)
|
|
281
95
|
|
|
282
|
-
|
|
96
|
+
class MetabolicPathwayActivity(Proc):
|
|
97
|
+
"""Pathway activities for each group
|
|
98
|
+
|
|
99
|
+
Requires:
|
|
100
|
+
r-scater:
|
|
101
|
+
- check: {{proc.lang}} <(echo "library(scater)")
|
|
102
|
+
r-reshape2:
|
|
103
|
+
- check: {{proc.lang}} <(echo "library(reshape2)")
|
|
104
|
+
r-rcolorbrewer:
|
|
105
|
+
- check: {{proc.lang}} <(echo "library(RColorBrewer)")
|
|
106
|
+
r-ggplot2:
|
|
107
|
+
- check: {{proc.lang}} <(echo "library(ggplot2)")
|
|
108
|
+
r-ggprism:
|
|
109
|
+
- check: {{proc.lang}} <(echo "library(ggprism)")
|
|
110
|
+
r-complexheatmap:
|
|
111
|
+
- check: {{proc.lang}} <(echo "library(ComplexHeatmap)")
|
|
112
|
+
r-parallel:
|
|
113
|
+
- check: {{proc.lang}} <(echo "library(parallel)")
|
|
114
|
+
"""
|
|
115
|
+
input = "sobjfile:file"
|
|
116
|
+
output = "outdir:dir:{{in.sobjfile | stem}}.pathwayactivity"
|
|
117
|
+
envs = {
|
|
118
|
+
"ntimes": 5000,
|
|
119
|
+
"ncores": config.misc.ncores,
|
|
120
|
+
"heatmap_devpars": {},
|
|
121
|
+
"violin_devpars": {},
|
|
122
|
+
"gmtfile": None,
|
|
123
|
+
"grouping": None,
|
|
124
|
+
"grouping_prefix": "",
|
|
125
|
+
"subsetting": None,
|
|
126
|
+
"subsetting_prefix": "",
|
|
127
|
+
}
|
|
128
|
+
lang = config.lang.rscript
|
|
129
|
+
script = (
|
|
130
|
+
"file://../scripts/"
|
|
131
|
+
"scrna_metabolic_landscape/MetabolicPathwayActivity.R"
|
|
132
|
+
)
|
|
133
|
+
plugin_opts = {
|
|
134
|
+
"report": (
|
|
135
|
+
"file://../reports/"
|
|
136
|
+
"scrna_metabolic_landscape/MetabolicPathwayActivity.svelte"
|
|
137
|
+
)
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
class MetabolicFeatures(Proc):
|
|
141
|
+
"""Inter-subset metabolic features - Enrichment analysis in details
|
|
142
|
+
|
|
143
|
+
Requires:
|
|
144
|
+
r-parallel:
|
|
145
|
+
- check: {{proc.lang}} <(echo "library(parallel)")
|
|
146
|
+
r-fgsea:
|
|
147
|
+
- check: {{proc.lang}} <(echo "library(fgsea)")
|
|
148
|
+
"""
|
|
149
|
+
input = "sobjfile:file"
|
|
150
|
+
output = "outdir:dir:{{in.sobjfile | stem}}.pathwayfeatures"
|
|
151
|
+
lang = config.lang.rscript
|
|
152
|
+
envs = {
|
|
153
|
+
"ncores": config.misc.ncores,
|
|
154
|
+
"fgsea": True,
|
|
155
|
+
"prerank_method": "signal_to_noise",
|
|
156
|
+
"top": 10,
|
|
157
|
+
"gmtfile": None,
|
|
158
|
+
"grouping": None,
|
|
159
|
+
"grouping_prefix": "",
|
|
160
|
+
"subsetting": None,
|
|
161
|
+
"subsetting_prefix": "",
|
|
162
|
+
}
|
|
163
|
+
script = (
|
|
164
|
+
"file://../scripts/scrna_metabolic_landscape/MetabolicFeatures.R"
|
|
165
|
+
)
|
|
166
|
+
plugin_opts = {
|
|
167
|
+
"report": (
|
|
168
|
+
"file://../reports/"
|
|
169
|
+
"scrna_metabolic_landscape/MetabolicFeatures.svelte"
|
|
170
|
+
)
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
class MetabolicFeaturesIntraSubset(Proc):
|
|
174
|
+
"""Intra-subset metabolic features - Enrichment analysis in details
|
|
175
|
+
|
|
176
|
+
Requires:
|
|
177
|
+
r-parallel:
|
|
178
|
+
- check: {{proc.lang}} <(echo "library(parallel)")
|
|
179
|
+
r-scater:
|
|
180
|
+
- check: {{proc.lang}} <(echo "library(scater)")
|
|
181
|
+
r-fgsea:
|
|
182
|
+
- check: {{proc.lang}} <(echo "library(fgsea)")
|
|
183
|
+
"""
|
|
184
|
+
input = "sobjfile:file"
|
|
185
|
+
output = (
|
|
186
|
+
"outdir:dir:{{in.sobjfile | stem}}.intra-subset-pathwayfeatures"
|
|
187
|
+
)
|
|
188
|
+
lang = config.lang.rscript
|
|
189
|
+
envs = {
|
|
190
|
+
"ncores": config.misc.ncores,
|
|
191
|
+
"gmtfile": None,
|
|
192
|
+
"fgsea": True,
|
|
193
|
+
"prerank_method": "signal_to_noise",
|
|
194
|
+
"top": 10,
|
|
195
|
+
"grouping": None,
|
|
196
|
+
"grouping_prefix": "",
|
|
197
|
+
"subsetting": None,
|
|
198
|
+
"subsetting_prefix": "",
|
|
199
|
+
"subsetting_comparison": {},
|
|
200
|
+
}
|
|
201
|
+
script = (
|
|
202
|
+
"file://../scripts/scrna_metabolic_landscape/"
|
|
203
|
+
"MetabolicFeaturesIntraSubsets.R"
|
|
204
|
+
)
|
|
205
|
+
plugin_opts = {
|
|
206
|
+
"report": (
|
|
207
|
+
"file://../reports/scrna_metabolic_landscape/"
|
|
208
|
+
"MetabolicFeaturesIntraSubsets.svelte"
|
|
209
|
+
)
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
class MetabolicPathwayHeterogeneity(Proc):
|
|
213
|
+
"""Pathway heterogeneity
|
|
214
|
+
|
|
215
|
+
Requires:
|
|
216
|
+
r-gtools:
|
|
217
|
+
- check: {{proc.lang}} <(echo "library(gtools)")
|
|
218
|
+
r-ggplot2:
|
|
219
|
+
- check: {{proc.lang}} <(echo "library(ggplot2)")
|
|
220
|
+
r-ggprism:
|
|
221
|
+
- check: {{proc.lang}} <(echo "library(ggprism)")
|
|
222
|
+
r-parallel:
|
|
223
|
+
- check: {{proc.lang}} <(echo "library(parallel)")
|
|
224
|
+
r-dplyr:
|
|
225
|
+
- check: {{proc.lang}} <(echo "library(dplyr)")
|
|
226
|
+
r-tibble:
|
|
227
|
+
- check: {{proc.lang}} <(echo "library(tibble)")
|
|
228
|
+
r-enrichr:
|
|
229
|
+
- check: {{proc.lang}} <(echo "library(enrichR)")
|
|
230
|
+
r-data.table:
|
|
231
|
+
- check: {{proc.lang}} <(echo "library(data.table)")
|
|
232
|
+
r-fgsea:
|
|
233
|
+
- check: {{proc.lang}} <(echo "library(fgsea)")
|
|
234
|
+
"""
|
|
235
|
+
input = "sobjfile:file"
|
|
236
|
+
output = "outdir:dir:{{in.sobjfile | stem}}.pathwayhetero"
|
|
237
|
+
lang = config.lang.rscript
|
|
238
|
+
envs = {
|
|
239
|
+
"gmtfile": None,
|
|
240
|
+
"select_pcs": 0.8,
|
|
241
|
+
"pathway_pval_cutoff": 0.01,
|
|
242
|
+
"ncores": config.misc.ncores,
|
|
243
|
+
"bubble_devpars": {},
|
|
244
|
+
"grouping": None,
|
|
245
|
+
"grouping_prefix": "",
|
|
246
|
+
"subsetting": None,
|
|
247
|
+
"subsetting_prefix": "",
|
|
248
|
+
}
|
|
249
|
+
script = (
|
|
250
|
+
"file://../scripts/scrna_metabolic_landscape/"
|
|
251
|
+
"MetabolicPathwayHeterogeneity.R"
|
|
252
|
+
)
|
|
253
|
+
plugin_opts = {
|
|
254
|
+
"report": (
|
|
255
|
+
"file://../reports/scrna_metabolic_landscape/"
|
|
256
|
+
"MetabolicPathwayHeterogeneity.svelte"
|
|
257
|
+
)
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
def post_init(self):
|
|
261
|
+
"""Load runtime processes"""
|
|
262
|
+
if self.opts.metafile:
|
|
263
|
+
suffix = Path(self.opts.metafile).suffix
|
|
264
|
+
self.opts.is_seurat = suffix in (".rds", ".RDS")
|
|
283
265
|
|
|
284
|
-
@
|
|
285
|
-
def
|
|
266
|
+
@ProcGroup.add_proc
|
|
267
|
+
def p_input(self) -> Type[Proc]:
|
|
286
268
|
"""Build MetabolicInputs process"""
|
|
287
269
|
from .misc import File2Proc
|
|
288
270
|
|
|
289
|
-
if self.options.metafile:
|
|
290
|
-
suffix = Path(self.options.metafile).suffix
|
|
291
|
-
self.options.is_seurat = suffix in (".rds", ".RDS")
|
|
292
|
-
|
|
293
271
|
class MetabolicInput(File2Proc):
|
|
294
272
|
"""Input for the metabolic pathway analysis pipeline for
|
|
295
273
|
scRNA-seq data
|
|
@@ -302,159 +280,139 @@ class ScrnaMetabolicLandscape(Pipeline):
|
|
|
302
280
|
metafile: Soft link to `in.metafile`
|
|
303
281
|
"""
|
|
304
282
|
|
|
305
|
-
if self.
|
|
306
|
-
input_data = [self.
|
|
283
|
+
if self.opts.metafile:
|
|
284
|
+
input_data = [self.opts.metafile]
|
|
307
285
|
|
|
308
286
|
return MetabolicInput
|
|
309
287
|
|
|
310
|
-
@
|
|
311
|
-
def
|
|
288
|
+
@ProcGroup.add_proc
|
|
289
|
+
def p_preparing(self) -> Type[Proc]:
|
|
312
290
|
"""Build SeuratPreparing process"""
|
|
313
291
|
from .scrna import SeuratPreparing
|
|
314
292
|
|
|
315
293
|
class SeuratPreparing(SeuratPreparing):
|
|
316
|
-
requires =
|
|
294
|
+
requires = self.p_input
|
|
317
295
|
|
|
318
296
|
return SeuratPreparing
|
|
319
297
|
|
|
320
|
-
@
|
|
321
|
-
def
|
|
298
|
+
@ProcGroup.add_proc
|
|
299
|
+
def p_clustering(self) -> Type[Proc]:
|
|
322
300
|
"""Build SeuratClustering process"""
|
|
301
|
+
if self.opts.is_seurat:
|
|
302
|
+
return self.p_input
|
|
303
|
+
|
|
323
304
|
from .scrna import SeuratClustering
|
|
324
305
|
|
|
325
306
|
class SeuratClustering(SeuratClustering):
|
|
326
|
-
requires =
|
|
307
|
+
requires = self.p_preparing
|
|
327
308
|
|
|
328
309
|
return SeuratClustering
|
|
329
310
|
|
|
330
|
-
@
|
|
331
|
-
def
|
|
311
|
+
@ProcGroup.add_proc
|
|
312
|
+
def p_mutater(self) -> Type[Proc]:
|
|
332
313
|
"""Build SeuratMetadataMutater process"""
|
|
314
|
+
if self.opts.mutaters:
|
|
315
|
+
return self.p_clustering
|
|
316
|
+
|
|
333
317
|
from .scrna import SeuratMetadataMutater
|
|
334
318
|
|
|
335
319
|
class SeuratMetadataMutater(SeuratMetadataMutater):
|
|
336
|
-
requires =
|
|
320
|
+
requires = self.p_clustering
|
|
337
321
|
input_data = lambda ch: tibble(
|
|
338
322
|
srtobj=ch.iloc[:, 0],
|
|
339
323
|
metafile=[None],
|
|
340
|
-
mutaters=[self.
|
|
324
|
+
mutaters=[self.opts.mutaters],
|
|
341
325
|
)
|
|
342
326
|
|
|
343
327
|
return SeuratMetadataMutater
|
|
344
328
|
|
|
345
|
-
@
|
|
346
|
-
def
|
|
329
|
+
@ProcGroup.add_proc
|
|
330
|
+
def p_expr_impute(self) -> Type[Proc]:
|
|
347
331
|
"""Build MetabolicExprImpute process"""
|
|
348
332
|
from .scrna import ExprImpute
|
|
349
333
|
|
|
350
334
|
class MetabolicExprImpute(ExprImpute):
|
|
351
|
-
requires =
|
|
335
|
+
requires = self.p_mutater
|
|
352
336
|
|
|
353
337
|
return MetabolicExprImpute
|
|
354
338
|
|
|
355
|
-
@
|
|
356
|
-
def
|
|
339
|
+
@ProcGroup.add_proc
|
|
340
|
+
def p_pathway_activity(self) -> Type[Proc]:
|
|
357
341
|
"""Build MetabolicPathwayActivity process"""
|
|
358
342
|
return Proc.from_proc(
|
|
359
|
-
MetabolicPathwayActivity,
|
|
343
|
+
ScrnaMetabolicLandscape.MetabolicPathwayActivity,
|
|
360
344
|
"MetabolicPathwayActivity",
|
|
361
|
-
requires=
|
|
345
|
+
requires=self.p_expr_impute,
|
|
362
346
|
order=-1,
|
|
363
347
|
envs={
|
|
364
|
-
"ncores": self.
|
|
365
|
-
"gmtfile": self.
|
|
366
|
-
"grouping": self.
|
|
367
|
-
"grouping_prefix": self.
|
|
368
|
-
"subsetting": self.
|
|
369
|
-
"subsetting_prefix": self.
|
|
348
|
+
"ncores": self.opts.ncores,
|
|
349
|
+
"gmtfile": self.opts.gmtfile,
|
|
350
|
+
"grouping": self.opts.grouping,
|
|
351
|
+
"grouping_prefix": self.opts.grouping_prefix,
|
|
352
|
+
"subsetting": self.opts.subsetting,
|
|
353
|
+
"subsetting_prefix": self.opts.subsetting_prefix,
|
|
370
354
|
},
|
|
371
355
|
)
|
|
372
356
|
|
|
373
|
-
@
|
|
374
|
-
def
|
|
357
|
+
@ProcGroup.add_proc
|
|
358
|
+
def p_pathway_heterogeneity(self) -> Type[Proc]:
|
|
375
359
|
"""Build MetabolicPathwayHeterogeneity process"""
|
|
376
360
|
return Proc.from_proc(
|
|
377
|
-
MetabolicPathwayHeterogeneity,
|
|
361
|
+
ScrnaMetabolicLandscape.MetabolicPathwayHeterogeneity,
|
|
378
362
|
"MetabolicPathwayHeterogeneity",
|
|
379
|
-
requires=
|
|
363
|
+
requires=self.p_expr_impute,
|
|
380
364
|
envs={
|
|
381
|
-
"ncores": self.
|
|
382
|
-
"gmtfile": self.
|
|
383
|
-
"grouping": self.
|
|
384
|
-
"grouping_prefix": self.
|
|
385
|
-
"subsetting": self.
|
|
386
|
-
"subsetting_prefix": self.
|
|
365
|
+
"ncores": self.opts.ncores,
|
|
366
|
+
"gmtfile": self.opts.gmtfile,
|
|
367
|
+
"grouping": self.opts.grouping,
|
|
368
|
+
"grouping_prefix": self.opts.grouping_prefix,
|
|
369
|
+
"subsetting": self.opts.subsetting,
|
|
370
|
+
"subsetting_prefix": self.opts.subsetting_prefix,
|
|
387
371
|
},
|
|
388
372
|
)
|
|
389
373
|
|
|
390
|
-
@
|
|
391
|
-
def
|
|
374
|
+
@ProcGroup.add_proc
|
|
375
|
+
def p_features(self) -> Type[Proc]:
|
|
392
376
|
"""Build MetabolicFeatures process"""
|
|
393
377
|
return Proc.from_proc(
|
|
394
|
-
MetabolicFeatures,
|
|
378
|
+
ScrnaMetabolicLandscape.MetabolicFeatures,
|
|
395
379
|
"MetabolicFeatures",
|
|
396
|
-
requires=
|
|
380
|
+
requires=self.p_expr_impute,
|
|
397
381
|
envs={
|
|
398
|
-
"ncores": self.
|
|
399
|
-
"gmtfile": self.
|
|
400
|
-
"grouping": self.
|
|
401
|
-
"grouping_prefix": self.
|
|
402
|
-
"subsetting": self.
|
|
403
|
-
"subsetting_prefix": self.
|
|
382
|
+
"ncores": self.opts.ncores,
|
|
383
|
+
"gmtfile": self.opts.gmtfile,
|
|
384
|
+
"grouping": self.opts.grouping,
|
|
385
|
+
"grouping_prefix": self.opts.grouping_prefix,
|
|
386
|
+
"subsetting": self.opts.subsetting,
|
|
387
|
+
"subsetting_prefix": self.opts.subsetting_prefix,
|
|
404
388
|
},
|
|
405
389
|
)
|
|
406
390
|
|
|
407
|
-
@
|
|
408
|
-
def
|
|
391
|
+
@ProcGroup.add_proc
|
|
392
|
+
def p_features_intra_subset(self) -> Type[Proc]:
|
|
409
393
|
"""Build MetabolicFeaturesIntraSubset process"""
|
|
410
|
-
if self.
|
|
394
|
+
if self.opts.subsetting_comparison and not self.opts.subsetting:
|
|
411
395
|
raise ValueError(
|
|
412
396
|
"Cannot use `subsetting_comparison` without `subsetting`."
|
|
413
397
|
)
|
|
414
398
|
|
|
415
399
|
return Proc.from_proc(
|
|
416
|
-
MetabolicFeaturesIntraSubset,
|
|
400
|
+
ScrnaMetabolicLandscape.MetabolicFeaturesIntraSubset,
|
|
417
401
|
"MetabolicFeaturesIntraSubset",
|
|
418
|
-
requires=
|
|
402
|
+
requires=self.p_expr_impute,
|
|
419
403
|
envs={
|
|
420
|
-
"ncores": self.
|
|
421
|
-
"gmtfile": self.
|
|
422
|
-
"grouping": self.
|
|
423
|
-
"grouping_prefix": self.
|
|
424
|
-
"subsetting": self.
|
|
425
|
-
"subsetting_prefix": self.
|
|
426
|
-
"subsetting_comparison": self.
|
|
404
|
+
"ncores": self.opts.ncores,
|
|
405
|
+
"gmtfile": self.opts.gmtfile,
|
|
406
|
+
"grouping": self.opts.grouping,
|
|
407
|
+
"grouping_prefix": self.opts.grouping_prefix,
|
|
408
|
+
"subsetting": self.opts.subsetting,
|
|
409
|
+
"subsetting_prefix": self.opts.subsetting_prefix,
|
|
410
|
+
"subsetting_comparison": self.opts.subsetting_comparison,
|
|
427
411
|
},
|
|
428
412
|
)
|
|
429
413
|
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
raise ValueError("`gmtfile` with metabolic pathways is required.")
|
|
436
|
-
|
|
437
|
-
MetabolicInput = self.build_input()
|
|
438
|
-
|
|
439
|
-
if self.options.is_seurat:
|
|
440
|
-
# Use the rds file
|
|
441
|
-
SeuratClustering = MetabolicInput
|
|
442
|
-
else:
|
|
443
|
-
# Do clustering
|
|
444
|
-
SeuratPreparing = self.build_preparing(MetabolicInput)
|
|
445
|
-
SeuratClustering = self.build_clustering(SeuratPreparing)
|
|
446
|
-
|
|
447
|
-
if self.options.mutaters:
|
|
448
|
-
SeuratMetadataMutater = self.build_mutater(SeuratClustering)
|
|
449
|
-
else:
|
|
450
|
-
# No mutaters, just use the SeuratClustering
|
|
451
|
-
SeuratMetadataMutater = SeuratClustering
|
|
452
|
-
|
|
453
|
-
# Do imputation and normalization for all the data together
|
|
454
|
-
MetabolicExprImpute = self.build_expr_impute(SeuratMetadataMutater)
|
|
455
|
-
|
|
456
|
-
self.build_pathway_activity(MetabolicExprImpute)
|
|
457
|
-
self.build_pathway_heterogeneity(MetabolicExprImpute)
|
|
458
|
-
self.build_features(MetabolicExprImpute)
|
|
459
|
-
if self.options.subsetting_comparison:
|
|
460
|
-
self.build_features_intra_subset(MetabolicExprImpute)
|
|
414
|
+
|
|
415
|
+
if __name__ == "__main__":
|
|
416
|
+
from pipen_args import install # noqa: F401
|
|
417
|
+
|
|
418
|
+
ScrnaMetabolicLandscape().as_pipen().run()
|