biopipen-0.7.0-py3-none-any.whl → biopipen-0.8.0-py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (65)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.py +0 -5
  3. biopipen/core/config.toml +4 -4
  4. biopipen/core/defaults.py +3 -3
  5. biopipen/core/filters.py +1 -0
  6. biopipen/core/proc.py +1 -3
  7. biopipen/core/testing.py +1 -2
  8. biopipen/ns/bam.py +10 -14
  9. biopipen/ns/bcftools.py +37 -7
  10. biopipen/ns/bed.py +9 -16
  11. biopipen/ns/cnv.py +8 -11
  12. biopipen/ns/cnvkit.py +32 -59
  13. biopipen/ns/cnvkit_pipeline.py +266 -310
  14. biopipen/ns/csv.py +0 -2
  15. biopipen/ns/gene.py +0 -1
  16. biopipen/ns/gsea.py +4 -10
  17. biopipen/ns/misc.py +0 -5
  18. biopipen/ns/plot.py +2 -4
  19. biopipen/ns/rnaseq.py +0 -1
  20. biopipen/ns/scrna.py +78 -120
  21. biopipen/ns/scrna_metabolic_landscape.py +306 -348
  22. biopipen/ns/tcgamaf.py +52 -0
  23. biopipen/ns/tcr.py +5 -15
  24. biopipen/ns/vcf.py +52 -34
  25. biopipen/ns/web.py +8 -19
  26. biopipen/reports/bam/CNAClinic.svelte +1 -1
  27. biopipen/reports/bam/CNVpytor.svelte +2 -2
  28. biopipen/reports/bam/ControlFREEC.svelte +1 -1
  29. biopipen/reports/cnv/AneuploidyScore.svelte +2 -2
  30. biopipen/reports/cnv/AneuploidyScoreSummary.svelte +1 -1
  31. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  32. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  34. biopipen/reports/gsea/FGSEA.svelte +1 -1
  35. biopipen/reports/gsea/GSEA.svelte +2 -2
  36. biopipen/reports/scrna/CellsDistribution.svelte +1 -1
  37. biopipen/reports/scrna/DimPlots.svelte +1 -1
  38. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +42 -39
  40. biopipen/reports/scrna/ScFGSEA.svelte +3 -3
  41. biopipen/reports/scrna/SeuratClusterStats.svelte +3 -3
  42. biopipen/reports/scrna/SeuratPreparing.svelte +2 -2
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubsets.svelte +2 -2
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +1 -1
  45. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +1 -1
  46. biopipen/reports/tcr/CloneResidency.svelte +4 -4
  47. biopipen/reports/tcr/Immunarch.svelte +2 -2
  48. biopipen/reports/tcr/SampleDiversity.svelte +2 -2
  49. biopipen/reports/tcr/TCRClusteringStats.svelte +3 -3
  50. biopipen/reports/tcr/VJUsage.svelte +1 -1
  51. biopipen/reports/utils/gsea.liq +1 -1
  52. biopipen/reports/utils/misc.liq +1 -1
  53. biopipen/reports/vcf/TruvariBenchSummary.svelte +1 -1
  54. biopipen/reports/vcf/TruvariConsistency.svelte +3 -3
  55. biopipen/scripts/bcftools/BcftoolsSort.py +19 -0
  56. biopipen/scripts/scrna/MarkersFinder.R +73 -35
  57. biopipen/scripts/tcgamaf/Maf2Vcf.py +22 -0
  58. biopipen/scripts/tcgamaf/MafAddChr.py +14 -0
  59. biopipen/scripts/tcgamaf/maf2vcf.pl +427 -0
  60. biopipen/scripts/vcf/VcfAnno.py +26 -0
  61. biopipen/scripts/vcf/VcfFix_utils.py +3 -2
  62. {biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/METADATA +7 -8
  63. {biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/RECORD +65 -59
  64. {biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/WHEEL +1 -1
  65. {biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/entry_points.txt +2 -1
@@ -5,211 +5,13 @@ from typing import Type
5
5
 
6
6
  from diot import Diot
7
7
  from datar.tibble import tibble
8
- from pipen_cli_run import Pipeline, process
8
+ from pipen_args import ProcGroup
9
9
 
10
10
  from ..core.config import config
11
11
  from ..core.proc import Proc
12
12
 
13
- DEFAULT_OPTS = Diot(
14
- metafile=None,
15
- is_seurat=None,
16
- gmtfile=None,
17
- grouping=None,
18
- grouping_prefix="",
19
- subsetting=None,
20
- subsetting_prefix=None,
21
- subsetting_comparison={},
22
- mutaters=None,
23
- ncores=config.misc.ncores,
24
- )
25
-
26
-
27
- class MetabolicPathwayActivity(Proc):
28
- """Pathway activities for each group
29
-
30
- Requires:
31
- - name: r-scater
32
- check: |
33
- {{proc.lang}} <(echo "library(scater)")
34
- - name: r-reshape2
35
- check: |
36
- {{proc.lang}} <(echo "library(reshape2)")
37
- - name: r-rcolorbrewer
38
- check: |
39
- {{proc.lang}} <(echo "library(RColorBrewer)")
40
- - name: r-ggplot2
41
- check: |
42
- {{proc.lang}} <(echo "library(ggplot2)")
43
- - name: r-ggprism
44
- check: |
45
- {{proc.lang}} <(echo "library(ggprism)")
46
- - name: r-complexheatmap
47
- check: |
48
- {{proc.lang}} <(echo "library(ComplexHeatmap)")
49
- - name: r-parallel
50
- check: |
51
- {{proc.lang}} <(echo "library(parallel)")
52
- """
53
- input = "sobjfile:file"
54
- output = "outdir:dir:{{in.sobjfile | stem}}.pathwayactivity"
55
- envs = {
56
- "ntimes": 5000,
57
- "ncores": config.misc.ncores,
58
- "heatmap_devpars": {},
59
- "violin_devpars": {},
60
- "gmtfile": None,
61
- "grouping": None,
62
- "grouping_prefix": "",
63
- "subsetting": None,
64
- "subsetting_prefix": "",
65
- }
66
- lang = config.lang.rscript
67
- script = (
68
- "file://../scripts/"
69
- "scrna_metabolic_landscape/MetabolicPathwayActivity.R"
70
- )
71
- plugin_opts = {
72
- "report": (
73
- "file://../reports/"
74
- "scrna_metabolic_landscape/MetabolicPathwayActivity.svelte"
75
- )
76
- }
77
-
78
-
79
- class MetabolicFeatures(Proc):
80
- """Inter-subset metabolic features - Enrichment analysis in details
81
-
82
- Requires:
83
- - name: r-parallel
84
- check: |
85
- {{proc.lang}} <(echo "library(parallel)")
86
- - name: r-fgsea
87
- check: |
88
- {{proc.lang}} <(echo "library(fgsea)")
89
- """
90
- input = "sobjfile:file"
91
- output = "outdir:dir:{{in.sobjfile | stem}}.pathwayfeatures"
92
- lang = config.lang.rscript
93
- envs = {
94
- "ncores": config.misc.ncores,
95
- "fgsea": True,
96
- "prerank_method": "signal_to_noise",
97
- "top": 10,
98
- "gmtfile": None,
99
- "grouping": None,
100
- "grouping_prefix": "",
101
- "subsetting": None,
102
- "subsetting_prefix": "",
103
- }
104
- script = "file://../scripts/scrna_metabolic_landscape/MetabolicFeatures.R"
105
- plugin_opts = {
106
- "report": (
107
- "file://../reports/"
108
- "scrna_metabolic_landscape/MetabolicFeatures.svelte"
109
- )
110
- }
111
-
112
-
113
- class MetabolicFeaturesIntraSubset(Proc):
114
- """Intra-subset metabolic features - Enrichment analysis in details
115
-
116
- Requires:
117
- - name: r-parallel
118
- check: |
119
- {{proc.lang}} <(echo "library(parallel)")
120
- - name: r-scater
121
- check: |
122
- {{proc.lang}} <(echo "library(scater)")
123
- - name: r-fgsea
124
- check: |
125
- {{proc.lang}} <(echo "library(fgsea)")
126
- """
127
- input = "sobjfile:file"
128
- output = "outdir:dir:{{in.sobjfile | stem}}.intra-subset-pathwayfeatures"
129
- lang = config.lang.rscript
130
- envs = {
131
- "ncores": config.misc.ncores,
132
- "gmtfile": None,
133
- "fgsea": True,
134
- "prerank_method": "signal_to_noise",
135
- "top": 10,
136
- "grouping": None,
137
- "grouping_prefix": "",
138
- "subsetting": None,
139
- "subsetting_prefix": "",
140
- "subsetting_comparison": {},
141
- }
142
- script = (
143
- "file://../scripts/scrna_metabolic_landscape/"
144
- "MetabolicFeaturesIntraSubsets.R"
145
- )
146
- plugin_opts = {
147
- "report": (
148
- "file://../reports/scrna_metabolic_landscape/"
149
- "MetabolicFeaturesIntraSubsets.svelte"
150
- )
151
- }
152
-
153
-
154
- class MetabolicPathwayHeterogeneity(Proc):
155
- """Pathway heterogeneity
156
-
157
- Requires:
158
- - name: r-gtools
159
- check: |
160
- {{proc.lang}} <(echo "library(gtools)")
161
- - name: r-ggplot2
162
- check: |
163
- {{proc.lang}} <(echo "library(ggplot2)")
164
- - name: r-ggprism
165
- check: |
166
- {{proc.lang}} <(echo "library(ggprism)")
167
- - name: r-parallel
168
- check: |
169
- {{proc.lang}} <(echo "library(parallel)")
170
- - name: r-dplyr
171
- check: |
172
- {{proc.lang}} <(echo "library(dplyr)")
173
- - name: r-tibble
174
- check: |
175
- {{proc.lang}} <(echo "library(tibble)")
176
- - name: r-enrichr
177
- check: |
178
- {{proc.lang}} <(echo "library(enrichR)")
179
- - name: r-data.table
180
- check: |
181
- {{proc.lang}} <(echo "library(data.table)")
182
- - name: r-fgsea
183
- check: |
184
- {{proc.lang}} <(echo "library(fgsea)")
185
- """
186
- input = "sobjfile:file"
187
- output = "outdir:dir:{{in.sobjfile | stem}}.pathwayhetero"
188
- lang = config.lang.rscript
189
- envs = {
190
- "gmtfile": None,
191
- "select_pcs": 0.8,
192
- "pathway_pval_cutoff": 0.01,
193
- "ncores": config.misc.ncores,
194
- "bubble_devpars": {},
195
- "grouping": None,
196
- "grouping_prefix": "",
197
- "subsetting": None,
198
- "subsetting_prefix": "",
199
- }
200
- script = (
201
- "file://../scripts/scrna_metabolic_landscape/"
202
- "MetabolicPathwayHeterogeneity.R"
203
- )
204
- plugin_opts = {
205
- "report": (
206
- "file://../reports/scrna_metabolic_landscape/"
207
- "MetabolicPathwayHeterogeneity.svelte"
208
- )
209
- }
210
13
 
211
-
212
- class ScrnaMetabolicLandscape(Pipeline):
14
+ class ScrnaMetabolicLandscape(ProcGroup):
213
15
  """Metabolic landscape analysis for scRNA-seq data
214
16
 
215
17
  An abstract from
@@ -224,72 +26,248 @@ class ScrnaMetabolicLandscape(Pipeline):
224
26
  "Metabolic landscape of the tumor microenvironment at
225
27
  single cell resolution." Nature communications 10.1 (2019): 1-12.
226
28
 
227
- Input files:
228
- - metafile: Either a metafile or an rds file of a Seurat object.
229
- If it is a metafile, it should have two columns: `Sample` and
230
- `RNADir`. `Sample` should be the first column with unique
231
- identifiers for the samples and `RNADir` indicates where the
232
- barcodes, genes, expression matrices are. The data will be loaded
233
- and an unsupervised clustering will be done.
234
- Currently only 10X data is supported.
235
- If it is an rds file, the seurat object will be used directly
236
- - is_seurat: Whether the input `metafile` is a seurat object.
237
- If `metafile` is specified directly, this option will be ignored
238
- and will be inferred from the file extension. If `metafile` is
239
- not specified, meaning `<pipeline>.procs.MetabolicInput` is dependent
240
- on other processes, this option will be used to determine whether
241
- the input is a seurat object or not.
242
- - gmtfile: The GMT file with the metabolic pathways. The gene names should
243
- match the gene names in the gene list in RNADir or the Seurat object
244
-
245
- Global options:
246
- - grouping: defines the basic groups to investigate the metabolic activity
247
- Typically the clusters.
248
- - grouping_prefix: Working as a prefix to group names
249
- For example, if we have `grouping_prefix = "cluster"` and
250
- we have `1` and `2` in the `grouping` column, the groups
251
- will be named as `cluster_1` and `cluster_2`
252
- - subsetting: How do we subset the data. Another column in the metadata
253
- to do comparisons.
254
- - subsetting_prefix: Working as a prefix to subset names
255
- For example, if we have `subsetting_prefix = "timepoint"` and
256
- we have `pre` and `post` in the `subsetting` column, the subsets
257
- will be named as `timepoint_pre` and `timepoint_post`
258
- - subsetting_comparison: What kind of comparisons are we doing to compare
259
- cells from different subsets.
260
- It should be dict with keys as the names of the comparisons and
261
- values as the 2 comparison groups from the `subsetting` column.
262
- For example, if we have `pre` and `post` in the `subsetting` column,
263
- we could have `subsetting_comparison = {"pre_vs_post": ["post", "pre"]}`
264
- The second group will be the control group in the comparison.
265
- If we also have `1`, `2` and `3` in the `grouping` column, by default,
266
- the comparisons are done within each subset for each group. For example,
267
- For group `1`, groups `2` and `3` will be used as control, and for
268
- group `2`, groups `1` and `3` will be used as control, and for group
269
- `3`, groups `1` and `2` will be used as control. It is similar to
270
- `Seurat::FindMarkers` procedure. With this option, the comparisons
271
- are also done to compare cells from different subsets within each group.
272
- With the example above, we will have `pre_vs_post` comparisons within
273
- each group.
274
- - mutaters: Add new columns to the metadata for grouping/subsetting.
275
- They are passed to `sobj@meta.data |> mutate(...)`. For example,
276
- `{"timepoint": "if_else(treatment == 'control', 'pre', 'post')"}`
277
- will add a new column `timepoint` to the metadata with values of
278
- `pre` and `post` based on the `treatment` column.
279
- - ncores: Number of cores to use for parallelization for each process
29
+ Args:
30
+ metafile: Either a metafile or an rds file of a Seurat object.
31
+ If it is a metafile, it should have two columns: `Sample` and
32
+ `RNADir`. `Sample` should be the first column with unique
33
+ identifiers for the samples and `RNADir` indicates where the
34
+ barcodes, genes, expression matrices are. The data will be loaded
35
+ and an unsupervised clustering will be done.
36
+ Currently only 10X data is supported.
37
+ If it is an rds file, the seurat object will be used directly
38
+ is_seurat: Whether the input `metafile` is a seurat object.
39
+ If `metafile` is specified directly, this option will be ignored
40
+ and will be inferred from the file extension. If `metafile` is
41
+ not specified, meaning `<pipeline>.procs.MetabolicInput` is
42
+ dependent on other processes, this option will be used to determine
43
+ whether the input is a seurat object or not.
44
+ gmtfile: The GMT file with the metabolic pathways. The gene names should
45
+ match the gene names in the gene list in RNADir or the Seurat object
46
+ grouping: defines the basic groups to investigate the metabolic activity
47
+ Typically the clusters.
48
+ grouping_prefix: Working as a prefix to group names
49
+ For example, if we have `grouping_prefix = "cluster"` and
50
+ we have `1` and `2` in the `grouping` column, the groups
51
+ will be named as `cluster_1` and `cluster_2`
52
+ subsetting: How do we subset the data. Another column in the metadata
53
+ to do comparisons.
54
+ subsetting_prefix: Working as a prefix to subset names
55
+ For example, if we have `subsetting_prefix = "timepoint"` and
56
+ we have `pre` and `post` in the `subsetting` column, the subsets
57
+ will be named as `timepoint_pre` and `timepoint_post`
58
+ subsetting_comparison: What kind of comparisons are we doing to compare
59
+ cells from different subsets.
60
+ It should be dict with keys as the names of the comparisons and
61
+ values as the 2 comparison groups from the `subsetting` column.
62
+ For example, if we have `pre` and `post` in the `subsetting` column,
63
+ we could have
64
+ `subsetting_comparison = {"pre_vs_post": ["post", "pre"]}`
65
+ The second group will be the control group in the comparison.
66
+ If we also have `1`, `2` and `3` in the `grouping` column,
67
+ by default, the comparisons are done within each subset for
68
+ each group. For example, for group `1`, groups `2` and `3`
69
+ will be used as control, and for group `2`, groups `1` and `3`
70
+ will be used as control, and for group `3`, groups `1` and `2`
71
+ will be used as control. It is similar to `Seurat::FindMarkers`
72
+ procedure. With this option, the comparisons are also done to
73
+ compare cells from different subsets within each group. With the
74
+ example above, we will have `pre_vs_post` comparisons within
75
+ each group.
76
+ mutaters: Add new columns to the metadata for grouping/subsetting.
77
+ They are passed to `sobj@meta.data |> mutate(...)`. For example,
78
+ `{"timepoint": "if_else(treatment == 'control', 'pre', 'post')"}`
79
+ will add a new column `timepoint` to the metadata with values of
80
+ `pre` and `post` based on the `treatment` column.
81
+ ncores: Number of cores to use for parallelization for each process
280
82
  """
83
+ DEFAULTS = Diot(
84
+ metafile=None,
85
+ is_seurat=None,
86
+ gmtfile=None,
87
+ grouping=None,
88
+ grouping_prefix="",
89
+ subsetting=None,
90
+ subsetting_prefix=None,
91
+ subsetting_comparison={},
92
+ mutaters=None,
93
+ ncores=config.misc.ncores,
94
+ )
281
95
 
282
- defaults = config.pipeline.scrna_metabolic_landscape
96
+ class MetabolicPathwayActivity(Proc):
97
+ """Pathway activities for each group
98
+
99
+ Requires:
100
+ r-scater:
101
+ - check: {{proc.lang}} <(echo "library(scater)")
102
+ r-reshape2:
103
+ - check: {{proc.lang}} <(echo "library(reshape2)")
104
+ r-rcolorbrewer:
105
+ - check: {{proc.lang}} <(echo "library(RColorBrewer)")
106
+ r-ggplot2:
107
+ - check: {{proc.lang}} <(echo "library(ggplot2)")
108
+ r-ggprism:
109
+ - check: {{proc.lang}} <(echo "library(ggprism)")
110
+ r-complexheatmap:
111
+ - check: {{proc.lang}} <(echo "library(ComplexHeatmap)")
112
+ r-parallel:
113
+ - check: {{proc.lang}} <(echo "library(parallel)")
114
+ """
115
+ input = "sobjfile:file"
116
+ output = "outdir:dir:{{in.sobjfile | stem}}.pathwayactivity"
117
+ envs = {
118
+ "ntimes": 5000,
119
+ "ncores": config.misc.ncores,
120
+ "heatmap_devpars": {},
121
+ "violin_devpars": {},
122
+ "gmtfile": None,
123
+ "grouping": None,
124
+ "grouping_prefix": "",
125
+ "subsetting": None,
126
+ "subsetting_prefix": "",
127
+ }
128
+ lang = config.lang.rscript
129
+ script = (
130
+ "file://../scripts/"
131
+ "scrna_metabolic_landscape/MetabolicPathwayActivity.R"
132
+ )
133
+ plugin_opts = {
134
+ "report": (
135
+ "file://../reports/"
136
+ "scrna_metabolic_landscape/MetabolicPathwayActivity.svelte"
137
+ )
138
+ }
139
+
140
+ class MetabolicFeatures(Proc):
141
+ """Inter-subset metabolic features - Enrichment analysis in details
142
+
143
+ Requires:
144
+ r-parallel:
145
+ - check: {{proc.lang}} <(echo "library(parallel)")
146
+ r-fgsea:
147
+ - check: {{proc.lang}} <(echo "library(fgsea)")
148
+ """
149
+ input = "sobjfile:file"
150
+ output = "outdir:dir:{{in.sobjfile | stem}}.pathwayfeatures"
151
+ lang = config.lang.rscript
152
+ envs = {
153
+ "ncores": config.misc.ncores,
154
+ "fgsea": True,
155
+ "prerank_method": "signal_to_noise",
156
+ "top": 10,
157
+ "gmtfile": None,
158
+ "grouping": None,
159
+ "grouping_prefix": "",
160
+ "subsetting": None,
161
+ "subsetting_prefix": "",
162
+ }
163
+ script = (
164
+ "file://../scripts/scrna_metabolic_landscape/MetabolicFeatures.R"
165
+ )
166
+ plugin_opts = {
167
+ "report": (
168
+ "file://../reports/"
169
+ "scrna_metabolic_landscape/MetabolicFeatures.svelte"
170
+ )
171
+ }
172
+
173
+ class MetabolicFeaturesIntraSubset(Proc):
174
+ """Intra-subset metabolic features - Enrichment analysis in details
175
+
176
+ Requires:
177
+ r-parallel:
178
+ - check: {{proc.lang}} <(echo "library(parallel)")
179
+ r-scater:
180
+ - check: {{proc.lang}} <(echo "library(scater)")
181
+ r-fgsea:
182
+ - check: {{proc.lang}} <(echo "library(fgsea)")
183
+ """
184
+ input = "sobjfile:file"
185
+ output = (
186
+ "outdir:dir:{{in.sobjfile | stem}}.intra-subset-pathwayfeatures"
187
+ )
188
+ lang = config.lang.rscript
189
+ envs = {
190
+ "ncores": config.misc.ncores,
191
+ "gmtfile": None,
192
+ "fgsea": True,
193
+ "prerank_method": "signal_to_noise",
194
+ "top": 10,
195
+ "grouping": None,
196
+ "grouping_prefix": "",
197
+ "subsetting": None,
198
+ "subsetting_prefix": "",
199
+ "subsetting_comparison": {},
200
+ }
201
+ script = (
202
+ "file://../scripts/scrna_metabolic_landscape/"
203
+ "MetabolicFeaturesIntraSubsets.R"
204
+ )
205
+ plugin_opts = {
206
+ "report": (
207
+ "file://../reports/scrna_metabolic_landscape/"
208
+ "MetabolicFeaturesIntraSubsets.svelte"
209
+ )
210
+ }
211
+
212
+ class MetabolicPathwayHeterogeneity(Proc):
213
+ """Pathway heterogeneity
214
+
215
+ Requires:
216
+ r-gtools:
217
+ - check: {{proc.lang}} <(echo "library(gtools)")
218
+ r-ggplot2:
219
+ - check: {{proc.lang}} <(echo "library(ggplot2)")
220
+ r-ggprism:
221
+ - check: {{proc.lang}} <(echo "library(ggprism)")
222
+ r-parallel:
223
+ - check: {{proc.lang}} <(echo "library(parallel)")
224
+ r-dplyr:
225
+ - check: {{proc.lang}} <(echo "library(dplyr)")
226
+ r-tibble:
227
+ - check: {{proc.lang}} <(echo "library(tibble)")
228
+ r-enrichr:
229
+ - check: {{proc.lang}} <(echo "library(enrichR)")
230
+ r-data.table:
231
+ - check: {{proc.lang}} <(echo "library(data.table)")
232
+ r-fgsea:
233
+ - check: {{proc.lang}} <(echo "library(fgsea)")
234
+ """
235
+ input = "sobjfile:file"
236
+ output = "outdir:dir:{{in.sobjfile | stem}}.pathwayhetero"
237
+ lang = config.lang.rscript
238
+ envs = {
239
+ "gmtfile": None,
240
+ "select_pcs": 0.8,
241
+ "pathway_pval_cutoff": 0.01,
242
+ "ncores": config.misc.ncores,
243
+ "bubble_devpars": {},
244
+ "grouping": None,
245
+ "grouping_prefix": "",
246
+ "subsetting": None,
247
+ "subsetting_prefix": "",
248
+ }
249
+ script = (
250
+ "file://../scripts/scrna_metabolic_landscape/"
251
+ "MetabolicPathwayHeterogeneity.R"
252
+ )
253
+ plugin_opts = {
254
+ "report": (
255
+ "file://../reports/scrna_metabolic_landscape/"
256
+ "MetabolicPathwayHeterogeneity.svelte"
257
+ )
258
+ }
259
+
260
+ def post_init(self):
261
+ """Load runtime processes"""
262
+ if self.opts.metafile:
263
+ suffix = Path(self.opts.metafile).suffix
264
+ self.opts.is_seurat = suffix in (".rds", ".RDS")
283
265
 
284
- @process(start=True)
285
- def build_input(self) -> Type[Proc]:
266
+ @ProcGroup.add_proc
267
+ def p_input(self) -> Type[Proc]:
286
268
  """Build MetabolicInputs process"""
287
269
  from .misc import File2Proc
288
270
 
289
- if self.options.metafile:
290
- suffix = Path(self.options.metafile).suffix
291
- self.options.is_seurat = suffix in (".rds", ".RDS")
292
-
293
271
  class MetabolicInput(File2Proc):
294
272
  """Input for the metabolic pathway analysis pipeline for
295
273
  scRNA-seq data
@@ -302,159 +280,139 @@ class ScrnaMetabolicLandscape(Pipeline):
302
280
  metafile: Soft link to `in.metafile`
303
281
  """
304
282
 
305
- if self.options.metafile:
306
- input_data = [self.options.metafile]
283
+ if self.opts.metafile:
284
+ input_data = [self.opts.metafile]
307
285
 
308
286
  return MetabolicInput
309
287
 
310
- @process
311
- def build_preparing(self, input_proc: Type[Proc]) -> Type[Proc]:
288
+ @ProcGroup.add_proc
289
+ def p_preparing(self) -> Type[Proc]:
312
290
  """Build SeuratPreparing process"""
313
291
  from .scrna import SeuratPreparing
314
292
 
315
293
  class SeuratPreparing(SeuratPreparing):
316
- requires = input_proc
294
+ requires = self.p_input
317
295
 
318
296
  return SeuratPreparing
319
297
 
320
- @process
321
- def build_clustering(self, preparing_proc: Type[Proc]) -> Type[Proc]:
298
+ @ProcGroup.add_proc
299
+ def p_clustering(self) -> Type[Proc]:
322
300
  """Build SeuratClustering process"""
301
+ if self.opts.is_seurat:
302
+ return self.p_input
303
+
323
304
  from .scrna import SeuratClustering
324
305
 
325
306
  class SeuratClustering(SeuratClustering):
326
- requires = preparing_proc
307
+ requires = self.p_preparing
327
308
 
328
309
  return SeuratClustering
329
310
 
330
- @process
331
- def build_mutater(self, clustering_proc: Type[Proc]) -> Type[Proc]:
311
+ @ProcGroup.add_proc
312
+ def p_mutater(self) -> Type[Proc]:
332
313
  """Build SeuratMetadataMutater process"""
314
+ if self.opts.mutaters:
315
+ return self.p_clustering
316
+
333
317
  from .scrna import SeuratMetadataMutater
334
318
 
335
319
  class SeuratMetadataMutater(SeuratMetadataMutater):
336
- requires = clustering_proc
320
+ requires = self.p_clustering
337
321
  input_data = lambda ch: tibble(
338
322
  srtobj=ch.iloc[:, 0],
339
323
  metafile=[None],
340
- mutaters=[self.options.mutaters],
324
+ mutaters=[self.opts.mutaters],
341
325
  )
342
326
 
343
327
  return SeuratMetadataMutater
344
328
 
345
- @process
346
- def build_expr_impute(self, subset_proc: Type[Proc]) -> Type[Proc]:
329
+ @ProcGroup.add_proc
330
+ def p_expr_impute(self) -> Type[Proc]:
347
331
  """Build MetabolicExprImpute process"""
348
332
  from .scrna import ExprImpute
349
333
 
350
334
  class MetabolicExprImpute(ExprImpute):
351
- requires = subset_proc
335
+ requires = self.p_mutater
352
336
 
353
337
  return MetabolicExprImpute
354
338
 
355
- @process(end=True)
356
- def build_pathway_activity(self, expr_imp_proc: Type[Proc]) -> Type[Proc]:
339
+ @ProcGroup.add_proc
340
+ def p_pathway_activity(self) -> Type[Proc]:
357
341
  """Build MetabolicPathwayActivity process"""
358
342
  return Proc.from_proc(
359
- MetabolicPathwayActivity,
343
+ ScrnaMetabolicLandscape.MetabolicPathwayActivity,
360
344
  "MetabolicPathwayActivity",
361
- requires=expr_imp_proc,
345
+ requires=self.p_expr_impute,
362
346
  order=-1,
363
347
  envs={
364
- "ncores": self.options.ncores,
365
- "gmtfile": self.options.gmtfile,
366
- "grouping": self.options.grouping,
367
- "grouping_prefix": self.options.grouping_prefix,
368
- "subsetting": self.options.subsetting,
369
- "subsetting_prefix": self.options.subsetting_prefix,
348
+ "ncores": self.opts.ncores,
349
+ "gmtfile": self.opts.gmtfile,
350
+ "grouping": self.opts.grouping,
351
+ "grouping_prefix": self.opts.grouping_prefix,
352
+ "subsetting": self.opts.subsetting,
353
+ "subsetting_prefix": self.opts.subsetting_prefix,
370
354
  },
371
355
  )
372
356
 
373
- @process(end=True)
374
- def build_pathway_heterogeneity(self, norm_proc: Type[Proc]) -> Type[Proc]:
357
+ @ProcGroup.add_proc
358
+ def p_pathway_heterogeneity(self) -> Type[Proc]:
375
359
  """Build MetabolicPathwayHeterogeneity process"""
376
360
  return Proc.from_proc(
377
- MetabolicPathwayHeterogeneity,
361
+ ScrnaMetabolicLandscape.MetabolicPathwayHeterogeneity,
378
362
  "MetabolicPathwayHeterogeneity",
379
- requires=norm_proc,
363
+ requires=self.p_expr_impute,
380
364
  envs={
381
- "ncores": self.options.ncores,
382
- "gmtfile": self.options.gmtfile,
383
- "grouping": self.options.grouping,
384
- "grouping_prefix": self.options.grouping_prefix,
385
- "subsetting": self.options.subsetting,
386
- "subsetting_prefix": self.options.subsetting_prefix,
365
+ "ncores": self.opts.ncores,
366
+ "gmtfile": self.opts.gmtfile,
367
+ "grouping": self.opts.grouping,
368
+ "grouping_prefix": self.opts.grouping_prefix,
369
+ "subsetting": self.opts.subsetting,
370
+ "subsetting_prefix": self.opts.subsetting_prefix,
387
371
  },
388
372
  )
389
373
 
390
- @process(end=True)
391
- def build_features(self, norm_proc: Type[Proc]) -> Type[Proc]:
374
+ @ProcGroup.add_proc
375
+ def p_features(self) -> Type[Proc]:
392
376
  """Build MetabolicFeatures process"""
393
377
  return Proc.from_proc(
394
- MetabolicFeatures,
378
+ ScrnaMetabolicLandscape.MetabolicFeatures,
395
379
  "MetabolicFeatures",
396
- requires=norm_proc,
380
+ requires=self.p_expr_impute,
397
381
  envs={
398
- "ncores": self.options.ncores,
399
- "gmtfile": self.options.gmtfile,
400
- "grouping": self.options.grouping,
401
- "grouping_prefix": self.options.grouping_prefix,
402
- "subsetting": self.options.subsetting,
403
- "subsetting_prefix": self.options.subsetting_prefix,
382
+ "ncores": self.opts.ncores,
383
+ "gmtfile": self.opts.gmtfile,
384
+ "grouping": self.opts.grouping,
385
+ "grouping_prefix": self.opts.grouping_prefix,
386
+ "subsetting": self.opts.subsetting,
387
+ "subsetting_prefix": self.opts.subsetting_prefix,
404
388
  },
405
389
  )
406
390
 
407
- @process(end=True)
408
- def build_features_intra_subset(self, norm_proc: Type[Proc]) -> Type[Proc]:
391
+ @ProcGroup.add_proc
392
+ def p_features_intra_subset(self) -> Type[Proc]:
409
393
  """Build MetabolicFeaturesIntraSubset process"""
410
- if self.options.subsetting_comparison and not self.options.subsetting:
394
+ if self.opts.subsetting_comparison and not self.opts.subsetting:
411
395
  raise ValueError(
412
396
  "Cannot use `subsetting_comparison` without `subsetting`."
413
397
  )
414
398
 
415
399
  return Proc.from_proc(
416
- MetabolicFeaturesIntraSubset,
400
+ ScrnaMetabolicLandscape.MetabolicFeaturesIntraSubset,
417
401
  "MetabolicFeaturesIntraSubset",
418
- requires=norm_proc,
402
+ requires=self.p_expr_impute,
419
403
  envs={
420
- "ncores": self.options.ncores,
421
- "gmtfile": self.options.gmtfile,
422
- "grouping": self.options.grouping,
423
- "grouping_prefix": self.options.grouping_prefix,
424
- "subsetting": self.options.subsetting,
425
- "subsetting_prefix": self.options.subsetting_prefix,
426
- "subsetting_comparison": self.options.subsetting_comparison,
404
+ "ncores": self.opts.ncores,
405
+ "gmtfile": self.opts.gmtfile,
406
+ "grouping": self.opts.grouping,
407
+ "grouping_prefix": self.opts.grouping_prefix,
408
+ "subsetting": self.opts.subsetting,
409
+ "subsetting_prefix": self.opts.subsetting_prefix,
410
+ "subsetting_comparison": self.opts.subsetting_comparison,
427
411
  },
428
412
  )
429
413
 
430
- def build(self) -> None:
431
- """Build processes for metabolic landscape analysis pipeline"""
432
- self.options = DEFAULT_OPTS | self.options
433
-
434
- if not self.options.gmtfile:
435
- raise ValueError("`gmtfile` with metabolic pathways is required.")
436
-
437
- MetabolicInput = self.build_input()
438
-
439
- if self.options.is_seurat:
440
- # Use the rds file
441
- SeuratClustering = MetabolicInput
442
- else:
443
- # Do clustering
444
- SeuratPreparing = self.build_preparing(MetabolicInput)
445
- SeuratClustering = self.build_clustering(SeuratPreparing)
446
-
447
- if self.options.mutaters:
448
- SeuratMetadataMutater = self.build_mutater(SeuratClustering)
449
- else:
450
- # No mutaters, just use the SeuratClustering
451
- SeuratMetadataMutater = SeuratClustering
452
-
453
- # Do imputation and normalization for all the data together
454
- MetabolicExprImpute = self.build_expr_impute(SeuratMetadataMutater)
455
-
456
- self.build_pathway_activity(MetabolicExprImpute)
457
- self.build_pathway_heterogeneity(MetabolicExprImpute)
458
- self.build_features(MetabolicExprImpute)
459
- if self.options.subsetting_comparison:
460
- self.build_features_intra_subset(MetabolicExprImpute)
414
+
415
+ if __name__ == "__main__":
416
+ from pipen_args import install # noqa: F401
417
+
418
+ ScrnaMetabolicLandscape().as_pipen().run()