biopipen 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (65) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.py +0 -5
  3. biopipen/core/config.toml +4 -4
  4. biopipen/core/defaults.py +3 -3
  5. biopipen/core/filters.py +1 -0
  6. biopipen/core/proc.py +1 -3
  7. biopipen/core/testing.py +1 -2
  8. biopipen/ns/bam.py +10 -14
  9. biopipen/ns/bcftools.py +37 -7
  10. biopipen/ns/bed.py +9 -16
  11. biopipen/ns/cnv.py +8 -11
  12. biopipen/ns/cnvkit.py +32 -59
  13. biopipen/ns/cnvkit_pipeline.py +266 -310
  14. biopipen/ns/csv.py +0 -2
  15. biopipen/ns/gene.py +0 -1
  16. biopipen/ns/gsea.py +4 -10
  17. biopipen/ns/misc.py +0 -5
  18. biopipen/ns/plot.py +2 -4
  19. biopipen/ns/rnaseq.py +0 -1
  20. biopipen/ns/scrna.py +78 -120
  21. biopipen/ns/scrna_metabolic_landscape.py +306 -348
  22. biopipen/ns/tcgamaf.py +52 -0
  23. biopipen/ns/tcr.py +5 -15
  24. biopipen/ns/vcf.py +52 -34
  25. biopipen/ns/web.py +8 -19
  26. biopipen/reports/bam/CNAClinic.svelte +1 -1
  27. biopipen/reports/bam/CNVpytor.svelte +2 -2
  28. biopipen/reports/bam/ControlFREEC.svelte +1 -1
  29. biopipen/reports/cnv/AneuploidyScore.svelte +2 -2
  30. biopipen/reports/cnv/AneuploidyScoreSummary.svelte +1 -1
  31. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  32. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  34. biopipen/reports/gsea/FGSEA.svelte +1 -1
  35. biopipen/reports/gsea/GSEA.svelte +2 -2
  36. biopipen/reports/scrna/CellsDistribution.svelte +1 -1
  37. biopipen/reports/scrna/DimPlots.svelte +1 -1
  38. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +42 -39
  40. biopipen/reports/scrna/ScFGSEA.svelte +3 -3
  41. biopipen/reports/scrna/SeuratClusterStats.svelte +3 -3
  42. biopipen/reports/scrna/SeuratPreparing.svelte +2 -2
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubsets.svelte +2 -2
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +1 -1
  45. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +1 -1
  46. biopipen/reports/tcr/CloneResidency.svelte +4 -4
  47. biopipen/reports/tcr/Immunarch.svelte +2 -2
  48. biopipen/reports/tcr/SampleDiversity.svelte +2 -2
  49. biopipen/reports/tcr/TCRClusteringStats.svelte +3 -3
  50. biopipen/reports/tcr/VJUsage.svelte +1 -1
  51. biopipen/reports/utils/gsea.liq +1 -1
  52. biopipen/reports/utils/misc.liq +1 -1
  53. biopipen/reports/vcf/TruvariBenchSummary.svelte +1 -1
  54. biopipen/reports/vcf/TruvariConsistency.svelte +3 -3
  55. biopipen/scripts/bcftools/BcftoolsSort.py +19 -0
  56. biopipen/scripts/scrna/MarkersFinder.R +73 -35
  57. biopipen/scripts/tcgamaf/Maf2Vcf.py +22 -0
  58. biopipen/scripts/tcgamaf/MafAddChr.py +14 -0
  59. biopipen/scripts/tcgamaf/maf2vcf.pl +427 -0
  60. biopipen/scripts/vcf/VcfAnno.py +26 -0
  61. biopipen/scripts/vcf/VcfFix_utils.py +3 -2
  62. {biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/METADATA +7 -8
  63. {biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/RECORD +65 -59
  64. {biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/WHEEL +1 -1
  65. {biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/entry_points.txt +2 -1
@@ -7,7 +7,7 @@ import pandas
7
7
  from diot import Diot
8
8
  from datar.tibble import tibble
9
9
  from biopipen.core.proc import Proc
10
- from pipen_cli_run import Pipeline, process
10
+ from pipen_args.procgroup import ProcGroup
11
11
 
12
12
  from ..core.config import config
13
13
 
@@ -19,45 +19,6 @@ except ImportError:
19
19
  if TYPE_CHECKING:
20
20
  from pandas import DataFrame
21
21
 
22
- DEFAULT_COLS = Diot(
23
- group="Group",
24
- purity="Purity",
25
- snpvcf="SnpVcf",
26
- bam="Bam",
27
- vcf_sample_id="VcfSampleId",
28
- vcf_normal_id="VcfNormalId",
29
- sex="Sex",
30
- guess_baits="GuessBaits",
31
- )
32
-
33
- DEFAULT_OPTS = Diot(
34
- metafile=None,
35
- baitfile=None,
36
- accfile=None,
37
- cnvkit=config.exe.cnvkit,
38
- convert=config.exe.convert,
39
- rscript=config.lang.rscript,
40
- samtools=config.exe.samtools,
41
- ncores=config.misc.ncores,
42
- reffa=config.ref.reffa,
43
- annotate=config.ref.refflat,
44
- short_names=True,
45
- method="hybrid",
46
- guessbaits=False,
47
- heatmap_cnr=False,
48
- case=None,
49
- control=None,
50
- access_excludes=[],
51
- guessbaits_guided=None,
52
- male_reference=False,
53
- drop_low_coverage=False,
54
- min_variant_depth=20,
55
- no_gc=False,
56
- no_edge=False,
57
- no_rmask=False,
58
- zygosity_freq=0.25,
59
- )
60
-
61
22
 
62
23
  @lru_cache()
63
24
  def _metadf(metafile: str) -> DataFrame:
@@ -78,7 +39,7 @@ class _MetaCol:
78
39
  return self.cols.get(name, self.default_cols[name])
79
40
 
80
41
 
81
- class CNVkitPipeline(Pipeline):
42
+ class CNVkitPipeline(ProcGroup):
82
43
  """The CNVkit pipeline
83
44
 
84
45
  Unlike `cnvkit.py batch`, this decouples the steps of the `batch` command so
@@ -86,90 +47,6 @@ class CNVkitPipeline(Pipeline):
86
47
 
87
48
  The pipeline requires the following options:
88
49
 
89
- Input files:
90
- - metafile: a tab-separated file (see the next section)
91
- - baitfile: Potentially targeted genomic regions.
92
- E.g. all possible exons for the reference genome.
93
- This is optional when `method` is `wgs`.
94
- - accfile: The accessible genomic regions.
95
- If not given, use `cnvkit.py access` to generate one.
96
-
97
- Special options:
98
- - access_excludes: File(s) with regions to be excluded for
99
- `cnvkit.py access`.
100
- - guessbaits_guided: Whether to use guided mode for guessing baits.
101
- - guessbaits: Guess the bait file from the bam files, either guided or
102
- unguided.
103
- If False, `baitfile` is used. Otherwise, if `baitfile` is given, use it
104
- (guided), otherwise use `accfile` (unguided).
105
- The bam files with `metacols.guess_baits` column set to `True`, `TRUE`,
106
- `true`, `1`, `Yes`, `YES`, or `yes` will be used to guess the bait file.
107
- - heatmap_cnr: Whether to generate a heatmap of the .cnr files
108
- (bin-level signals). This is allowed to set to False, it will take
109
- longer to run.
110
- - case: The group name of samples in `metacols.group` to call CNVs for.
111
- If not specified, use all samples. In such a case, `control` must not be
112
- specified, as we are using a flat reference.
113
- - control: The group name of samples in `metacols.group` to use as reference
114
- if not specified, use a flat reference.
115
- - metacols: The column names for each type of information in metafile
116
- - group: The column name in the metafile that indicates the sample group
117
- Default: `Group`
118
- - purity: The column name in the metafile that indicates the sample
119
- purity. Default: `Purity`
120
- - snpvcf: The column name in the metafile that indicates the path to
121
- the SNP VCF file. Default: `SnpVcf`
122
- - bam: The column name in the metafile that indicates the path to the
123
- BAM file. Default: `Bam`
124
- - vcf_sample_id: The column name in the metafile that indicates the
125
- sample ID in the VCF file. Default: `VcfSampleId`
126
- - vcf_normal_id: The column name in the metafile that indicates the
127
- normal sample ID in the VCF file. Default: `VcfNormalId`
128
- - sex: The column name in the metafile that indicates the sample
129
- sex. Default: `Sex`
130
- - guess_baits: The column name in the metafile that indicates whether
131
- to guess the bait file from the bam files. Default: `GuessBaits`
132
-
133
- Global options that are used by multiple processes
134
- (can be overriden individually by `[<proc>.envs.xxx]`):
135
- - cnvkit: the path to the cnvkit.py executable, defaults to
136
- `config.exe.cnvkit` from `./.biopipen.toml` or `~/.biopipen.toml`.
137
- - rscript: Path to the Rscript excecutable to use for running R code.
138
- Requires `DNAcopy` to be installed in R, defaults to
139
- `config.lang.rscript`
140
- - samtools: Path to samtools, used for guessing bait file.
141
- - convert: Linux `convert` command to convert pdf to png
142
- So that they can be embedded in the HTML report.
143
- - ncores: number of cores to use, defaults to `config.misc.ncores`
144
- - reffa: the reference genome (e.g. hg19.fa)
145
- Used by `CNVkitAccess`, `CNVkitAutobin` and `CNVkitReference`
146
- - annotate: Use gene models from this file to assign names to the
147
- target regions. Format: UCSC refFlat.txt or ensFlat.txt file
148
- (preferred), or BED, interval list, GFF, or similar.
149
- - short_names: Reduce multi-accession bait labels to be short and consistent
150
- - method: Sequencing protocol: hybridization capture ('hybrid'),
151
- targeted amplicon sequencing ('amplicon'),
152
- or whole genome sequencing ('wgs'). Determines
153
- whether and how to use antitarget bins.
154
- - male_reference: Use or assume a male reference (i.e. female samples
155
- will have +1 log-CNR of chrX; otherwise male samples would have
156
- -1 chrX).
157
- Used by `CNVkitReference`, `CNVkitCall`, `CNVkitHeatmapCns` and
158
- `CNVkitHeatmapCnr`.
159
- - drop_low_coverage: Drop very-low-coverage bins before segmentation to
160
- avoid false-positive deletions in poor-quality tumor samples.
161
- Used by `CNVkitSegment` and `CNVkitCall`
162
- - no_gc: Skip GC correction for `cnvkit.py reference/fix`.
163
- - no_edge: Skip edge-effect correction for `cnvkit.py reference/fix`.
164
- - no_rmask: Skip RepeatMasker correction for `cnvkit.py reference/fix`.
165
- no_* options are used by `CNVkitReference` and `CNVkitFix`
166
- - min_variant_depth: Minimum read depth for a SNV to be displayed
167
- in the b-allele frequency plot.
168
- Used by `CNVkitSegment` and `CNVkitCall`
169
- - zygosity_freq: Ignore VCF's genotypes (GT field) and instead infer
170
- zygosity from allele frequencies.
171
- Used by `CNVkitSegment` and `CNVkitCall`
172
-
173
50
  Options for different processes can be specified by `[CNVkitXXX.envs.xxx]`
174
51
  See `biopipen.ns.cnvkit.CNVkitXXX` for more details.
175
52
 
@@ -190,8 +67,7 @@ class CNVkitPipeline(Pipeline):
190
67
 
191
68
  To run this pipeline from command line, with the `pipen-run` plugin:
192
69
  >>> # In this case, `pipeline.cnvkit_pipeline.metafile` must be provided
193
- >>> pipen run cnvkit_pipeline CNVkitPipeline \
194
- >>> +config <config.toml> <other pipeline args>
70
+ >>> pipen run cnvkit_pipeline CNVkitPipeline <other pipeline args>
195
71
 
196
72
  To use this as a dependency for other pipelines:
197
73
  >>> from biopipen.ns.cnvkit_pipeline import CNVkitPipeline
@@ -199,17 +75,135 @@ class CNVkitPipeline(Pipeline):
199
75
  >>> # pipeline.starts: Start processes of the pipeline
200
76
  >>> # pipeline.ends: End processes of the pipeline
201
77
  >>> # pipeline.procs.<proc>: The process with name <proc>
202
- """
203
78
 
204
- defaults = config.pipeline.cnvkit_pipeline
79
+ Args:
80
+ metafile: a tab-separated file (see the next section)
81
+ baitfile: Potentially targeted genomic regions.
82
+ E.g. all possible exons for the reference genome.
83
+ This is optional when `method` is `wgs`.
84
+ accfile: The accessible genomic regions.
85
+ If not given, use `cnvkit.py access` to generate one.
86
+ access_excludes: File(s) with regions to be excluded for
87
+ `cnvkit.py access`.
88
+ guessbaits_guided: Whether to use guided mode for guessing baits.
89
+ guessbaits: Guess the bait file from the bam files, either guided or
90
+ unguided.
91
+ If False, `baitfile` is used. Otherwise, if `baitfile` is given,
92
+ use it (guided), otherwise use `accfile` (unguided).
93
+ The bam files with `metacols.guess_baits` column set to
94
+ `True`, `TRUE`, `true`, `1`, `Yes`, `YES`, or `yes`
95
+ will be used to guess the bait file.
96
+ heatmap_cnr: Whether to generate a heatmap of the .cnr files
97
+ (bin-level signals). This can be set to False, since generating it takes
98
+ longer to run.
99
+ case: The group name of samples in `metacols.group` to call CNVs for.
100
+ If not specified, use all samples. In such a case, `control` must
101
+ not be specified, as we are using a flat reference.
102
+ control: The group name of samples in `metacols.group` to use as
103
+ reference. If not specified, use a flat reference.
104
+ cnvkit: the path to the cnvkit.py executable, defaults to
105
+ `config.exe.cnvkit` from `./.biopipen.toml` or `~/.biopipen.toml`.
106
+ rscript: Path to the Rscript executable to use for running R code.
107
+ Requires `DNAcopy` to be installed in R, defaults to
108
+ `config.lang.rscript`
109
+ samtools: Path to samtools, used for guessing bait file.
110
+ convert: Linux `convert` command to convert pdf to png
111
+ So that they can be embedded in the HTML report.
112
+ ncores: number of cores to use, defaults to `config.misc.ncores`
113
+ reffa: the reference genome (e.g. hg19.fa)
114
+ Used by `CNVkitAccess`, `CNVkitAutobin` and `CNVkitReference`
115
+ annotate: Use gene models from this file to assign names to the
116
+ target regions. Format: UCSC refFlat.txt or ensFlat.txt file
117
+ (preferred), or BED, interval list, GFF, or similar.
118
+ short_names: Reduce multi-accession bait labels to be short and
119
+ consistent
120
+ method: Sequencing protocol: hybridization capture ('hybrid'),
121
+ targeted amplicon sequencing ('amplicon'),
122
+ or whole genome sequencing ('wgs'). Determines
123
+ whether and how to use antitarget bins.
124
+ male_reference: Use or assume a male reference (i.e. female samples
125
+ will have +1 log-CNR of chrX; otherwise male samples would have
126
+ -1 chrX).
127
+ Used by `CNVkitReference`, `CNVkitCall`, `CNVkitHeatmapCns` and
128
+ `CNVkitHeatmapCnr`.
129
+ drop_low_coverage: Drop very-low-coverage bins before segmentation to
130
+ avoid false-positive deletions in poor-quality tumor samples.
131
+ Used by `CNVkitSegment` and `CNVkitCall`
132
+ no_gc: Skip GC correction for `cnvkit.py reference/fix`.
133
+ no_edge: Skip edge-effect correction for `cnvkit.py reference/fix`.
134
+ no_rmask: Skip RepeatMasker correction for `cnvkit.py reference/fix`.
135
+ no_* options are used by `CNVkitReference` and `CNVkitFix`
136
+ min_variant_depth: Minimum read depth for a SNV to be displayed
137
+ in the b-allele frequency plot.
138
+ Used by `CNVkitSegment` and `CNVkitCall`
139
+ zygosity_freq: Ignore VCF's genotypes (GT field) and instead infer
140
+ zygosity from allele frequencies.
141
+ Used by `CNVkitSegment` and `CNVkitCall`
142
+ metacols: The column names for each type of information in metafile
143
+ - group: The column name in the metafile that indicates the sample
144
+ group
145
+ - purity: The column name in the metafile that indicates the sample
146
+ purity
147
+ - snpvcf: The column name in the metafile that indicates the path to
148
+ the SNP VCF file
149
+ - bam: The column name in the metafile that indicates the path to
150
+ the BAM file
151
+ - vcf_sample_id: The column name in the metafile that indicates the
152
+ sample ID in the VCF file
153
+ - vcf_normal_id: The column name in the metafile that indicates the
154
+ normal sample ID in the VCF file
155
+ - sex: The column name in the metafile that indicates the sample sex
156
+ - guess_baits: The column name in the metafile that indicates
157
+ whether to guess the bait file from the bam files
158
+ """
159
+ DEFAULTS = Diot(
160
+ metafile=None,
161
+ baitfile=None,
162
+ accfile=None,
163
+ cnvkit=config.exe.cnvkit,
164
+ convert=config.exe.convert,
165
+ rscript=config.lang.rscript,
166
+ samtools=config.exe.samtools,
167
+ ncores=config.misc.ncores,
168
+ reffa=config.ref.reffa,
169
+ annotate=config.ref.refflat,
170
+ short_names=True,
171
+ method="hybrid",
172
+ guessbaits=False,
173
+ heatmap_cnr=False,
174
+ case=None,
175
+ control=None,
176
+ access_excludes=[],
177
+ guessbaits_guided=None,
178
+ male_reference=False,
179
+ drop_low_coverage=False,
180
+ min_variant_depth=20,
181
+ no_gc=False,
182
+ no_edge=False,
183
+ no_rmask=False,
184
+ zygosity_freq=0.25,
185
+ metacols=Diot(
186
+ group="Group",
187
+ purity="Purity",
188
+ snpvcf="SnpVcf",
189
+ bam="Bam",
190
+ vcf_sample_id="VcfSampleId",
191
+ vcf_normal_id="VcfNormalId",
192
+ sex="Sex",
193
+ guess_baits="GuessBaits",
194
+ ),
195
+ )
205
196
 
206
197
  @cached_property
207
198
  def col(self):
208
199
  """Get the column names by self.col.<colname>"""
209
- return _MetaCol(self.options.get("metacols"), DEFAULT_COLS)
200
+ return _MetaCol(
201
+ self.opts.get("metacols"),
202
+ self.__class__.DEFAULTS.metacols,
203
+ )
210
204
 
211
- @process(start=True)
212
- def build_metafile(self):
205
+ @ProcGroup.add_proc
206
+ def p_metafile(self):
213
207
  """Build MetaFile process"""
214
208
  from .misc import File2Proc
215
209
 
@@ -220,23 +214,23 @@ class CNVkitPipeline(Pipeline):
220
214
  # Remember to set the dependency in the pipeline:
221
215
  # >>> pipeline.procs.MetaFile.requires = [other_pipeline.procs]
222
216
  # where other_pipeline.procs generate the metafile
223
- if self.options.metafile:
224
- input_data = [self.options.metafile]
217
+ if self.opts.metafile:
218
+ input_data = [self.opts.metafile]
225
219
 
226
220
  return MetaFile
227
221
 
228
- @process(start=True)
229
- def build_cnvkit_access(self):
222
+ @ProcGroup.add_proc
223
+ def p_cnvkit_access(self):
230
224
  """Build CNVkitAccess process"""
231
- if self.options.get("accfile"):
225
+ if self.opts.get("accfile"):
232
226
  from .misc import File2Proc
233
227
 
234
228
  class CNVkitAccess(File2Proc):
235
- input_data = [self.options.accfile]
229
+ input_data = [self.opts.accfile]
236
230
  else:
237
231
  from .cnvkit import CNVkitAccess
238
232
 
239
- excludes = self.options.get("excludes", [])
233
+ excludes = self.opts.get("excludes", [])
240
234
  if not isinstance(excludes, (list, tuple)):
241
235
  excludes = [excludes]
242
236
 
@@ -244,18 +238,21 @@ class CNVkitPipeline(Pipeline):
244
238
  # can be overwritten by [CNVkitAccess.in.exludes]
245
239
  input_data = [excludes]
246
240
  envs = {
247
- "cnvkit": self.options.cnvkit,
248
- "ref": self.options.reffa,
241
+ "cnvkit": self.opts.cnvkit,
242
+ "ref": self.opts.reffa,
249
243
  }
250
244
 
251
245
  return CNVkitAccess
252
246
 
253
- @process
254
- def build_cnvkit_guessbaits(self, metafile_proc, access_proc):
247
+ @ProcGroup.add_proc
248
+ def p_cnvkit_guessbaits(self):
255
249
  """Build CNVkitGuessBaits process"""
256
250
  from .cnvkit import CNVkitGuessBaits
257
251
 
258
- if self.options.guessbaits_guided is None:
252
+ if not self.opts.guessbaits:
253
+ return None
254
+
255
+ if self.opts.guessbaits_guided is None:
259
256
  raise ValueError(
260
257
  "`guessbaits.guided` must be specified, expecting True or False"
261
258
  )
@@ -281,8 +278,8 @@ class CNVkitPipeline(Pipeline):
281
278
  self.col.bam,
282
279
  ].tolist()
283
280
 
284
- if self.options.guessbaits_guided:
285
- if not self.options.baitfile:
281
+ if self.opts.guessbaits_guided:
282
+ if not self.opts.baitfile:
286
283
  raise ValueError(
287
284
  "`baitfile` must be specified for guided mode "
288
285
  "to guess baits. See: "
@@ -290,78 +287,81 @@ class CNVkitPipeline(Pipeline):
290
287
  )
291
288
 
292
289
  class CNVkitGuessBaits(CNVkitGuessBaits):
293
- requires = metafile_proc
290
+ requires = self.p_metafile
294
291
  input_data = lambda metafile_ch: tibble(
295
292
  bamfiles=[_guess_baits_bams(metafile_ch)],
296
- atfile=self.options.baitfile,
293
+ atfile=self.opts.baitfile,
297
294
  )
298
295
  envs = {
299
- "cnvkit": self.options.cnvkit,
300
- "samtools": self.options.samtools,
301
- "ncores": self.options.ncores,
302
- "ref": self.options.reffa,
296
+ "cnvkit": self.opts.cnvkit,
297
+ "samtools": self.opts.samtools,
298
+ "ncores": self.opts.ncores,
299
+ "ref": self.opts.reffa,
303
300
  "guided": True,
304
301
  }
305
302
  else: # unguided
306
303
  class CNVkitGuessBaits(CNVkitGuessBaits):
307
- requires = metafile_proc, access_proc
304
+ requires = self.p_metafile, self.p_cnvkit_access
308
305
  input_data = lambda metafile_ch, access_ch: tibble(
309
306
  bamfiles=[_guess_baits_bams(metafile_ch)],
310
307
  accessfile=_1st(access_ch),
311
308
  )
312
309
  envs = {
313
- "cnvkit": self.options.cnvkit,
314
- "samtools": self.options.samtools,
315
- "ncores": self.options.ncores,
316
- "ref": self.options.reffa,
310
+ "cnvkit": self.opts.cnvkit,
311
+ "samtools": self.opts.samtools,
312
+ "ncores": self.opts.ncores,
313
+ "ref": self.opts.reffa,
317
314
  "guided": False,
318
315
  }
319
316
 
320
317
  return CNVkitGuessBaits
321
318
 
322
- @process
323
- def build_cnvkit_autobin(self, metafile_proc, access_proc, guessbaits_proc):
319
+ @ProcGroup.add_proc
320
+ def p_cnvkit_autobin(self):
324
321
  """Build CNVkitAutobin process"""
325
322
  from .cnvkit import CNVkitAutobin
326
323
 
327
324
  class CNVkitAutobin(CNVkitAutobin):
328
- if guessbaits_proc:
329
- requires = metafile_proc, access_proc, guessbaits_proc
325
+ if self.p_cnvkit_guessbaits:
326
+ requires = (
327
+ self.p_metafile,
328
+ self.p_cnvkit_access,
329
+ self.p_cnvkit_guessbaits,
330
+ )
330
331
  input_data = lambda ch1, ch2, ch3: tibble(
331
332
  bamfiles=[_metadf(_1st(ch1))[self.col.bam].tolist()],
332
333
  accfile=_1st(ch2),
333
334
  baitfile=(
334
335
  _1st(ch3)
335
- if self.options.guessbaits
336
- else self.options.baitfile
336
+ if self.opts.guessbaits
337
+ else self.opts.baitfile
337
338
  ),
338
339
  )
339
340
  else:
340
- requires = metafile_proc, access_proc
341
+ requires = self.p_metafile, self.p_cnvkit_access
341
342
  input_data = lambda ch1, ch2: tibble(
342
343
  bamfiles=[_metadf(_1st(ch1))[self.col.bam].tolist()],
343
344
  accfile=_1st(ch2),
344
- baitfile=self.options.baitfile,
345
+ baitfile=self.opts.baitfile,
345
346
  )
346
347
  envs = {
347
- "cnvkit": self.options.cnvkit,
348
- "method": self.options.method,
349
- "annotate": self.options.annotate,
350
- "short_names": self.options.short_names,
351
- "ref": self.options.reffa,
348
+ "cnvkit": self.opts.cnvkit,
349
+ "method": self.opts.method,
350
+ "annotate": self.opts.annotate,
351
+ "short_names": self.opts.short_names,
352
+ "ref": self.opts.reffa,
352
353
  }
353
354
 
354
355
  return CNVkitAutobin
355
356
 
356
- @process
357
- def build_cnvkit_coverage(self, metafile_proc, autobin_proc, anti):
357
+ def _p_cnvkit_coverage(self, anti: bool):
358
358
  """Build CNVkitTargetCoverage and CNVkitAntiTargetCoverage processes"""
359
359
  from .cnvkit import CNVkitCoverage
360
360
 
361
361
  return Proc.from_proc(
362
362
  CNVkitCoverage,
363
363
  name="CNVkitCoverageAnittarget" if anti else "CNVkitCoverageTarget",
364
- requires=[metafile_proc, autobin_proc],
364
+ requires=[self.p_metafile, self.p_cnvkit_autobin],
365
365
  input_data=lambda ch1, ch2: tibble(
366
366
  _metadf(_1st(ch1))[self.col.bam].tolist(),
367
367
  target_file=ch2[
@@ -369,29 +369,33 @@ class CNVkitPipeline(Pipeline):
369
369
  ].tolist()[0],
370
370
  ),
371
371
  envs={
372
- "cnvkit": self.options.cnvkit,
373
- "ncores": self.options.ncores,
374
- "ref": self.options.reffa,
372
+ "cnvkit": self.opts.cnvkit,
373
+ "ncores": self.opts.ncores,
374
+ "ref": self.opts.reffa,
375
375
  }
376
376
  )
377
377
 
378
- @process
379
- def build_cnvkit_reference(
380
- self,
381
- metafile_proc,
382
- target_coverage_proc,
383
- antitarget_coverage_proc,
384
- autobin_proc,
385
- ):
378
+ @ProcGroup.add_proc
379
+ def p_cnvkit_coverage_target(self):
380
+ """Build CNVkitCoverageTarget process"""
381
+ return self._p_cnvkit_coverage(anti=False)
382
+
383
+ @ProcGroup.add_proc
384
+ def p_cnvkit_coverage_antitarget(self):
385
+ """Build CNVkitCoverageAntiTarget process"""
386
+ return self._p_cnvkit_coverage(anti=True)
387
+
388
+ @ProcGroup.add_proc
389
+ def p_cnvkit_reference(self):
386
390
  """Build CNVkitReference process"""
387
391
  from .cnvkit import CNVkitReference
388
392
 
389
393
  def _input_data(ch1, ch2, ch3, ch4):
390
394
  metadf = _metadf(_1st(ch1))
391
395
 
392
- if self.options.control:
396
+ if self.opts.control:
393
397
  # Use control samples to build reference
394
- control_masks = metadf[self.col.group] == self.options.control
398
+ control_masks = metadf[self.col.group] == self.opts.control
395
399
  covfiles = [
396
400
  ch2.outfile[control_masks].tolist()
397
401
  + ch3.outfile[control_masks].tolist()
@@ -418,34 +422,28 @@ class CNVkitPipeline(Pipeline):
418
422
 
419
423
  class CNVkitReference(CNVkitReference):
420
424
  requires = [
421
- metafile_proc,
422
- target_coverage_proc,
423
- antitarget_coverage_proc,
424
- autobin_proc,
425
+ self.p_metafile,
426
+ self.p_cnvkit_coverage_target,
427
+ self.p_cnvkit_coverage_antitarget,
428
+ self.p_cnvkit_autobin,
425
429
  ]
426
430
  input_data = _input_data
427
431
  envs = {
428
- "cnvkit": self.options.cnvkit,
429
- "no_gc": self.options.no_gc,
430
- "no_edge": self.options.no_edge,
431
- "no_rmask": self.options.no_rmask,
432
- "ref": self.options.reffa,
432
+ "cnvkit": self.opts.cnvkit,
433
+ "no_gc": self.opts.no_gc,
434
+ "no_edge": self.opts.no_edge,
435
+ "no_rmask": self.opts.no_rmask,
436
+ "ref": self.opts.reffa,
433
437
  }
434
438
 
435
439
  return CNVkitReference
436
440
 
437
- @process
438
- def build_cnvkit_fix(
439
- self,
440
- metafile_proc,
441
- target_coverage_proc,
442
- antitarget_coverage_proc,
443
- reference_proc,
444
- ):
441
+ @ProcGroup.add_proc
442
+ def p_cnvkit_fix(self):
445
443
  """Build CNVkitFix process"""
446
444
  from .cnvkit import CNVkitFix
447
445
 
448
- if not self.options.case and self.options.control:
446
+ if not self.opts.case and self.opts.control:
449
447
  raise ValueError(
450
448
  "`case` is not specified, meaning using all samples as cases, "
451
449
  "but `control` is specified (we can only use a flat reference "
@@ -454,10 +452,10 @@ class CNVkitPipeline(Pipeline):
454
452
 
455
453
  def _input_data(ch1, ch2, ch3, ch4):
456
454
  metadf = _metadf(_1st(ch1))
457
- if not self.options.case:
455
+ if not self.opts.case:
458
456
  tumor_masks = [True] * len(metadf)
459
457
  else:
460
- tumor_masks = metadf[self.col.group] == self.options.case
458
+ tumor_masks = metadf[self.col.group] == self.opts.case
461
459
 
462
460
  return tibble(
463
461
  target_file=ch2.outfile[tumor_masks],
@@ -468,32 +466,32 @@ class CNVkitPipeline(Pipeline):
468
466
 
469
467
  class CNVkitFix(CNVkitFix):
470
468
  requires = [
471
- metafile_proc,
472
- target_coverage_proc,
473
- antitarget_coverage_proc,
474
- reference_proc,
469
+ self.p_metafile,
470
+ self.p_cnvkit_coverage_target,
471
+ self.p_cnvkit_coverage_antitarget,
472
+ self.p_cnvkit_reference,
475
473
  ]
476
474
  input_data = _input_data
477
475
  envs = {
478
- "cnvkit": self.options.cnvkit,
479
- "no_gc": self.options.no_gc,
480
- "no_edge": self.options.no_edge,
481
- "no_rmask": self.options.no_rmask,
476
+ "cnvkit": self.opts.cnvkit,
477
+ "no_gc": self.opts.no_gc,
478
+ "no_edge": self.opts.no_edge,
479
+ "no_rmask": self.opts.no_rmask,
482
480
  }
483
481
 
484
482
  return CNVkitFix
485
483
 
486
- @process
487
- def build_cnvkit_segment(self, metafile_proc, fix_proc):
484
+ @ProcGroup.add_proc
485
+ def p_cnvkit_segment(self):
488
486
  """Build CNVkitSegment process"""
489
487
  from .cnvkit import CNVkitSegment
490
488
 
491
489
  def _input_data(ch1, ch2):
492
490
  metadf = _metadf(_1st(ch1))
493
- if not self.options.case:
491
+ if not self.opts.case:
494
492
  tumor_masks = [True] * len(metadf)
495
493
  else:
496
- tumor_masks = metadf[self.col.group] == self.options.case
494
+ tumor_masks = metadf[self.col.group] == self.opts.case
497
495
 
498
496
  return tibble(
499
497
  chrfile=ch2.outfile,
@@ -515,27 +513,27 @@ class CNVkitPipeline(Pipeline):
515
513
  )
516
514
 
517
515
  class CNVkitSegment(CNVkitSegment):
518
- requires = metafile_proc, fix_proc
516
+ requires = self.p_metafile, self.p_cnvkit_fix
519
517
  input_data = _input_data
520
518
  envs = {
521
- "cnvkit": self.options.cnvkit,
522
- "rscript": self.options.rscript,
523
- "ncores": self.options.ncores,
519
+ "cnvkit": self.opts.cnvkit,
520
+ "rscript": self.opts.rscript,
521
+ "ncores": self.opts.ncores,
524
522
  }
525
523
 
526
524
  return CNVkitSegment
527
525
 
528
- @process(end=True)
529
- def build_cnvkit_scatter(self, metafile_proc, fix_proc, segment_proc):
526
+ @ProcGroup.add_proc
527
+ def p_cnvkit_scatter(self):
530
528
  """Build CNVkitScatter process"""
531
529
  from .cnvkit import CNVkitScatter
532
530
 
533
531
  def _input_data(ch1, ch2, ch3):
534
532
  metadf = _metadf(_1st(ch1))
535
- if not self.options.case:
533
+ if not self.opts.case:
536
534
  tumor_masks = [True] * len(metadf)
537
535
  else:
538
- tumor_masks = metadf[self.col.group] == self.options.case
536
+ tumor_masks = metadf[self.col.group] == self.opts.case
539
537
 
540
538
  return tibble(
541
539
  chrfile=ch2.outfile,
@@ -558,26 +556,26 @@ class CNVkitPipeline(Pipeline):
558
556
  )
559
557
 
560
558
  class CNVkitScatter(CNVkitScatter):
561
- requires = metafile_proc, fix_proc, segment_proc
559
+ requires = self.p_metafile, self.p_cnvkit_fix, self.p_cnvkit_segment
562
560
  input_data = _input_data
563
561
  envs = {
564
- "cnvkit": self.options.cnvkit,
565
- "convert": self.options.convert,
562
+ "cnvkit": self.opts.cnvkit,
563
+ "convert": self.opts.convert,
566
564
  }
567
565
 
568
566
  return CNVkitScatter
569
567
 
570
- @process(end=True)
571
- def build_cnvkit_diagram(self, metafile_proc, fix_proc, segment_proc):
568
+ @ProcGroup.add_proc
569
+ def p_cnvkit_diagram(self):
572
570
  """Build CNVkitDiagram process"""
573
571
  from .cnvkit import CNVkitDiagram
574
572
 
575
573
  def _input_data(ch1, ch2, ch3):
576
574
  metadf = _metadf(_1st(ch1))
577
- if not self.options.case:
575
+ if not self.opts.case:
578
576
  tumor_masks = [True] * len(metadf)
579
577
  else:
580
- tumor_masks = metadf[self.col.group] == self.options.case
578
+ tumor_masks = metadf[self.col.group] == self.opts.case
581
579
 
582
580
  return tibble(
583
581
  chrfile=ch2.outfile,
@@ -590,26 +588,26 @@ class CNVkitPipeline(Pipeline):
590
588
  )
591
589
 
592
590
  class CNVkitDiagram(CNVkitDiagram):
593
- requires = metafile_proc, fix_proc, segment_proc
591
+ requires = self.p_metafile, self.p_cnvkit_fix, self.p_cnvkit_segment
594
592
  input_data = _input_data
595
593
  envs = {
596
- "cnvkit": self.options.cnvkit,
597
- "convert": self.options.convert,
594
+ "cnvkit": self.opts.cnvkit,
595
+ "convert": self.opts.convert,
598
596
  }
599
597
 
600
598
  return CNVkitDiagram
601
599
 
602
- @process(end=True)
603
- def build_cnvkit_heatmap_cns(self, metafile_proc, segment_proc):
600
+ @ProcGroup.add_proc
601
+ def p_cnvkit_heatmap_cns(self):
604
602
  """Build CNVkitHeatmapCns process"""
605
603
  from .cnvkit import CNVkitHeatmap
606
604
 
607
605
  def _input_data(ch1, ch2):
608
606
  metadf = _metadf(_1st(ch1))
609
- if not self.options.case:
607
+ if not self.opts.case:
610
608
  tumor_masks = [True] * len(metadf)
611
609
  else:
612
- tumor_masks = metadf[self.col.group] == self.options.case
610
+ tumor_masks = metadf[self.col.group] == self.opts.case
613
611
 
614
612
  return tibble(
615
613
  segfiles=[ch2.outfile.tolist()],
@@ -622,27 +620,30 @@ class CNVkitPipeline(Pipeline):
622
620
 
623
621
  class CNVkitHeatmapCns(CNVkitHeatmap):
624
622
  """Heatmap of segment-level signals of multiple samples"""
625
- requires = metafile_proc, segment_proc
623
+ requires = self.p_metafile, self.p_cnvkit_segment
626
624
  input_data = _input_data
627
625
  envs = {
628
- "cnvkit": self.options.cnvkit,
629
- "convert": self.options.convert,
630
- "male_reference": self.options.male_reference,
626
+ "cnvkit": self.opts.cnvkit,
627
+ "convert": self.opts.convert,
628
+ "male_reference": self.opts.male_reference,
631
629
  }
632
630
 
633
631
  return CNVkitHeatmapCns
634
632
 
635
- @process(end=True)
636
- def build_cnvkit_heatmap_cnr(self, metafile_proc, fix_proc):
633
+ @ProcGroup.add_proc
634
+ def p_cnvkit_heatmap_cnr(self):
637
635
  """Build CNVkitHeatmapCnr process"""
638
636
  from .cnvkit import CNVkitHeatmap
639
637
 
638
+ if not self.opts.heatmap_cnr:
639
+ return None
640
+
640
641
  def _input_data(ch1, ch2):
641
642
  metadf = _metadf(_1st(ch1))
642
- if not self.options.case:
643
+ if not self.opts.case:
643
644
  tumor_masks = [True] * len(metadf)
644
645
  else:
645
- tumor_masks = metadf[self.col.group] == self.options.case
646
+ tumor_masks = metadf[self.col.group] == self.opts.case
646
647
 
647
648
  return tibble(
648
649
  segfiles=[ch2.outfile.tolist()],
@@ -655,27 +656,27 @@ class CNVkitPipeline(Pipeline):
655
656
 
656
657
  class CNVkitHeatmapCnr(CNVkitHeatmap):
657
658
  """Heatmap of bin-level signals of multiple samples"""
658
- requires = metafile_proc, fix_proc
659
+ requires = self.p_metafile, self.p_cnvkit_fix
659
660
  input_data = _input_data
660
661
  envs = {
661
- "cnvkit": self.options.cnvkit,
662
- "convert": self.options.convert,
663
- "male_reference": self.options.male_reference,
662
+ "cnvkit": self.opts.cnvkit,
663
+ "convert": self.opts.convert,
664
+ "male_reference": self.opts.male_reference,
664
665
  }
665
666
 
666
667
  return CNVkitHeatmapCnr
667
668
 
668
- @process(end=True)
669
- def build_cnvkit_call(self, metafile_proc, fix_proc, segment_proc):
669
+ @ProcGroup.add_proc
670
+ def p_cnvkit_call(self):
670
671
  """Build CNVkitCall process"""
671
672
  from .cnvkit import CNVkitCall
672
673
 
673
674
  def _input_data(ch1, ch2, ch3):
674
675
  metadf = _metadf(_1st(ch1))
675
- if not self.options.case:
676
+ if not self.opts.case:
676
677
  tumor_masks = [True] * len(metadf)
677
678
  else:
678
- tumor_masks = metadf[self.col.group] == self.options.case
679
+ tumor_masks = metadf[self.col.group] == self.opts.case
679
680
 
680
681
  return tibble(
681
682
  cnrfile=ch2.outfile,
@@ -708,65 +709,20 @@ class CNVkitPipeline(Pipeline):
708
709
  )
709
710
 
710
711
  class CNVkitCall(CNVkitCall):
711
- requires = metafile_proc, fix_proc, segment_proc
712
+ requires = self.p_metafile, self.p_cnvkit_fix, self.p_cnvkit_segment
712
713
  input_data = _input_data
713
714
  envs = {
714
- "cnvkit": self.options.cnvkit,
715
- "drop_low_coverage": self.options.drop_low_coverage,
716
- "male_reference": self.options.male_reference,
717
- "min_variant_depth": self.options.min_variant_depth,
718
- "zygosity_freq": self.options.zygosity_freq,
715
+ "cnvkit": self.opts.cnvkit,
716
+ "drop_low_coverage": self.opts.drop_low_coverage,
717
+ "male_reference": self.opts.male_reference,
718
+ "min_variant_depth": self.opts.min_variant_depth,
719
+ "zygosity_freq": self.opts.zygosity_freq,
719
720
  }
720
721
 
721
722
  return CNVkitCall
722
723
 
723
- def build(self):
724
- self.options = DEFAULT_OPTS | self.options
725
-
726
- MetaFile = self.build_metafile()
727
- CNVkitAccess = self.build_cnvkit_access()
728
724
 
729
- CNVkitGuessBaits = None
730
- if self.options.guessbaits:
731
- CNVkitGuessBaits = self.build_cnvkit_guessbaits(
732
- MetaFile,
733
- CNVkitAccess,
734
- )
735
-
736
- CNVkitAutobin = self.build_cnvkit_autobin(
737
- MetaFile,
738
- CNVkitAccess,
739
- CNVkitGuessBaits,
740
- )
725
+ if __name__ == "__main__":
726
+ from pipen_args import install # noqa: F401
741
727
 
742
- CNVkitCoverageTarget = self.build_cnvkit_coverage(
743
- MetaFile,
744
- CNVkitAutobin,
745
- anti=False,
746
- )
747
- CNVkitCoverageAntitarget = self.build_cnvkit_coverage(
748
- MetaFile,
749
- CNVkitAutobin,
750
- anti=True,
751
- )
752
- CNVkitReference = self.build_cnvkit_reference(
753
- MetaFile,
754
- CNVkitCoverageTarget,
755
- CNVkitCoverageAntitarget,
756
- CNVkitAutobin,
757
- )
758
- CNVkitFix = self.build_cnvkit_fix(
759
- MetaFile,
760
- CNVkitCoverageTarget,
761
- CNVkitCoverageAntitarget,
762
- CNVkitReference,
763
- )
764
- CNVkitSegment = self.build_cnvkit_segment(MetaFile, CNVkitFix)
765
-
766
- # end processes
767
- self.build_cnvkit_scatter(MetaFile, CNVkitFix, CNVkitSegment)
768
- self.build_cnvkit_diagram(MetaFile, CNVkitFix, CNVkitSegment)
769
- self.build_cnvkit_heatmap_cns(MetaFile, CNVkitSegment)
770
- if self.options.heatmap_cnr:
771
- self.build_cnvkit_heatmap_cnr(MetaFile, CNVkitFix)
772
- self.build_cnvkit_call(MetaFile, CNVkitFix, CNVkitSegment)
728
+ CNVkitPipeline().as_pipen().run()