biopipen 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/config.py +0 -5
- biopipen/core/config.toml +4 -4
- biopipen/core/defaults.py +3 -3
- biopipen/core/filters.py +1 -0
- biopipen/core/proc.py +1 -3
- biopipen/core/testing.py +1 -2
- biopipen/ns/bam.py +10 -14
- biopipen/ns/bcftools.py +37 -7
- biopipen/ns/bed.py +9 -16
- biopipen/ns/cnv.py +8 -11
- biopipen/ns/cnvkit.py +32 -59
- biopipen/ns/cnvkit_pipeline.py +266 -310
- biopipen/ns/csv.py +0 -2
- biopipen/ns/gene.py +0 -1
- biopipen/ns/gsea.py +4 -10
- biopipen/ns/misc.py +0 -5
- biopipen/ns/plot.py +2 -4
- biopipen/ns/rnaseq.py +0 -1
- biopipen/ns/scrna.py +78 -120
- biopipen/ns/scrna_metabolic_landscape.py +306 -348
- biopipen/ns/tcgamaf.py +52 -0
- biopipen/ns/tcr.py +5 -15
- biopipen/ns/vcf.py +52 -34
- biopipen/ns/web.py +8 -19
- biopipen/reports/bam/CNAClinic.svelte +1 -1
- biopipen/reports/bam/CNVpytor.svelte +2 -2
- biopipen/reports/bam/ControlFREEC.svelte +1 -1
- biopipen/reports/cnv/AneuploidyScore.svelte +2 -2
- biopipen/reports/cnv/AneuploidyScoreSummary.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/gsea/FGSEA.svelte +1 -1
- biopipen/reports/gsea/GSEA.svelte +2 -2
- biopipen/reports/scrna/CellsDistribution.svelte +1 -1
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna/GeneExpressionInvistigation.svelte +1 -1
- biopipen/reports/scrna/MarkersFinder.svelte +42 -39
- biopipen/reports/scrna/ScFGSEA.svelte +3 -3
- biopipen/reports/scrna/SeuratClusterStats.svelte +3 -3
- biopipen/reports/scrna/SeuratPreparing.svelte +2 -2
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubsets.svelte +2 -2
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +1 -1
- biopipen/reports/tcr/CloneResidency.svelte +4 -4
- biopipen/reports/tcr/Immunarch.svelte +2 -2
- biopipen/reports/tcr/SampleDiversity.svelte +2 -2
- biopipen/reports/tcr/TCRClusteringStats.svelte +3 -3
- biopipen/reports/tcr/VJUsage.svelte +1 -1
- biopipen/reports/utils/gsea.liq +1 -1
- biopipen/reports/utils/misc.liq +1 -1
- biopipen/reports/vcf/TruvariBenchSummary.svelte +1 -1
- biopipen/reports/vcf/TruvariConsistency.svelte +3 -3
- biopipen/scripts/bcftools/BcftoolsSort.py +19 -0
- biopipen/scripts/scrna/MarkersFinder.R +73 -35
- biopipen/scripts/tcgamaf/Maf2Vcf.py +22 -0
- biopipen/scripts/tcgamaf/MafAddChr.py +14 -0
- biopipen/scripts/tcgamaf/maf2vcf.pl +427 -0
- biopipen/scripts/vcf/VcfAnno.py +26 -0
- biopipen/scripts/vcf/VcfFix_utils.py +3 -2
- {biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/METADATA +7 -8
- {biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/RECORD +65 -59
- {biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/WHEEL +1 -1
- {biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/entry_points.txt +2 -1
biopipen/ns/cnvkit_pipeline.py
CHANGED
|
@@ -7,7 +7,7 @@ import pandas
|
|
|
7
7
|
from diot import Diot
|
|
8
8
|
from datar.tibble import tibble
|
|
9
9
|
from biopipen.core.proc import Proc
|
|
10
|
-
from
|
|
10
|
+
from pipen_args.procgroup import ProcGroup
|
|
11
11
|
|
|
12
12
|
from ..core.config import config
|
|
13
13
|
|
|
@@ -19,45 +19,6 @@ except ImportError:
|
|
|
19
19
|
if TYPE_CHECKING:
|
|
20
20
|
from pandas import DataFrame
|
|
21
21
|
|
|
22
|
-
DEFAULT_COLS = Diot(
|
|
23
|
-
group="Group",
|
|
24
|
-
purity="Purity",
|
|
25
|
-
snpvcf="SnpVcf",
|
|
26
|
-
bam="Bam",
|
|
27
|
-
vcf_sample_id="VcfSampleId",
|
|
28
|
-
vcf_normal_id="VcfNormalId",
|
|
29
|
-
sex="Sex",
|
|
30
|
-
guess_baits="GuessBaits",
|
|
31
|
-
)
|
|
32
|
-
|
|
33
|
-
DEFAULT_OPTS = Diot(
|
|
34
|
-
metafile=None,
|
|
35
|
-
baitfile=None,
|
|
36
|
-
accfile=None,
|
|
37
|
-
cnvkit=config.exe.cnvkit,
|
|
38
|
-
convert=config.exe.convert,
|
|
39
|
-
rscript=config.lang.rscript,
|
|
40
|
-
samtools=config.exe.samtools,
|
|
41
|
-
ncores=config.misc.ncores,
|
|
42
|
-
reffa=config.ref.reffa,
|
|
43
|
-
annotate=config.ref.refflat,
|
|
44
|
-
short_names=True,
|
|
45
|
-
method="hybrid",
|
|
46
|
-
guessbaits=False,
|
|
47
|
-
heatmap_cnr=False,
|
|
48
|
-
case=None,
|
|
49
|
-
control=None,
|
|
50
|
-
access_excludes=[],
|
|
51
|
-
guessbaits_guided=None,
|
|
52
|
-
male_reference=False,
|
|
53
|
-
drop_low_coverage=False,
|
|
54
|
-
min_variant_depth=20,
|
|
55
|
-
no_gc=False,
|
|
56
|
-
no_edge=False,
|
|
57
|
-
no_rmask=False,
|
|
58
|
-
zygosity_freq=0.25,
|
|
59
|
-
)
|
|
60
|
-
|
|
61
22
|
|
|
62
23
|
@lru_cache()
|
|
63
24
|
def _metadf(metafile: str) -> DataFrame:
|
|
@@ -78,7 +39,7 @@ class _MetaCol:
|
|
|
78
39
|
return self.cols.get(name, self.default_cols[name])
|
|
79
40
|
|
|
80
41
|
|
|
81
|
-
class CNVkitPipeline(
|
|
42
|
+
class CNVkitPipeline(ProcGroup):
|
|
82
43
|
"""The CNVkit pipeline
|
|
83
44
|
|
|
84
45
|
Unlike `cnvkit.py batch`, this decouples the steps of the `batch` command so
|
|
@@ -86,90 +47,6 @@ class CNVkitPipeline(Pipeline):
|
|
|
86
47
|
|
|
87
48
|
The pipeline requires following options:
|
|
88
49
|
|
|
89
|
-
Input files:
|
|
90
|
-
- metafile: a tab-separated file (see the next section)
|
|
91
|
-
- baitfile: Potentially targeted genomic regions.
|
|
92
|
-
E.g. all possible exons for the reference genome.
|
|
93
|
-
This is optional when `method` is `wgs`.
|
|
94
|
-
- accfile: The accessible genomic regions.
|
|
95
|
-
If not given, use `cnvkit.py access` to generate one.
|
|
96
|
-
|
|
97
|
-
Special options:
|
|
98
|
-
- access_excludes: File(s) with regions to be excluded for
|
|
99
|
-
`cnvkit.py access`.
|
|
100
|
-
- guessbaits_guided: Whether to use guided mode for guessing baits.
|
|
101
|
-
- guessbaits: Guess the bait file from the bam files, either guided or
|
|
102
|
-
unguided.
|
|
103
|
-
If False, `baitfile` is used. Otherwise, if `baitfile` is given, use it
|
|
104
|
-
(guided), otherwise use `accfile` (unguided).
|
|
105
|
-
The bam files with `metacols.guess_baits` column set to `True`, `TRUE`,
|
|
106
|
-
`true`, `1`, `Yes`, `YES`, or `yes` will be used to guess the bait file.
|
|
107
|
-
- heatmap_cnr: Whether to generate a heatmap of the .cnr files
|
|
108
|
-
(bin-level signals). This is allowed to set to False, it will take
|
|
109
|
-
longer to run.
|
|
110
|
-
- case: The group name of samples in `metacols.group` to call CNVs for.
|
|
111
|
-
If not specified, use all samples. In such a case, `control` must not be
|
|
112
|
-
specified, as we are using a flat reference.
|
|
113
|
-
- control: The group name of samples in `metacols.group` to use as reference
|
|
114
|
-
if not specified, use a flat reference.
|
|
115
|
-
- metacols: The column names for each type of information in metafile
|
|
116
|
-
- group: The column name in the metafile that indicates the sample group
|
|
117
|
-
Default: `Group`
|
|
118
|
-
- purity: The column name in the metafile that indicates the sample
|
|
119
|
-
purity. Default: `Purity`
|
|
120
|
-
- snpvcf: The column name in the metafile that indicates the path to
|
|
121
|
-
the SNP VCF file. Default: `SnpVcf`
|
|
122
|
-
- bam: The column name in the metafile that indicates the path to the
|
|
123
|
-
BAM file. Default: `Bam`
|
|
124
|
-
- vcf_sample_id: The column name in the metafile that indicates the
|
|
125
|
-
sample ID in the VCF file. Default: `VcfSampleId`
|
|
126
|
-
- vcf_normal_id: The column name in the metafile that indicates the
|
|
127
|
-
normal sample ID in the VCF file. Default: `VcfNormalId`
|
|
128
|
-
- sex: The column name in the metafile that indicates the sample
|
|
129
|
-
sex. Default: `Sex`
|
|
130
|
-
- guess_baits: The column name in the metafile that indicates whether
|
|
131
|
-
to guess the bait file from the bam files. Default: `GuessBaits`
|
|
132
|
-
|
|
133
|
-
Global options that are used by multiple processes
|
|
134
|
-
(can be overriden individually by `[<proc>.envs.xxx]`):
|
|
135
|
-
- cnvkit: the path to the cnvkit.py executable, defaults to
|
|
136
|
-
`config.exe.cnvkit` from `./.biopipen.toml` or `~/.biopipen.toml`.
|
|
137
|
-
- rscript: Path to the Rscript excecutable to use for running R code.
|
|
138
|
-
Requires `DNAcopy` to be installed in R, defaults to
|
|
139
|
-
`config.lang.rscript`
|
|
140
|
-
- samtools: Path to samtools, used for guessing bait file.
|
|
141
|
-
- convert: Linux `convert` command to convert pdf to png
|
|
142
|
-
So that they can be embedded in the HTML report.
|
|
143
|
-
- ncores: number of cores to use, defaults to `config.misc.ncores`
|
|
144
|
-
- reffa: the reference genome (e.g. hg19.fa)
|
|
145
|
-
Used by `CNVkitAccess`, `CNVkitAutobin` and `CNVkitReference`
|
|
146
|
-
- annotate: Use gene models from this file to assign names to the
|
|
147
|
-
target regions. Format: UCSC refFlat.txt or ensFlat.txt file
|
|
148
|
-
(preferred), or BED, interval list, GFF, or similar.
|
|
149
|
-
- short_names: Reduce multi-accession bait labels to be short and consistent
|
|
150
|
-
- method: Sequencing protocol: hybridization capture ('hybrid'),
|
|
151
|
-
targeted amplicon sequencing ('amplicon'),
|
|
152
|
-
or whole genome sequencing ('wgs'). Determines
|
|
153
|
-
whether and how to use antitarget bins.
|
|
154
|
-
- male_reference: Use or assume a male reference (i.e. female samples
|
|
155
|
-
will have +1 log-CNR of chrX; otherwise male samples would have
|
|
156
|
-
-1 chrX).
|
|
157
|
-
Used by `CNVkitReference`, `CNVkitCall`, `CNVkitHeatmapCns` and
|
|
158
|
-
`CNVkitHeatmapCnr`.
|
|
159
|
-
- drop_low_coverage: Drop very-low-coverage bins before segmentation to
|
|
160
|
-
avoid false-positive deletions in poor-quality tumor samples.
|
|
161
|
-
Used by `CNVkitSegment` and `CNVkitCall`
|
|
162
|
-
- no_gc: Skip GC correction for `cnvkit.py reference/fix`.
|
|
163
|
-
- no_edge: Skip edge-effect correction for `cnvkit.py reference/fix`.
|
|
164
|
-
- no_rmask: Skip RepeatMasker correction for `cnvkit.py reference/fix`.
|
|
165
|
-
no_* options are used by `CNVkitReference` and `CNVkitFix`
|
|
166
|
-
- min_variant_depth: Minimum read depth for a SNV to be displayed
|
|
167
|
-
in the b-allele frequency plot.
|
|
168
|
-
Used by `CNVkitSegment` and `CNVkitCall`
|
|
169
|
-
- zygosity_freq: Ignore VCF's genotypes (GT field) and instead infer
|
|
170
|
-
zygosity from allele frequencies.
|
|
171
|
-
Used by `CNVkitSegment` and `CNVkitCall`
|
|
172
|
-
|
|
173
50
|
Options for different processes can be specified by `[CNVkitXXX.envs.xxx]`
|
|
174
51
|
See `biopipen.ns.cnvkit.CNVkitXXX` for more details.
|
|
175
52
|
|
|
@@ -190,8 +67,7 @@ class CNVkitPipeline(Pipeline):
|
|
|
190
67
|
|
|
191
68
|
To run this pipeline from command line, with the `pipen-run` plugin:
|
|
192
69
|
>>> # In this case, `pipeline.cnvkit_pipeline.metafile` must be provided
|
|
193
|
-
>>> pipen run cnvkit_pipeline CNVkitPipeline
|
|
194
|
-
>>> +config <config.toml> <other pipeline args>
|
|
70
|
+
>>> pipen run cnvkit_pipeline CNVkitPipeline <other pipeline args>
|
|
195
71
|
|
|
196
72
|
To use this as a dependency for other pipelines:
|
|
197
73
|
>>> from biopipen.ns.cnvkit_pipeline import CNVkitPipeline
|
|
@@ -199,17 +75,135 @@ class CNVkitPipeline(Pipeline):
|
|
|
199
75
|
>>> # pipeline.starts: Start processes of the pipeline
|
|
200
76
|
>>> # pipeline.ends: End processes of the pipeline
|
|
201
77
|
>>> # pipeline.procs.<proc>: The process with name <proc>
|
|
202
|
-
"""
|
|
203
78
|
|
|
204
|
-
|
|
79
|
+
Args:
|
|
80
|
+
metafile: a tab-separated file (see the next section)
|
|
81
|
+
baitfile: Potentially targeted genomic regions.
|
|
82
|
+
E.g. all possible exons for the reference genome.
|
|
83
|
+
This is optional when `method` is `wgs`.
|
|
84
|
+
accfile: The accessible genomic regions.
|
|
85
|
+
If not given, use `cnvkit.py access` to generate one.
|
|
86
|
+
access_excludes: File(s) with regions to be excluded for
|
|
87
|
+
`cnvkit.py access`.
|
|
88
|
+
guessbaits_guided: Whether to use guided mode for guessing baits.
|
|
89
|
+
guessbaits: Guess the bait file from the bam files, either guided or
|
|
90
|
+
unguided.
|
|
91
|
+
If False, `baitfile` is used. Otherwise, if `baitfile` is given,
|
|
92
|
+
use it (guided), otherwise use `accfile` (unguided).
|
|
93
|
+
The bam files with `metacols.guess_baits` column set to
|
|
94
|
+
`True`, `TRUE`, `true`, `1`, `Yes`, `YES`, or `yes`
|
|
95
|
+
will be used to guess the bait file.
|
|
96
|
+
heatmap_cnr: Whether to generate a heatmap of the .cnr files
|
|
97
|
+
(bin-level signals). This is allowed to set to False, it will take
|
|
98
|
+
longer to run.
|
|
99
|
+
case: The group name of samples in `metacols.group` to call CNVs for.
|
|
100
|
+
If not specified, use all samples. In such a case, `control` must
|
|
101
|
+
not be specified, as we are using a flat reference.
|
|
102
|
+
control: The group name of samples in `metacols.group` to use as
|
|
103
|
+
reference if not specified, use a flat reference.
|
|
104
|
+
cnvkit: the path to the cnvkit.py executable, defaults to
|
|
105
|
+
`config.exe.cnvkit` from `./.biopipen.toml` or `~/.biopipen.toml`.
|
|
106
|
+
rscript: Path to the Rscript excecutable to use for running R code.
|
|
107
|
+
Requires `DNAcopy` to be installed in R, defaults to
|
|
108
|
+
`config.lang.rscript`
|
|
109
|
+
samtools: Path to samtools, used for guessing bait file.
|
|
110
|
+
convert: Linux `convert` command to convert pdf to png
|
|
111
|
+
So that they can be embedded in the HTML report.
|
|
112
|
+
ncores: number of cores to use, defaults to `config.misc.ncores`
|
|
113
|
+
reffa: the reference genome (e.g. hg19.fa)
|
|
114
|
+
Used by `CNVkitAccess`, `CNVkitAutobin` and `CNVkitReference`
|
|
115
|
+
annotate: Use gene models from this file to assign names to the
|
|
116
|
+
target regions. Format: UCSC refFlat.txt or ensFlat.txt file
|
|
117
|
+
(preferred), or BED, interval list, GFF, or similar.
|
|
118
|
+
short_names: Reduce multi-accession bait labels to be short and
|
|
119
|
+
consistent
|
|
120
|
+
method: Sequencing protocol: hybridization capture ('hybrid'),
|
|
121
|
+
targeted amplicon sequencing ('amplicon'),
|
|
122
|
+
or whole genome sequencing ('wgs'). Determines
|
|
123
|
+
whether and how to use antitarget bins.
|
|
124
|
+
male_reference: Use or assume a male reference (i.e. female samples
|
|
125
|
+
will have +1 log-CNR of chrX; otherwise male samples would have
|
|
126
|
+
-1 chrX).
|
|
127
|
+
Used by `CNVkitReference`, `CNVkitCall`, `CNVkitHeatmapCns` and
|
|
128
|
+
`CNVkitHeatmapCnr`.
|
|
129
|
+
drop_low_coverage: Drop very-low-coverage bins before segmentation to
|
|
130
|
+
avoid false-positive deletions in poor-quality tumor samples.
|
|
131
|
+
Used by `CNVkitSegment` and `CNVkitCall`
|
|
132
|
+
no_gc: Skip GC correction for `cnvkit.py reference/fix`.
|
|
133
|
+
no_edge: Skip edge-effect correction for `cnvkit.py reference/fix`.
|
|
134
|
+
no_rmask: Skip RepeatMasker correction for `cnvkit.py reference/fix`.
|
|
135
|
+
no_* options are used by `CNVkitReference` and `CNVkitFix`
|
|
136
|
+
min_variant_depth: Minimum read depth for a SNV to be displayed
|
|
137
|
+
in the b-allele frequency plot.
|
|
138
|
+
Used by `CNVkitSegment` and `CNVkitCall`
|
|
139
|
+
zygosity_freq: Ignore VCF's genotypes (GT field) and instead infer
|
|
140
|
+
zygosity from allele frequencies.
|
|
141
|
+
Used by `CNVkitSegment` and `CNVkitCall`
|
|
142
|
+
metacols: The column names for each type of information in metafile
|
|
143
|
+
- group: The column name in the metafile that indicates the sample
|
|
144
|
+
group
|
|
145
|
+
- purity: The column name in the metafile that indicates the sample
|
|
146
|
+
purity
|
|
147
|
+
- snpvcf: The column name in the metafile that indicates the path to
|
|
148
|
+
the SNP VCF file
|
|
149
|
+
- bam: The column name in the metafile that indicates the path to
|
|
150
|
+
the BAM file
|
|
151
|
+
- vcf_sample_id: The column name in the metafile that indicates the
|
|
152
|
+
sample ID in the VCF file
|
|
153
|
+
- vcf_normal_id: The column name in the metafile that indicates the
|
|
154
|
+
normal sample ID in the VCF file
|
|
155
|
+
- sex: The column name in the metafile that indicates the sample sex
|
|
156
|
+
- guess_baits: The column name in the metafile that indicates
|
|
157
|
+
whether to guess the bait file from the bam files
|
|
158
|
+
"""
|
|
159
|
+
DEFAULTS = Diot(
|
|
160
|
+
metafile=None,
|
|
161
|
+
baitfile=None,
|
|
162
|
+
accfile=None,
|
|
163
|
+
cnvkit=config.exe.cnvkit,
|
|
164
|
+
convert=config.exe.convert,
|
|
165
|
+
rscript=config.lang.rscript,
|
|
166
|
+
samtools=config.exe.samtools,
|
|
167
|
+
ncores=config.misc.ncores,
|
|
168
|
+
reffa=config.ref.reffa,
|
|
169
|
+
annotate=config.ref.refflat,
|
|
170
|
+
short_names=True,
|
|
171
|
+
method="hybrid",
|
|
172
|
+
guessbaits=False,
|
|
173
|
+
heatmap_cnr=False,
|
|
174
|
+
case=None,
|
|
175
|
+
control=None,
|
|
176
|
+
access_excludes=[],
|
|
177
|
+
guessbaits_guided=None,
|
|
178
|
+
male_reference=False,
|
|
179
|
+
drop_low_coverage=False,
|
|
180
|
+
min_variant_depth=20,
|
|
181
|
+
no_gc=False,
|
|
182
|
+
no_edge=False,
|
|
183
|
+
no_rmask=False,
|
|
184
|
+
zygosity_freq=0.25,
|
|
185
|
+
metacols=Diot(
|
|
186
|
+
group="Group",
|
|
187
|
+
purity="Purity",
|
|
188
|
+
snpvcf="SnpVcf",
|
|
189
|
+
bam="Bam",
|
|
190
|
+
vcf_sample_id="VcfSampleId",
|
|
191
|
+
vcf_normal_id="VcfNormalId",
|
|
192
|
+
sex="Sex",
|
|
193
|
+
guess_baits="GuessBaits",
|
|
194
|
+
),
|
|
195
|
+
)
|
|
205
196
|
|
|
206
197
|
@cached_property
|
|
207
198
|
def col(self):
|
|
208
199
|
"""Get the column names by self.col.<colname>"""
|
|
209
|
-
return _MetaCol(
|
|
200
|
+
return _MetaCol(
|
|
201
|
+
self.opts.get("metacols"),
|
|
202
|
+
self.__class__.DEFAULTS.metacols,
|
|
203
|
+
)
|
|
210
204
|
|
|
211
|
-
@
|
|
212
|
-
def
|
|
205
|
+
@ProcGroup.add_proc
|
|
206
|
+
def p_metafile(self):
|
|
213
207
|
"""Build MetaFile process"""
|
|
214
208
|
from .misc import File2Proc
|
|
215
209
|
|
|
@@ -220,23 +214,23 @@ class CNVkitPipeline(Pipeline):
|
|
|
220
214
|
# Remember to set the dependency in the pipeline:
|
|
221
215
|
# >>> pipeline.procs.MetaFile.requires = [other_pipeline.procs]
|
|
222
216
|
# where other_pipeline.procs generate the metafile
|
|
223
|
-
if self.
|
|
224
|
-
input_data = [self.
|
|
217
|
+
if self.opts.metafile:
|
|
218
|
+
input_data = [self.opts.metafile]
|
|
225
219
|
|
|
226
220
|
return MetaFile
|
|
227
221
|
|
|
228
|
-
@
|
|
229
|
-
def
|
|
222
|
+
@ProcGroup.add_proc
|
|
223
|
+
def p_cnvkit_access(self):
|
|
230
224
|
"""Build CNVkitAccess process"""
|
|
231
|
-
if self.
|
|
225
|
+
if self.opts.get("accfile"):
|
|
232
226
|
from .misc import File2Proc
|
|
233
227
|
|
|
234
228
|
class CNVkitAccess(File2Proc):
|
|
235
|
-
input_data = [self.
|
|
229
|
+
input_data = [self.opts.accfile]
|
|
236
230
|
else:
|
|
237
231
|
from .cnvkit import CNVkitAccess
|
|
238
232
|
|
|
239
|
-
excludes = self.
|
|
233
|
+
excludes = self.opts.get("excludes", [])
|
|
240
234
|
if not isinstance(excludes, (list, tuple)):
|
|
241
235
|
excludes = [excludes]
|
|
242
236
|
|
|
@@ -244,18 +238,21 @@ class CNVkitPipeline(Pipeline):
|
|
|
244
238
|
# can be overwritten by [CNVkitAccess.in.exludes]
|
|
245
239
|
input_data = [excludes]
|
|
246
240
|
envs = {
|
|
247
|
-
"cnvkit": self.
|
|
248
|
-
"ref": self.
|
|
241
|
+
"cnvkit": self.opts.cnvkit,
|
|
242
|
+
"ref": self.opts.reffa,
|
|
249
243
|
}
|
|
250
244
|
|
|
251
245
|
return CNVkitAccess
|
|
252
246
|
|
|
253
|
-
@
|
|
254
|
-
def
|
|
247
|
+
@ProcGroup.add_proc
|
|
248
|
+
def p_cnvkit_guessbaits(self):
|
|
255
249
|
"""Build CNVkitGuessBaits process"""
|
|
256
250
|
from .cnvkit import CNVkitGuessBaits
|
|
257
251
|
|
|
258
|
-
if self.
|
|
252
|
+
if not self.opts.guessbaits:
|
|
253
|
+
return None
|
|
254
|
+
|
|
255
|
+
if self.opts.guessbaits_guided is None:
|
|
259
256
|
raise ValueError(
|
|
260
257
|
"`guessbaits.guided` must be specified, expecting True or False"
|
|
261
258
|
)
|
|
@@ -281,8 +278,8 @@ class CNVkitPipeline(Pipeline):
|
|
|
281
278
|
self.col.bam,
|
|
282
279
|
].tolist()
|
|
283
280
|
|
|
284
|
-
if self.
|
|
285
|
-
if not self.
|
|
281
|
+
if self.opts.guessbaits_guided:
|
|
282
|
+
if not self.opts.baitfile:
|
|
286
283
|
raise ValueError(
|
|
287
284
|
"`baitfile` must be specified for guided mode "
|
|
288
285
|
"to guess baits. See: "
|
|
@@ -290,78 +287,81 @@ class CNVkitPipeline(Pipeline):
|
|
|
290
287
|
)
|
|
291
288
|
|
|
292
289
|
class CNVkitGuessBaits(CNVkitGuessBaits):
|
|
293
|
-
requires =
|
|
290
|
+
requires = self.p_metafile
|
|
294
291
|
input_data = lambda metafile_ch: tibble(
|
|
295
292
|
bamfiles=[_guess_baits_bams(metafile_ch)],
|
|
296
|
-
atfile=self.
|
|
293
|
+
atfile=self.opts.baitfile,
|
|
297
294
|
)
|
|
298
295
|
envs = {
|
|
299
|
-
"cnvkit": self.
|
|
300
|
-
"samtools": self.
|
|
301
|
-
"ncores": self.
|
|
302
|
-
"ref": self.
|
|
296
|
+
"cnvkit": self.opts.cnvkit,
|
|
297
|
+
"samtools": self.opts.samtools,
|
|
298
|
+
"ncores": self.opts.ncores,
|
|
299
|
+
"ref": self.opts.reffa,
|
|
303
300
|
"guided": True,
|
|
304
301
|
}
|
|
305
302
|
else: # unguided
|
|
306
303
|
class CNVkitGuessBaits(CNVkitGuessBaits):
|
|
307
|
-
requires =
|
|
304
|
+
requires = self.p_metafile, self.p_cnvkit_access
|
|
308
305
|
input_data = lambda metafile_ch, access_ch: tibble(
|
|
309
306
|
bamfiles=[_guess_baits_bams(metafile_ch)],
|
|
310
307
|
accessfile=_1st(access_ch),
|
|
311
308
|
)
|
|
312
309
|
envs = {
|
|
313
|
-
"cnvkit": self.
|
|
314
|
-
"samtools": self.
|
|
315
|
-
"ncores": self.
|
|
316
|
-
"ref": self.
|
|
310
|
+
"cnvkit": self.opts.cnvkit,
|
|
311
|
+
"samtools": self.opts.samtools,
|
|
312
|
+
"ncores": self.opts.ncores,
|
|
313
|
+
"ref": self.opts.reffa,
|
|
317
314
|
"guided": False,
|
|
318
315
|
}
|
|
319
316
|
|
|
320
317
|
return CNVkitGuessBaits
|
|
321
318
|
|
|
322
|
-
@
|
|
323
|
-
def
|
|
319
|
+
@ProcGroup.add_proc
|
|
320
|
+
def p_cnvkit_autobin(self):
|
|
324
321
|
"""Build CNVkitAutobin process"""
|
|
325
322
|
from .cnvkit import CNVkitAutobin
|
|
326
323
|
|
|
327
324
|
class CNVkitAutobin(CNVkitAutobin):
|
|
328
|
-
if
|
|
329
|
-
requires =
|
|
325
|
+
if self.p_cnvkit_guessbaits:
|
|
326
|
+
requires = (
|
|
327
|
+
self.p_metafile,
|
|
328
|
+
self.p_cnvkit_access,
|
|
329
|
+
self.p_cnvkit_guessbaits,
|
|
330
|
+
)
|
|
330
331
|
input_data = lambda ch1, ch2, ch3: tibble(
|
|
331
332
|
bamfiles=[_metadf(_1st(ch1))[self.col.bam].tolist()],
|
|
332
333
|
accfile=_1st(ch2),
|
|
333
334
|
baitfile=(
|
|
334
335
|
_1st(ch3)
|
|
335
|
-
if self.
|
|
336
|
-
else self.
|
|
336
|
+
if self.opts.guessbaits
|
|
337
|
+
else self.opts.baitfile
|
|
337
338
|
),
|
|
338
339
|
)
|
|
339
340
|
else:
|
|
340
|
-
requires =
|
|
341
|
+
requires = self.p_metafile, self.p_cnvkit_access
|
|
341
342
|
input_data = lambda ch1, ch2: tibble(
|
|
342
343
|
bamfiles=[_metadf(_1st(ch1))[self.col.bam].tolist()],
|
|
343
344
|
accfile=_1st(ch2),
|
|
344
|
-
baitfile=self.
|
|
345
|
+
baitfile=self.opts.baitfile,
|
|
345
346
|
)
|
|
346
347
|
envs = {
|
|
347
|
-
"cnvkit": self.
|
|
348
|
-
"method": self.
|
|
349
|
-
"annotate": self.
|
|
350
|
-
"short_names": self.
|
|
351
|
-
"ref": self.
|
|
348
|
+
"cnvkit": self.opts.cnvkit,
|
|
349
|
+
"method": self.opts.method,
|
|
350
|
+
"annotate": self.opts.annotate,
|
|
351
|
+
"short_names": self.opts.short_names,
|
|
352
|
+
"ref": self.opts.reffa,
|
|
352
353
|
}
|
|
353
354
|
|
|
354
355
|
return CNVkitAutobin
|
|
355
356
|
|
|
356
|
-
|
|
357
|
-
def build_cnvkit_coverage(self, metafile_proc, autobin_proc, anti):
|
|
357
|
+
def _p_cnvkit_coverage(self, anti: bool):
|
|
358
358
|
"""Build CNVkitTargetCoverage and CNVkitAntiTargetCoverage processes"""
|
|
359
359
|
from .cnvkit import CNVkitCoverage
|
|
360
360
|
|
|
361
361
|
return Proc.from_proc(
|
|
362
362
|
CNVkitCoverage,
|
|
363
363
|
name="CNVkitCoverageAnittarget" if anti else "CNVkitCoverageTarget",
|
|
364
|
-
requires=[
|
|
364
|
+
requires=[self.p_metafile, self.p_cnvkit_autobin],
|
|
365
365
|
input_data=lambda ch1, ch2: tibble(
|
|
366
366
|
_metadf(_1st(ch1))[self.col.bam].tolist(),
|
|
367
367
|
target_file=ch2[
|
|
@@ -369,29 +369,33 @@ class CNVkitPipeline(Pipeline):
|
|
|
369
369
|
].tolist()[0],
|
|
370
370
|
),
|
|
371
371
|
envs={
|
|
372
|
-
"cnvkit": self.
|
|
373
|
-
"ncores": self.
|
|
374
|
-
"ref": self.
|
|
372
|
+
"cnvkit": self.opts.cnvkit,
|
|
373
|
+
"ncores": self.opts.ncores,
|
|
374
|
+
"ref": self.opts.reffa,
|
|
375
375
|
}
|
|
376
376
|
)
|
|
377
377
|
|
|
378
|
-
@
|
|
379
|
-
def
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
378
|
+
@ProcGroup.add_proc
|
|
379
|
+
def p_cnvkit_coverage_target(self):
|
|
380
|
+
"""Build CNVkitCoverageTarget process"""
|
|
381
|
+
return self._p_cnvkit_coverage(anti=False)
|
|
382
|
+
|
|
383
|
+
@ProcGroup.add_proc
|
|
384
|
+
def p_cnvkit_coverage_antitarget(self):
|
|
385
|
+
"""Build CNVkitCoverageAntiTarget process"""
|
|
386
|
+
return self._p_cnvkit_coverage(anti=True)
|
|
387
|
+
|
|
388
|
+
@ProcGroup.add_proc
|
|
389
|
+
def p_cnvkit_reference(self):
|
|
386
390
|
"""Build CNVkitReference process"""
|
|
387
391
|
from .cnvkit import CNVkitReference
|
|
388
392
|
|
|
389
393
|
def _input_data(ch1, ch2, ch3, ch4):
|
|
390
394
|
metadf = _metadf(_1st(ch1))
|
|
391
395
|
|
|
392
|
-
if self.
|
|
396
|
+
if self.opts.control:
|
|
393
397
|
# Use control samples to build reference
|
|
394
|
-
control_masks = metadf[self.col.group] == self.
|
|
398
|
+
control_masks = metadf[self.col.group] == self.opts.control
|
|
395
399
|
covfiles = [
|
|
396
400
|
ch2.outfile[control_masks].tolist()
|
|
397
401
|
+ ch3.outfile[control_masks].tolist()
|
|
@@ -418,34 +422,28 @@ class CNVkitPipeline(Pipeline):
|
|
|
418
422
|
|
|
419
423
|
class CNVkitReference(CNVkitReference):
|
|
420
424
|
requires = [
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
+
self.p_metafile,
|
|
426
|
+
self.p_cnvkit_coverage_target,
|
|
427
|
+
self.p_cnvkit_coverage_antitarget,
|
|
428
|
+
self.p_cnvkit_autobin,
|
|
425
429
|
]
|
|
426
430
|
input_data = _input_data
|
|
427
431
|
envs = {
|
|
428
|
-
"cnvkit": self.
|
|
429
|
-
"no_gc": self.
|
|
430
|
-
"no_edge": self.
|
|
431
|
-
"no_rmask": self.
|
|
432
|
-
"ref": self.
|
|
432
|
+
"cnvkit": self.opts.cnvkit,
|
|
433
|
+
"no_gc": self.opts.no_gc,
|
|
434
|
+
"no_edge": self.opts.no_edge,
|
|
435
|
+
"no_rmask": self.opts.no_rmask,
|
|
436
|
+
"ref": self.opts.reffa,
|
|
433
437
|
}
|
|
434
438
|
|
|
435
439
|
return CNVkitReference
|
|
436
440
|
|
|
437
|
-
@
|
|
438
|
-
def
|
|
439
|
-
self,
|
|
440
|
-
metafile_proc,
|
|
441
|
-
target_coverage_proc,
|
|
442
|
-
antitarget_coverage_proc,
|
|
443
|
-
reference_proc,
|
|
444
|
-
):
|
|
441
|
+
@ProcGroup.add_proc
|
|
442
|
+
def p_cnvkit_fix(self):
|
|
445
443
|
"""Build CNVkitFix process"""
|
|
446
444
|
from .cnvkit import CNVkitFix
|
|
447
445
|
|
|
448
|
-
if not self.
|
|
446
|
+
if not self.opts.case and self.opts.control:
|
|
449
447
|
raise ValueError(
|
|
450
448
|
"`case` is not specified, meaning using all samples as cases, "
|
|
451
449
|
"but `control` is specified (we can only use a flat reference "
|
|
@@ -454,10 +452,10 @@ class CNVkitPipeline(Pipeline):
|
|
|
454
452
|
|
|
455
453
|
def _input_data(ch1, ch2, ch3, ch4):
|
|
456
454
|
metadf = _metadf(_1st(ch1))
|
|
457
|
-
if not self.
|
|
455
|
+
if not self.opts.case:
|
|
458
456
|
tumor_masks = [True] * len(metadf)
|
|
459
457
|
else:
|
|
460
|
-
tumor_masks = metadf[self.col.group] == self.
|
|
458
|
+
tumor_masks = metadf[self.col.group] == self.opts.case
|
|
461
459
|
|
|
462
460
|
return tibble(
|
|
463
461
|
target_file=ch2.outfile[tumor_masks],
|
|
@@ -468,32 +466,32 @@ class CNVkitPipeline(Pipeline):
|
|
|
468
466
|
|
|
469
467
|
class CNVkitFix(CNVkitFix):
|
|
470
468
|
requires = [
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
469
|
+
self.p_metafile,
|
|
470
|
+
self.p_cnvkit_coverage_target,
|
|
471
|
+
self.p_cnvkit_coverage_antitarget,
|
|
472
|
+
self.p_cnvkit_reference,
|
|
475
473
|
]
|
|
476
474
|
input_data = _input_data
|
|
477
475
|
envs = {
|
|
478
|
-
"cnvkit": self.
|
|
479
|
-
"no_gc": self.
|
|
480
|
-
"no_edge": self.
|
|
481
|
-
"no_rmask": self.
|
|
476
|
+
"cnvkit": self.opts.cnvkit,
|
|
477
|
+
"no_gc": self.opts.no_gc,
|
|
478
|
+
"no_edge": self.opts.no_edge,
|
|
479
|
+
"no_rmask": self.opts.no_rmask,
|
|
482
480
|
}
|
|
483
481
|
|
|
484
482
|
return CNVkitFix
|
|
485
483
|
|
|
486
|
-
@
|
|
487
|
-
def
|
|
484
|
+
@ProcGroup.add_proc
|
|
485
|
+
def p_cnvkit_segment(self):
|
|
488
486
|
"""Build CNVkitSegment process"""
|
|
489
487
|
from .cnvkit import CNVkitSegment
|
|
490
488
|
|
|
491
489
|
def _input_data(ch1, ch2):
|
|
492
490
|
metadf = _metadf(_1st(ch1))
|
|
493
|
-
if not self.
|
|
491
|
+
if not self.opts.case:
|
|
494
492
|
tumor_masks = [True] * len(metadf)
|
|
495
493
|
else:
|
|
496
|
-
tumor_masks = metadf[self.col.group] == self.
|
|
494
|
+
tumor_masks = metadf[self.col.group] == self.opts.case
|
|
497
495
|
|
|
498
496
|
return tibble(
|
|
499
497
|
chrfile=ch2.outfile,
|
|
@@ -515,27 +513,27 @@ class CNVkitPipeline(Pipeline):
|
|
|
515
513
|
)
|
|
516
514
|
|
|
517
515
|
class CNVkitSegment(CNVkitSegment):
|
|
518
|
-
requires =
|
|
516
|
+
requires = self.p_metafile, self.p_cnvkit_fix
|
|
519
517
|
input_data = _input_data
|
|
520
518
|
envs = {
|
|
521
|
-
"cnvkit": self.
|
|
522
|
-
"rscript": self.
|
|
523
|
-
"ncores": self.
|
|
519
|
+
"cnvkit": self.opts.cnvkit,
|
|
520
|
+
"rscript": self.opts.rscript,
|
|
521
|
+
"ncores": self.opts.ncores,
|
|
524
522
|
}
|
|
525
523
|
|
|
526
524
|
return CNVkitSegment
|
|
527
525
|
|
|
528
|
-
@
|
|
529
|
-
def
|
|
526
|
+
@ProcGroup.add_proc
|
|
527
|
+
def p_cnvkit_scatter(self):
|
|
530
528
|
"""Build CNVkitScatter process"""
|
|
531
529
|
from .cnvkit import CNVkitScatter
|
|
532
530
|
|
|
533
531
|
def _input_data(ch1, ch2, ch3):
|
|
534
532
|
metadf = _metadf(_1st(ch1))
|
|
535
|
-
if not self.
|
|
533
|
+
if not self.opts.case:
|
|
536
534
|
tumor_masks = [True] * len(metadf)
|
|
537
535
|
else:
|
|
538
|
-
tumor_masks = metadf[self.col.group] == self.
|
|
536
|
+
tumor_masks = metadf[self.col.group] == self.opts.case
|
|
539
537
|
|
|
540
538
|
return tibble(
|
|
541
539
|
chrfile=ch2.outfile,
|
|
@@ -558,26 +556,26 @@ class CNVkitPipeline(Pipeline):
|
|
|
558
556
|
)
|
|
559
557
|
|
|
560
558
|
class CNVkitScatter(CNVkitScatter):
|
|
561
|
-
requires =
|
|
559
|
+
requires = self.p_metafile, self.p_cnvkit_fix, self.p_cnvkit_segment
|
|
562
560
|
input_data = _input_data
|
|
563
561
|
envs = {
|
|
564
|
-
"cnvkit": self.
|
|
565
|
-
"convert": self.
|
|
562
|
+
"cnvkit": self.opts.cnvkit,
|
|
563
|
+
"convert": self.opts.convert,
|
|
566
564
|
}
|
|
567
565
|
|
|
568
566
|
return CNVkitScatter
|
|
569
567
|
|
|
570
|
-
@
|
|
571
|
-
def
|
|
568
|
+
@ProcGroup.add_proc
|
|
569
|
+
def p_cnvkit_diagram(self):
|
|
572
570
|
"""Build CNVkitDiagram process"""
|
|
573
571
|
from .cnvkit import CNVkitDiagram
|
|
574
572
|
|
|
575
573
|
def _input_data(ch1, ch2, ch3):
|
|
576
574
|
metadf = _metadf(_1st(ch1))
|
|
577
|
-
if not self.
|
|
575
|
+
if not self.opts.case:
|
|
578
576
|
tumor_masks = [True] * len(metadf)
|
|
579
577
|
else:
|
|
580
|
-
tumor_masks = metadf[self.col.group] == self.
|
|
578
|
+
tumor_masks = metadf[self.col.group] == self.opts.case
|
|
581
579
|
|
|
582
580
|
return tibble(
|
|
583
581
|
chrfile=ch2.outfile,
|
|
@@ -590,26 +588,26 @@ class CNVkitPipeline(Pipeline):
|
|
|
590
588
|
)
|
|
591
589
|
|
|
592
590
|
class CNVkitDiagram(CNVkitDiagram):
|
|
593
|
-
requires =
|
|
591
|
+
requires = self.p_metafile, self.p_cnvkit_fix, self.p_cnvkit_segment
|
|
594
592
|
input_data = _input_data
|
|
595
593
|
envs = {
|
|
596
|
-
"cnvkit": self.
|
|
597
|
-
"convert": self.
|
|
594
|
+
"cnvkit": self.opts.cnvkit,
|
|
595
|
+
"convert": self.opts.convert,
|
|
598
596
|
}
|
|
599
597
|
|
|
600
598
|
return CNVkitDiagram
|
|
601
599
|
|
|
602
|
-
@
|
|
603
|
-
def
|
|
600
|
+
@ProcGroup.add_proc
|
|
601
|
+
def p_cnvkit_heatmap_cns(self):
|
|
604
602
|
"""Build CNVkitHeatmapCns process"""
|
|
605
603
|
from .cnvkit import CNVkitHeatmap
|
|
606
604
|
|
|
607
605
|
def _input_data(ch1, ch2):
|
|
608
606
|
metadf = _metadf(_1st(ch1))
|
|
609
|
-
if not self.
|
|
607
|
+
if not self.opts.case:
|
|
610
608
|
tumor_masks = [True] * len(metadf)
|
|
611
609
|
else:
|
|
612
|
-
tumor_masks = metadf[self.col.group] == self.
|
|
610
|
+
tumor_masks = metadf[self.col.group] == self.opts.case
|
|
613
611
|
|
|
614
612
|
return tibble(
|
|
615
613
|
segfiles=[ch2.outfile.tolist()],
|
|
@@ -622,27 +620,30 @@ class CNVkitPipeline(Pipeline):
|
|
|
622
620
|
|
|
623
621
|
class CNVkitHeatmapCns(CNVkitHeatmap):
|
|
624
622
|
"""Heatmap of segment-level signals of multiple samples"""
|
|
625
|
-
requires =
|
|
623
|
+
requires = self.p_metafile, self.p_cnvkit_segment
|
|
626
624
|
input_data = _input_data
|
|
627
625
|
envs = {
|
|
628
|
-
"cnvkit": self.
|
|
629
|
-
"convert": self.
|
|
630
|
-
"male_reference": self.
|
|
626
|
+
"cnvkit": self.opts.cnvkit,
|
|
627
|
+
"convert": self.opts.convert,
|
|
628
|
+
"male_reference": self.opts.male_reference,
|
|
631
629
|
}
|
|
632
630
|
|
|
633
631
|
return CNVkitHeatmapCns
|
|
634
632
|
|
|
635
|
-
@
|
|
636
|
-
def
|
|
633
|
+
@ProcGroup.add_proc
|
|
634
|
+
def p_cnvkit_heatmap_cnr(self):
|
|
637
635
|
"""Build CNVkitHeatmapCnr process"""
|
|
638
636
|
from .cnvkit import CNVkitHeatmap
|
|
639
637
|
|
|
638
|
+
if not self.opts.heatmap_cnr:
|
|
639
|
+
return None
|
|
640
|
+
|
|
640
641
|
def _input_data(ch1, ch2):
|
|
641
642
|
metadf = _metadf(_1st(ch1))
|
|
642
|
-
if not self.
|
|
643
|
+
if not self.opts.case:
|
|
643
644
|
tumor_masks = [True] * len(metadf)
|
|
644
645
|
else:
|
|
645
|
-
tumor_masks = metadf[self.col.group] == self.
|
|
646
|
+
tumor_masks = metadf[self.col.group] == self.opts.case
|
|
646
647
|
|
|
647
648
|
return tibble(
|
|
648
649
|
segfiles=[ch2.outfile.tolist()],
|
|
@@ -655,27 +656,27 @@ class CNVkitPipeline(Pipeline):
|
|
|
655
656
|
|
|
656
657
|
class CNVkitHeatmapCnr(CNVkitHeatmap):
|
|
657
658
|
"""Heatmap of bin-level signals of multiple samples"""
|
|
658
|
-
requires =
|
|
659
|
+
requires = self.p_metafile, self.p_cnvkit_fix
|
|
659
660
|
input_data = _input_data
|
|
660
661
|
envs = {
|
|
661
|
-
"cnvkit": self.
|
|
662
|
-
"convert": self.
|
|
663
|
-
"male_reference": self.
|
|
662
|
+
"cnvkit": self.opts.cnvkit,
|
|
663
|
+
"convert": self.opts.convert,
|
|
664
|
+
"male_reference": self.opts.male_reference,
|
|
664
665
|
}
|
|
665
666
|
|
|
666
667
|
return CNVkitHeatmapCnr
|
|
667
668
|
|
|
668
|
-
@
|
|
669
|
-
def
|
|
669
|
+
@ProcGroup.add_proc
|
|
670
|
+
def p_cnvkit_call(self):
|
|
670
671
|
"""Build CNVkitCall process"""
|
|
671
672
|
from .cnvkit import CNVkitCall
|
|
672
673
|
|
|
673
674
|
def _input_data(ch1, ch2, ch3):
|
|
674
675
|
metadf = _metadf(_1st(ch1))
|
|
675
|
-
if not self.
|
|
676
|
+
if not self.opts.case:
|
|
676
677
|
tumor_masks = [True] * len(metadf)
|
|
677
678
|
else:
|
|
678
|
-
tumor_masks = metadf[self.col.group] == self.
|
|
679
|
+
tumor_masks = metadf[self.col.group] == self.opts.case
|
|
679
680
|
|
|
680
681
|
return tibble(
|
|
681
682
|
cnrfile=ch2.outfile,
|
|
@@ -708,65 +709,20 @@ class CNVkitPipeline(Pipeline):
|
|
|
708
709
|
)
|
|
709
710
|
|
|
710
711
|
class CNVkitCall(CNVkitCall):
|
|
711
|
-
requires =
|
|
712
|
+
requires = self.p_metafile, self.p_cnvkit_fix, self.p_cnvkit_segment
|
|
712
713
|
input_data = _input_data
|
|
713
714
|
envs = {
|
|
714
|
-
"cnvkit": self.
|
|
715
|
-
"drop_low_coverage": self.
|
|
716
|
-
"male_reference": self.
|
|
717
|
-
"min_variant_depth": self.
|
|
718
|
-
"zygosity_freq": self.
|
|
715
|
+
"cnvkit": self.opts.cnvkit,
|
|
716
|
+
"drop_low_coverage": self.opts.drop_low_coverage,
|
|
717
|
+
"male_reference": self.opts.male_reference,
|
|
718
|
+
"min_variant_depth": self.opts.min_variant_depth,
|
|
719
|
+
"zygosity_freq": self.opts.zygosity_freq,
|
|
719
720
|
}
|
|
720
721
|
|
|
721
722
|
return CNVkitCall
|
|
722
723
|
|
|
723
|
-
def build(self):
|
|
724
|
-
self.options = DEFAULT_OPTS | self.options
|
|
725
|
-
|
|
726
|
-
MetaFile = self.build_metafile()
|
|
727
|
-
CNVkitAccess = self.build_cnvkit_access()
|
|
728
724
|
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
CNVkitGuessBaits = self.build_cnvkit_guessbaits(
|
|
732
|
-
MetaFile,
|
|
733
|
-
CNVkitAccess,
|
|
734
|
-
)
|
|
735
|
-
|
|
736
|
-
CNVkitAutobin = self.build_cnvkit_autobin(
|
|
737
|
-
MetaFile,
|
|
738
|
-
CNVkitAccess,
|
|
739
|
-
CNVkitGuessBaits,
|
|
740
|
-
)
|
|
725
|
+
if __name__ == "__main__":
|
|
726
|
+
from pipen_args import install # noqa: F401
|
|
741
727
|
|
|
742
|
-
|
|
743
|
-
MetaFile,
|
|
744
|
-
CNVkitAutobin,
|
|
745
|
-
anti=False,
|
|
746
|
-
)
|
|
747
|
-
CNVkitCoverageAntitarget = self.build_cnvkit_coverage(
|
|
748
|
-
MetaFile,
|
|
749
|
-
CNVkitAutobin,
|
|
750
|
-
anti=True,
|
|
751
|
-
)
|
|
752
|
-
CNVkitReference = self.build_cnvkit_reference(
|
|
753
|
-
MetaFile,
|
|
754
|
-
CNVkitCoverageTarget,
|
|
755
|
-
CNVkitCoverageAntitarget,
|
|
756
|
-
CNVkitAutobin,
|
|
757
|
-
)
|
|
758
|
-
CNVkitFix = self.build_cnvkit_fix(
|
|
759
|
-
MetaFile,
|
|
760
|
-
CNVkitCoverageTarget,
|
|
761
|
-
CNVkitCoverageAntitarget,
|
|
762
|
-
CNVkitReference,
|
|
763
|
-
)
|
|
764
|
-
CNVkitSegment = self.build_cnvkit_segment(MetaFile, CNVkitFix)
|
|
765
|
-
|
|
766
|
-
# end processes
|
|
767
|
-
self.build_cnvkit_scatter(MetaFile, CNVkitFix, CNVkitSegment)
|
|
768
|
-
self.build_cnvkit_diagram(MetaFile, CNVkitFix, CNVkitSegment)
|
|
769
|
-
self.build_cnvkit_heatmap_cns(MetaFile, CNVkitSegment)
|
|
770
|
-
if self.options.heatmap_cnr:
|
|
771
|
-
self.build_cnvkit_heatmap_cnr(MetaFile, CNVkitFix)
|
|
772
|
-
self.build_cnvkit_call(MetaFile, CNVkitFix, CNVkitSegment)
|
|
728
|
+
CNVkitPipeline().as_pipen().run()
|