PyPI - biopipen - Versions diffs - 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

biopipen 0.7.0py3-none-any.whl → 0.8.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (65) hide show

biopipen/__init__.py +1 -1
biopipen/core/config.py +0 -5
biopipen/core/config.toml +4 -4
biopipen/core/defaults.py +3 -3
biopipen/core/filters.py +1 -0
biopipen/core/proc.py +1 -3
biopipen/core/testing.py +1 -2
biopipen/ns/bam.py +10 -14
biopipen/ns/bcftools.py +37 -7
biopipen/ns/bed.py +9 -16
biopipen/ns/cnv.py +8 -11
biopipen/ns/cnvkit.py +32 -59
biopipen/ns/cnvkit_pipeline.py +266 -310
biopipen/ns/csv.py +0 -2
biopipen/ns/gene.py +0 -1
biopipen/ns/gsea.py +4 -10
biopipen/ns/misc.py +0 -5
biopipen/ns/plot.py +2 -4
biopipen/ns/rnaseq.py +0 -1
biopipen/ns/scrna.py +78 -120
biopipen/ns/scrna_metabolic_landscape.py +306 -348
biopipen/ns/tcgamaf.py +52 -0
biopipen/ns/tcr.py +5 -15
biopipen/ns/vcf.py +52 -34
biopipen/ns/web.py +8 -19
biopipen/reports/bam/CNAClinic.svelte +1 -1
biopipen/reports/bam/CNVpytor.svelte +2 -2
biopipen/reports/bam/ControlFREEC.svelte +1 -1
biopipen/reports/cnv/AneuploidyScore.svelte +2 -2
biopipen/reports/cnv/AneuploidyScoreSummary.svelte +1 -1
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/gsea/FGSEA.svelte +1 -1
biopipen/reports/gsea/GSEA.svelte +2 -2
biopipen/reports/scrna/CellsDistribution.svelte +1 -1
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna/GeneExpressionInvistigation.svelte +1 -1
biopipen/reports/scrna/MarkersFinder.svelte +42 -39
biopipen/reports/scrna/ScFGSEA.svelte +3 -3
biopipen/reports/scrna/SeuratClusterStats.svelte +3 -3
biopipen/reports/scrna/SeuratPreparing.svelte +2 -2
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubsets.svelte +2 -2
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +1 -1
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +1 -1
biopipen/reports/tcr/CloneResidency.svelte +4 -4
biopipen/reports/tcr/Immunarch.svelte +2 -2
biopipen/reports/tcr/SampleDiversity.svelte +2 -2
biopipen/reports/tcr/TCRClusteringStats.svelte +3 -3
biopipen/reports/tcr/VJUsage.svelte +1 -1
biopipen/reports/utils/gsea.liq +1 -1
biopipen/reports/utils/misc.liq +1 -1
biopipen/reports/vcf/TruvariBenchSummary.svelte +1 -1
biopipen/reports/vcf/TruvariConsistency.svelte +3 -3
biopipen/scripts/bcftools/BcftoolsSort.py +19 -0
biopipen/scripts/scrna/MarkersFinder.R +73 -35
biopipen/scripts/tcgamaf/Maf2Vcf.py +22 -0
biopipen/scripts/tcgamaf/MafAddChr.py +14 -0
biopipen/scripts/tcgamaf/maf2vcf.pl +427 -0
biopipen/scripts/vcf/VcfAnno.py +26 -0
biopipen/scripts/vcf/VcfFix_utils.py +3 -2
{biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/METADATA +7 -8
{biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/RECORD +65 -59
{biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/WHEEL +1 -1
{biopipen-0.7.0.dist-info → biopipen-0.8.0.dist-info}/entry_points.txt +2 -1

biopipen/ns/cnvkit_pipeline.py CHANGED Viewed

@@ -7,7 +7,7 @@ import pandas
 from diot import Diot
 from datar.tibble import tibble
 from biopipen.core.proc import Proc
-from pipen_cli_run import Pipeline, process
+from pipen_args.procgroup import ProcGroup
 from ..core.config import config
@@ -19,45 +19,6 @@ except ImportError:
 if TYPE_CHECKING:
     from pandas import DataFrame
-DEFAULT_COLS = Diot(
-    group="Group",
-    purity="Purity",
-    snpvcf="SnpVcf",
-    bam="Bam",
-    vcf_sample_id="VcfSampleId",
-    vcf_normal_id="VcfNormalId",
-    sex="Sex",
-    guess_baits="GuessBaits",
-)
-DEFAULT_OPTS = Diot(
-    metafile=None,
-    baitfile=None,
-    accfile=None,
-    cnvkit=config.exe.cnvkit,
-    convert=config.exe.convert,
-    rscript=config.lang.rscript,
-    samtools=config.exe.samtools,
-    ncores=config.misc.ncores,
-    reffa=config.ref.reffa,
-    annotate=config.ref.refflat,
-    short_names=True,
-    method="hybrid",
-    guessbaits=False,
-    heatmap_cnr=False,
-    case=None,
-    control=None,
-    access_excludes=[],
-    guessbaits_guided=None,
-    male_reference=False,
-    drop_low_coverage=False,
-    min_variant_depth=20,
-    no_gc=False,
-    no_edge=False,
-    no_rmask=False,
-    zygosity_freq=0.25,
-)
 @lru_cache()
 def _metadf(metafile: str) -> DataFrame:
@@ -78,7 +39,7 @@ class _MetaCol:
         return self.cols.get(name, self.default_cols[name])
-class CNVkitPipeline(Pipeline):
+class CNVkitPipeline(ProcGroup):
     """The CNVkit pipeline
     Unlike `cnvkit.py batch`, this decouples the steps of the `batch` command so
@@ -86,90 +47,6 @@ class CNVkitPipeline(Pipeline):
     The pipeline requires following options:
-    Input files:
-    - metafile: a tab-separated file (see the next section)
-    - baitfile: Potentially targeted genomic regions.
-        E.g. all possible exons for the reference genome.
-        This is optional when `method` is `wgs`.
-    - accfile: The accessible genomic regions.
-        If not given, use `cnvkit.py access` to generate one.
-    Special options:
-    - access_excludes: File(s) with regions to be excluded for
-        `cnvkit.py access`.
-    - guessbaits_guided: Whether to use guided mode for guessing baits.
-    - guessbaits: Guess the bait file from the bam files, either guided or
-        unguided.
-        If False, `baitfile` is used. Otherwise, if `baitfile` is given, use it
-        (guided), otherwise use `accfile` (unguided).
-        The bam files with `metacols.guess_baits` column set to `True`, `TRUE`,
-        `true`, `1`, `Yes`, `YES`, or `yes` will be used to guess the bait file.
-    - heatmap_cnr: Whether to generate a heatmap of the .cnr files
-        (bin-level signals). This is allowed to set to False, it will take
-        longer to run.
-    - case: The group name of samples in `metacols.group` to call CNVs for.
-        If not specified, use all samples. In such a case, `control` must not be
-        specified, as we are using a flat reference.
-    - control: The group name of samples in `metacols.group` to use as reference
-        if not specified, use a flat reference.
-    - metacols: The column names for each type of information in metafile
-        - group: The column name in the metafile that indicates the sample group
-            Default: `Group`
-        - purity: The column name in the metafile that indicates the sample
-            purity. Default: `Purity`
-        - snpvcf: The column name in the metafile that indicates the path to
-            the SNP VCF file. Default: `SnpVcf`
-        - bam: The column name in the metafile that indicates the path to the
-            BAM file. Default: `Bam`
-        - vcf_sample_id: The column name in the metafile that indicates the
-            sample ID in the VCF file. Default: `VcfSampleId`
-        - vcf_normal_id: The column name in the metafile that indicates the
-            normal sample ID in the VCF file. Default: `VcfNormalId`
-        - sex: The column name in the metafile that indicates the sample
-            sex. Default: `Sex`
-        - guess_baits: The column name in the metafile that indicates whether
-            to guess the bait file from the bam files. Default: `GuessBaits`
-    Global options that are used by multiple processes
-    (can be overriden individually by `[<proc>.envs.xxx]`):
-    - cnvkit: the path to the cnvkit.py executable, defaults to
-        `config.exe.cnvkit` from `./.biopipen.toml` or `~/.biopipen.toml`.
-    - rscript: Path to the Rscript excecutable to use for running R code.
-        Requires `DNAcopy` to be installed in R, defaults to
-        `config.lang.rscript`
-    - samtools: Path to samtools, used for guessing bait file.
-    - convert: Linux `convert` command to convert pdf to png
-        So that they can be embedded in the HTML report.
-    - ncores: number of cores to use, defaults to `config.misc.ncores`
-    - reffa: the reference genome (e.g. hg19.fa)
-        Used by `CNVkitAccess`, `CNVkitAutobin` and `CNVkitReference`
-    - annotate: Use gene models from this file to assign names to the
-        target regions. Format: UCSC refFlat.txt or ensFlat.txt file
-        (preferred), or BED, interval list, GFF, or similar.
-    - short_names: Reduce multi-accession bait labels to be short and consistent
-    - method: Sequencing protocol: hybridization capture ('hybrid'),
-        targeted amplicon sequencing ('amplicon'),
-        or whole genome sequencing ('wgs'). Determines
-        whether and how to use antitarget bins.
-    - male_reference: Use or assume a male reference (i.e. female samples
-        will have +1 log-CNR of chrX; otherwise male samples would have
-        -1 chrX).
-        Used by `CNVkitReference`, `CNVkitCall`, `CNVkitHeatmapCns` and
-        `CNVkitHeatmapCnr`.
-    - drop_low_coverage: Drop very-low-coverage bins before segmentation to
-        avoid false-positive deletions in poor-quality tumor samples.
-        Used by `CNVkitSegment` and `CNVkitCall`
-    - no_gc: Skip GC correction for `cnvkit.py reference/fix`.
-    - no_edge: Skip edge-effect correction for `cnvkit.py reference/fix`.
-    - no_rmask: Skip RepeatMasker correction for `cnvkit.py reference/fix`.
-        no_* options are used by `CNVkitReference` and `CNVkitFix`
-    - min_variant_depth: Minimum read depth for a SNV to be displayed
-        in the b-allele frequency plot.
-        Used by `CNVkitSegment` and `CNVkitCall`
-    - zygosity_freq: Ignore VCF's genotypes (GT field) and instead infer
-        zygosity from allele frequencies.
-        Used by `CNVkitSegment` and `CNVkitCall`
     Options for different processes can be specified by `[CNVkitXXX.envs.xxx]`
     See `biopipen.ns.cnvkit.CNVkitXXX` for more details.
@@ -190,8 +67,7 @@ class CNVkitPipeline(Pipeline):
     To run this pipeline from command line, with the `pipen-run` plugin:
     >>> # In this case, `pipeline.cnvkit_pipeline.metafile` must be provided
-    >>> pipen run cnvkit_pipeline CNVkitPipeline \
-    >>>     +config <config.toml> <other pipeline args>
+    >>> pipen run cnvkit_pipeline CNVkitPipeline <other pipeline args>
     To use this as a dependency for other pipelines:
     >>> from biopipen.ns.cnvkit_pipeline import CNVkitPipeline
@@ -199,17 +75,135 @@ class CNVkitPipeline(Pipeline):
     >>> # pipeline.starts: Start processes of the pipeline
     >>> # pipeline.ends: End processes of the pipeline
     >>> # pipeline.procs.<proc>: The process with name <proc>
-    """
-    defaults = config.pipeline.cnvkit_pipeline
+    Args:
+        metafile: a tab-separated file (see the next section)
+        baitfile: Potentially targeted genomic regions.
+            E.g. all possible exons for the reference genome.
+            This is optional when `method` is `wgs`.
+        accfile: The accessible genomic regions.
+            If not given, use `cnvkit.py access` to generate one.
+        access_excludes: File(s) with regions to be excluded for
+            `cnvkit.py access`.
+        guessbaits_guided: Whether to use guided mode for guessing baits.
+        guessbaits: Guess the bait file from the bam files, either guided or
+            unguided.
+            If False, `baitfile` is used. Otherwise, if `baitfile` is given,
+            use it (guided), otherwise use `accfile` (unguided).
+            The bam files with `metacols.guess_baits` column set to
+            `True`, `TRUE`, `true`, `1`, `Yes`, `YES`, or `yes`
+            will be used to guess the bait file.
+        heatmap_cnr: Whether to generate a heatmap of the .cnr files
+            (bin-level signals). This is allowed to set to False, it will take
+            longer to run.
+        case: The group name of samples in `metacols.group` to call CNVs for.
+            If not specified, use all samples. In such a case, `control` must
+            not be specified, as we are using a flat reference.
+        control: The group name of samples in `metacols.group` to use as
+            reference if not specified, use a flat reference.
+        cnvkit: the path to the cnvkit.py executable, defaults to
+            `config.exe.cnvkit` from `./.biopipen.toml` or `~/.biopipen.toml`.
+        rscript: Path to the Rscript excecutable to use for running R code.
+            Requires `DNAcopy` to be installed in R, defaults to
+            `config.lang.rscript`
+        samtools: Path to samtools, used for guessing bait file.
+        convert: Linux `convert` command to convert pdf to png
+            So that they can be embedded in the HTML report.
+        ncores: number of cores to use, defaults to `config.misc.ncores`
+        reffa: the reference genome (e.g. hg19.fa)
+            Used by `CNVkitAccess`, `CNVkitAutobin` and `CNVkitReference`
+        annotate: Use gene models from this file to assign names to the
+            target regions. Format: UCSC refFlat.txt or ensFlat.txt file
+            (preferred), or BED, interval list, GFF, or similar.
+        short_names: Reduce multi-accession bait labels to be short and
+            consistent
+        method: Sequencing protocol: hybridization capture ('hybrid'),
+            targeted amplicon sequencing ('amplicon'),
+            or whole genome sequencing ('wgs'). Determines
+            whether and how to use antitarget bins.
+        male_reference: Use or assume a male reference (i.e. female samples
+            will have +1 log-CNR of chrX; otherwise male samples would have
+            -1 chrX).
+            Used by `CNVkitReference`, `CNVkitCall`, `CNVkitHeatmapCns` and
+            `CNVkitHeatmapCnr`.
+        drop_low_coverage: Drop very-low-coverage bins before segmentation to
+            avoid false-positive deletions in poor-quality tumor samples.
+            Used by `CNVkitSegment` and `CNVkitCall`
+        no_gc: Skip GC correction for `cnvkit.py reference/fix`.
+        no_edge: Skip edge-effect correction for `cnvkit.py reference/fix`.
+        no_rmask: Skip RepeatMasker correction for `cnvkit.py reference/fix`.
+            no_* options are used by `CNVkitReference` and `CNVkitFix`
+        min_variant_depth: Minimum read depth for a SNV to be displayed
+            in the b-allele frequency plot.
+            Used by `CNVkitSegment` and `CNVkitCall`
+        zygosity_freq: Ignore VCF's genotypes (GT field) and instead infer
+            zygosity from allele frequencies.
+            Used by `CNVkitSegment` and `CNVkitCall`
+        metacols: The column names for each type of information in metafile
+            - group: The column name in the metafile that indicates the sample
+                group
+            - purity: The column name in the metafile that indicates the sample
+                purity
+            - snpvcf: The column name in the metafile that indicates the path to
+                the SNP VCF file
+            - bam: The column name in the metafile that indicates the path to
+                the BAM file
+            - vcf_sample_id: The column name in the metafile that indicates the
+                sample ID in the VCF file
+            - vcf_normal_id: The column name in the metafile that indicates the
+                normal sample ID in the VCF file
+            - sex: The column name in the metafile that indicates the sample sex
+            - guess_baits: The column name in the metafile that indicates
+                whether to guess the bait file from the bam files
+    """
+    DEFAULTS = Diot(
+        metafile=None,
+        baitfile=None,
+        accfile=None,
+        cnvkit=config.exe.cnvkit,
+        convert=config.exe.convert,
+        rscript=config.lang.rscript,
+        samtools=config.exe.samtools,
+        ncores=config.misc.ncores,
+        reffa=config.ref.reffa,
+        annotate=config.ref.refflat,
+        short_names=True,
+        method="hybrid",
+        guessbaits=False,
+        heatmap_cnr=False,
+        case=None,
+        control=None,
+        access_excludes=[],
+        guessbaits_guided=None,
+        male_reference=False,
+        drop_low_coverage=False,
+        min_variant_depth=20,
+        no_gc=False,
+        no_edge=False,
+        no_rmask=False,
+        zygosity_freq=0.25,
+        metacols=Diot(
+            group="Group",
+            purity="Purity",
+            snpvcf="SnpVcf",
+            bam="Bam",
+            vcf_sample_id="VcfSampleId",
+            vcf_normal_id="VcfNormalId",
+            sex="Sex",
+            guess_baits="GuessBaits",
+        ),
+    )
     @cached_property
     def col(self):
         """Get the column names by self.col.<colname>"""
-        return _MetaCol(self.options.get("metacols"), DEFAULT_COLS)
+        return _MetaCol(
+            self.opts.get("metacols"),
+            self.__class__.DEFAULTS.metacols,
+        )
-    @process(start=True)
-    def build_metafile(self):
+    @ProcGroup.add_proc
+    def p_metafile(self):
         """Build MetaFile process"""
         from .misc import File2Proc
@@ -220,23 +214,23 @@ class CNVkitPipeline(Pipeline):
             # Remember to set the dependency in the pipeline:
             # >>> pipeline.procs.MetaFile.requires = [other_pipeline.procs]
             # where other_pipeline.procs generate the metafile
-            if self.options.metafile:
-                input_data = [self.options.metafile]
+            if self.opts.metafile:
+                input_data = [self.opts.metafile]
         return MetaFile
-    @process(start=True)
-    def build_cnvkit_access(self):
+    @ProcGroup.add_proc
+    def p_cnvkit_access(self):
         """Build CNVkitAccess process"""
-        if self.options.get("accfile"):
+        if self.opts.get("accfile"):
             from .misc import File2Proc
             class CNVkitAccess(File2Proc):
-                input_data = [self.options.accfile]
+                input_data = [self.opts.accfile]
         else:
             from .cnvkit import CNVkitAccess
-            excludes = self.options.get("excludes", [])
+            excludes = self.opts.get("excludes", [])
             if not isinstance(excludes, (list, tuple)):
                 excludes = [excludes]
@@ -244,18 +238,21 @@ class CNVkitPipeline(Pipeline):
                 # can be overwritten by [CNVkitAccess.in.exludes]
                 input_data = [excludes]
                 envs = {
-                    "cnvkit": self.options.cnvkit,
-                    "ref": self.options.reffa,
+                    "cnvkit": self.opts.cnvkit,
+                    "ref": self.opts.reffa,
                 }
         return CNVkitAccess
-    @process
-    def build_cnvkit_guessbaits(self, metafile_proc, access_proc):
+    @ProcGroup.add_proc
+    def p_cnvkit_guessbaits(self):
         """Build CNVkitGuessBaits process"""
         from .cnvkit import CNVkitGuessBaits
-        if self.options.guessbaits_guided is None:
+        if not self.opts.guessbaits:
+            return None
+        if self.opts.guessbaits_guided is None:
             raise ValueError(
                 "`guessbaits.guided` must be specified, expecting True or False"
             )
@@ -281,8 +278,8 @@ class CNVkitPipeline(Pipeline):
                 self.col.bam,
             ].tolist()
-        if self.options.guessbaits_guided:
-            if not self.options.baitfile:
+        if self.opts.guessbaits_guided:
+            if not self.opts.baitfile:
                 raise ValueError(
                     "`baitfile` must be specified for guided mode "
                     "to guess baits. See: "
@@ -290,78 +287,81 @@ class CNVkitPipeline(Pipeline):
                 )
             class CNVkitGuessBaits(CNVkitGuessBaits):
-                requires = metafile_proc
+                requires = self.p_metafile
                 input_data = lambda metafile_ch: tibble(
                     bamfiles=[_guess_baits_bams(metafile_ch)],
-                    atfile=self.options.baitfile,
+                    atfile=self.opts.baitfile,
                 )
                 envs = {
-                    "cnvkit": self.options.cnvkit,
-                    "samtools": self.options.samtools,
-                    "ncores": self.options.ncores,
-                    "ref": self.options.reffa,
+                    "cnvkit": self.opts.cnvkit,
+                    "samtools": self.opts.samtools,
+                    "ncores": self.opts.ncores,
+                    "ref": self.opts.reffa,
                     "guided": True,
                 }
         else:  # unguided
             class CNVkitGuessBaits(CNVkitGuessBaits):
-                requires = metafile_proc, access_proc
+                requires = self.p_metafile, self.p_cnvkit_access
                 input_data = lambda metafile_ch, access_ch: tibble(
                     bamfiles=[_guess_baits_bams(metafile_ch)],
                     accessfile=_1st(access_ch),
                 )
                 envs = {
-                    "cnvkit": self.options.cnvkit,
-                    "samtools": self.options.samtools,
-                    "ncores": self.options.ncores,
-                    "ref": self.options.reffa,
+                    "cnvkit": self.opts.cnvkit,
+                    "samtools": self.opts.samtools,
+                    "ncores": self.opts.ncores,
+                    "ref": self.opts.reffa,
                     "guided": False,
                 }
         return CNVkitGuessBaits
-    @process
-    def build_cnvkit_autobin(self, metafile_proc, access_proc, guessbaits_proc):
+    @ProcGroup.add_proc
+    def p_cnvkit_autobin(self):
         """Build CNVkitAutobin process"""
         from .cnvkit import CNVkitAutobin
         class CNVkitAutobin(CNVkitAutobin):
-            if guessbaits_proc:
-                requires = metafile_proc, access_proc, guessbaits_proc
+            if self.p_cnvkit_guessbaits:
+                requires = (
+                    self.p_metafile,
+                    self.p_cnvkit_access,
+                    self.p_cnvkit_guessbaits,
+                )
                 input_data = lambda ch1, ch2, ch3: tibble(
                     bamfiles=[_metadf(_1st(ch1))[self.col.bam].tolist()],
                     accfile=_1st(ch2),
                     baitfile=(
                         _1st(ch3)
-                        if self.options.guessbaits
-                        else self.options.baitfile
+                        if self.opts.guessbaits
+                        else self.opts.baitfile
                     ),
                 )
             else:
-                requires = metafile_proc, access_proc
+                requires = self.p_metafile, self.p_cnvkit_access
                 input_data = lambda ch1, ch2: tibble(
                     bamfiles=[_metadf(_1st(ch1))[self.col.bam].tolist()],
                     accfile=_1st(ch2),
-                    baitfile=self.options.baitfile,
+                    baitfile=self.opts.baitfile,
                 )
             envs = {
-                "cnvkit": self.options.cnvkit,
-                "method": self.options.method,
-                "annotate": self.options.annotate,
-                "short_names": self.options.short_names,
-                "ref": self.options.reffa,
+                "cnvkit": self.opts.cnvkit,
+                "method": self.opts.method,
+                "annotate": self.opts.annotate,
+                "short_names": self.opts.short_names,
+                "ref": self.opts.reffa,
             }
         return CNVkitAutobin
-    @process
-    def build_cnvkit_coverage(self, metafile_proc, autobin_proc, anti):
+    def _p_cnvkit_coverage(self, anti: bool):
         """Build CNVkitTargetCoverage and CNVkitAntiTargetCoverage processes"""
         from .cnvkit import CNVkitCoverage
         return Proc.from_proc(
             CNVkitCoverage,
             name="CNVkitCoverageAnittarget" if anti else "CNVkitCoverageTarget",
-            requires=[metafile_proc, autobin_proc],
+            requires=[self.p_metafile, self.p_cnvkit_autobin],
             input_data=lambda ch1, ch2: tibble(
                 _metadf(_1st(ch1))[self.col.bam].tolist(),
                 target_file=ch2[
@@ -369,29 +369,33 @@ class CNVkitPipeline(Pipeline):
                 ].tolist()[0],
             ),
             envs={
-                "cnvkit": self.options.cnvkit,
-                "ncores": self.options.ncores,
-                "ref": self.options.reffa,
+                "cnvkit": self.opts.cnvkit,
+                "ncores": self.opts.ncores,
+                "ref": self.opts.reffa,
             }
         )
-    @process
-    def build_cnvkit_reference(
-        self,
-        metafile_proc,
-        target_coverage_proc,
-        antitarget_coverage_proc,
-        autobin_proc,
-    ):
+    @ProcGroup.add_proc
+    def p_cnvkit_coverage_target(self):
+        """Build CNVkitCoverageTarget process"""
+        return self._p_cnvkit_coverage(anti=False)
+    @ProcGroup.add_proc
+    def p_cnvkit_coverage_antitarget(self):
+        """Build CNVkitCoverageAntiTarget process"""
+        return self._p_cnvkit_coverage(anti=True)
+    @ProcGroup.add_proc
+    def p_cnvkit_reference(self):
         """Build CNVkitReference process"""
         from .cnvkit import CNVkitReference
         def _input_data(ch1, ch2, ch3, ch4):
             metadf = _metadf(_1st(ch1))
-            if self.options.control:
+            if self.opts.control:
                 # Use control samples to build reference
-                control_masks = metadf[self.col.group] == self.options.control
+                control_masks = metadf[self.col.group] == self.opts.control
                 covfiles = [
                     ch2.outfile[control_masks].tolist()
                     + ch3.outfile[control_masks].tolist()
@@ -418,34 +422,28 @@ class CNVkitPipeline(Pipeline):
         class CNVkitReference(CNVkitReference):
             requires = [
-                metafile_proc,
-                target_coverage_proc,
-                antitarget_coverage_proc,
-                autobin_proc,
+                self.p_metafile,
+                self.p_cnvkit_coverage_target,
+                self.p_cnvkit_coverage_antitarget,
+                self.p_cnvkit_autobin,
             ]
             input_data = _input_data
             envs = {
-                "cnvkit": self.options.cnvkit,
-                "no_gc": self.options.no_gc,
-                "no_edge": self.options.no_edge,
-                "no_rmask": self.options.no_rmask,
-                "ref": self.options.reffa,
+                "cnvkit": self.opts.cnvkit,
+                "no_gc": self.opts.no_gc,
+                "no_edge": self.opts.no_edge,
+                "no_rmask": self.opts.no_rmask,
+                "ref": self.opts.reffa,
             }
         return CNVkitReference
-    @process
-    def build_cnvkit_fix(
-        self,
-        metafile_proc,
-        target_coverage_proc,
-        antitarget_coverage_proc,
-        reference_proc,
-    ):
+    @ProcGroup.add_proc
+    def p_cnvkit_fix(self):
         """Build CNVkitFix process"""
         from .cnvkit import CNVkitFix
-        if not self.options.case and self.options.control:
+        if not self.opts.case and self.opts.control:
             raise ValueError(
                 "`case` is not specified, meaning using all samples as cases, "
                 "but `control` is specified (we can only use a flat reference "
@@ -454,10 +452,10 @@ class CNVkitPipeline(Pipeline):
         def _input_data(ch1, ch2, ch3, ch4):
             metadf = _metadf(_1st(ch1))
-            if not self.options.case:
+            if not self.opts.case:
                 tumor_masks = [True] * len(metadf)
             else:
-                tumor_masks = metadf[self.col.group] == self.options.case
+                tumor_masks = metadf[self.col.group] == self.opts.case
             return tibble(
                 target_file=ch2.outfile[tumor_masks],
@@ -468,32 +466,32 @@ class CNVkitPipeline(Pipeline):
         class CNVkitFix(CNVkitFix):
             requires = [
-                metafile_proc,
-                target_coverage_proc,
-                antitarget_coverage_proc,
-                reference_proc,
+                self.p_metafile,
+                self.p_cnvkit_coverage_target,
+                self.p_cnvkit_coverage_antitarget,
+                self.p_cnvkit_reference,
             ]
             input_data = _input_data
             envs = {
-                "cnvkit": self.options.cnvkit,
-                "no_gc": self.options.no_gc,
-                "no_edge": self.options.no_edge,
-                "no_rmask": self.options.no_rmask,
+                "cnvkit": self.opts.cnvkit,
+                "no_gc": self.opts.no_gc,
+                "no_edge": self.opts.no_edge,
+                "no_rmask": self.opts.no_rmask,
             }
         return CNVkitFix
-    @process
-    def build_cnvkit_segment(self, metafile_proc, fix_proc):
+    @ProcGroup.add_proc
+    def p_cnvkit_segment(self):
         """Build CNVkitSegment process"""
         from .cnvkit import CNVkitSegment
         def _input_data(ch1, ch2):
             metadf = _metadf(_1st(ch1))
-            if not self.options.case:
+            if not self.opts.case:
                 tumor_masks = [True] * len(metadf)
             else:
-                tumor_masks = metadf[self.col.group] == self.options.case
+                tumor_masks = metadf[self.col.group] == self.opts.case
             return tibble(
                 chrfile=ch2.outfile,
@@ -515,27 +513,27 @@ class CNVkitPipeline(Pipeline):
             )
         class CNVkitSegment(CNVkitSegment):
-            requires = metafile_proc, fix_proc
+            requires = self.p_metafile, self.p_cnvkit_fix
             input_data = _input_data
             envs = {
-                "cnvkit": self.options.cnvkit,
-                "rscript": self.options.rscript,
-                "ncores": self.options.ncores,
+                "cnvkit": self.opts.cnvkit,
+                "rscript": self.opts.rscript,
+                "ncores": self.opts.ncores,
             }
         return CNVkitSegment
-    @process(end=True)
-    def build_cnvkit_scatter(self, metafile_proc, fix_proc, segment_proc):
+    @ProcGroup.add_proc
+    def p_cnvkit_scatter(self):
         """Build CNVkitScatter process"""
         from .cnvkit import CNVkitScatter
         def _input_data(ch1, ch2, ch3):
             metadf = _metadf(_1st(ch1))
-            if not self.options.case:
+            if not self.opts.case:
                 tumor_masks = [True] * len(metadf)
             else:
-                tumor_masks = metadf[self.col.group] == self.options.case
+                tumor_masks = metadf[self.col.group] == self.opts.case
             return tibble(
                 chrfile=ch2.outfile,
@@ -558,26 +556,26 @@ class CNVkitPipeline(Pipeline):
             )
         class CNVkitScatter(CNVkitScatter):
-            requires = metafile_proc, fix_proc, segment_proc
+            requires = self.p_metafile, self.p_cnvkit_fix, self.p_cnvkit_segment
             input_data = _input_data
             envs = {
-                "cnvkit": self.options.cnvkit,
-                "convert": self.options.convert,
+                "cnvkit": self.opts.cnvkit,
+                "convert": self.opts.convert,
             }
         return CNVkitScatter
-    @process(end=True)
-    def build_cnvkit_diagram(self, metafile_proc, fix_proc, segment_proc):
+    @ProcGroup.add_proc
+    def p_cnvkit_diagram(self):
         """Build CNVkitDiagram process"""
         from .cnvkit import CNVkitDiagram
         def _input_data(ch1, ch2, ch3):
             metadf = _metadf(_1st(ch1))
-            if not self.options.case:
+            if not self.opts.case:
                 tumor_masks = [True] * len(metadf)
             else:
-                tumor_masks = metadf[self.col.group] == self.options.case
+                tumor_masks = metadf[self.col.group] == self.opts.case
             return tibble(
                 chrfile=ch2.outfile,
@@ -590,26 +588,26 @@ class CNVkitPipeline(Pipeline):
             )
         class CNVkitDiagram(CNVkitDiagram):
-            requires = metafile_proc, fix_proc, segment_proc
+            requires = self.p_metafile, self.p_cnvkit_fix, self.p_cnvkit_segment
             input_data = _input_data
             envs = {
-                "cnvkit": self.options.cnvkit,
-                "convert": self.options.convert,
+                "cnvkit": self.opts.cnvkit,
+                "convert": self.opts.convert,
             }
         return CNVkitDiagram
-    @process(end=True)
-    def build_cnvkit_heatmap_cns(self, metafile_proc, segment_proc):
+    @ProcGroup.add_proc
+    def p_cnvkit_heatmap_cns(self):
         """Build CNVkitHeatmapCns process"""
         from .cnvkit import CNVkitHeatmap
         def _input_data(ch1, ch2):
             metadf = _metadf(_1st(ch1))
-            if not self.options.case:
+            if not self.opts.case:
                 tumor_masks = [True] * len(metadf)
             else:
-                tumor_masks = metadf[self.col.group] == self.options.case
+                tumor_masks = metadf[self.col.group] == self.opts.case
             return tibble(
                 segfiles=[ch2.outfile.tolist()],
@@ -622,27 +620,30 @@ class CNVkitPipeline(Pipeline):
         class CNVkitHeatmapCns(CNVkitHeatmap):
             """Heatmap of segment-level signals of multiple samples"""
-            requires = metafile_proc, segment_proc
+            requires = self.p_metafile, self.p_cnvkit_segment
             input_data = _input_data
             envs = {
-                "cnvkit": self.options.cnvkit,
-                "convert": self.options.convert,
-                "male_reference": self.options.male_reference,
+                "cnvkit": self.opts.cnvkit,
+                "convert": self.opts.convert,
+                "male_reference": self.opts.male_reference,
             }
         return CNVkitHeatmapCns
-    @process(end=True)
-    def build_cnvkit_heatmap_cnr(self, metafile_proc, fix_proc):
+    @ProcGroup.add_proc
+    def p_cnvkit_heatmap_cnr(self):
         """Build CNVkitHeatmapCnr process"""
         from .cnvkit import CNVkitHeatmap
+        if not self.opts.heatmap_cnr:
+            return None
         def _input_data(ch1, ch2):
             metadf = _metadf(_1st(ch1))
-            if not self.options.case:
+            if not self.opts.case:
                 tumor_masks = [True] * len(metadf)
             else:
-                tumor_masks = metadf[self.col.group] == self.options.case
+                tumor_masks = metadf[self.col.group] == self.opts.case
             return tibble(
                 segfiles=[ch2.outfile.tolist()],
@@ -655,27 +656,27 @@ class CNVkitPipeline(Pipeline):
         class CNVkitHeatmapCnr(CNVkitHeatmap):
             """Heatmap of bin-level signals of multiple samples"""
-            requires = metafile_proc, fix_proc
+            requires = self.p_metafile, self.p_cnvkit_fix
             input_data = _input_data
             envs = {
-                "cnvkit": self.options.cnvkit,
-                "convert": self.options.convert,
-                "male_reference": self.options.male_reference,
+                "cnvkit": self.opts.cnvkit,
+                "convert": self.opts.convert,
+                "male_reference": self.opts.male_reference,
             }
         return CNVkitHeatmapCnr
-    @process(end=True)
-    def build_cnvkit_call(self, metafile_proc, fix_proc, segment_proc):
+    @ProcGroup.add_proc
+    def p_cnvkit_call(self):
         """Build CNVkitCall process"""
         from .cnvkit import CNVkitCall
         def _input_data(ch1, ch2, ch3):
             metadf = _metadf(_1st(ch1))
-            if not self.options.case:
+            if not self.opts.case:
                 tumor_masks = [True] * len(metadf)
             else:
-                tumor_masks = metadf[self.col.group] == self.options.case
+                tumor_masks = metadf[self.col.group] == self.opts.case
             return tibble(
                 cnrfile=ch2.outfile,
@@ -708,65 +709,20 @@ class CNVkitPipeline(Pipeline):
             )
         class CNVkitCall(CNVkitCall):
-            requires = metafile_proc, fix_proc, segment_proc
+            requires = self.p_metafile, self.p_cnvkit_fix, self.p_cnvkit_segment
             input_data = _input_data
             envs = {
-                "cnvkit": self.options.cnvkit,
-                "drop_low_coverage": self.options.drop_low_coverage,
-                "male_reference": self.options.male_reference,
-                "min_variant_depth": self.options.min_variant_depth,
-                "zygosity_freq": self.options.zygosity_freq,
+                "cnvkit": self.opts.cnvkit,
+                "drop_low_coverage": self.opts.drop_low_coverage,
+                "male_reference": self.opts.male_reference,
+                "min_variant_depth": self.opts.min_variant_depth,
+                "zygosity_freq": self.opts.zygosity_freq,
             }
         return CNVkitCall
-    def build(self):
-        self.options = DEFAULT_OPTS | self.options
-        MetaFile = self.build_metafile()
-        CNVkitAccess = self.build_cnvkit_access()
-        CNVkitGuessBaits = None
-        if self.options.guessbaits:
-            CNVkitGuessBaits = self.build_cnvkit_guessbaits(
-                MetaFile,
-                CNVkitAccess,
-            )
-        CNVkitAutobin = self.build_cnvkit_autobin(
-            MetaFile,
-            CNVkitAccess,
-            CNVkitGuessBaits,
-        )
+if __name__ == "__main__":
+    from pipen_args import install  # noqa: F401
-        CNVkitCoverageTarget = self.build_cnvkit_coverage(
-            MetaFile,
-            CNVkitAutobin,
-            anti=False,
-        )
-        CNVkitCoverageAntitarget = self.build_cnvkit_coverage(
-            MetaFile,
-            CNVkitAutobin,
-            anti=True,
-        )
-        CNVkitReference = self.build_cnvkit_reference(
-            MetaFile,
-            CNVkitCoverageTarget,
-            CNVkitCoverageAntitarget,
-            CNVkitAutobin,
-        )
-        CNVkitFix = self.build_cnvkit_fix(
-            MetaFile,
-            CNVkitCoverageTarget,
-            CNVkitCoverageAntitarget,
-            CNVkitReference,
-        )
-        CNVkitSegment = self.build_cnvkit_segment(MetaFile, CNVkitFix)
-        # end processes
-        self.build_cnvkit_scatter(MetaFile, CNVkitFix, CNVkitSegment)
-        self.build_cnvkit_diagram(MetaFile, CNVkitFix, CNVkitSegment)
-        self.build_cnvkit_heatmap_cns(MetaFile, CNVkitSegment)
-        if self.options.heatmap_cnr:
-            self.build_cnvkit_heatmap_cnr(MetaFile, CNVkitFix)
-        self.build_cnvkit_call(MetaFile, CNVkitFix, CNVkitSegment)
+    CNVkitPipeline().as_pipen().run()

biopipen 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

Potentially problematic release.

biopipen 0.7.0py3-none-any.whl → 0.8.0py3-none-any.whl