PyPI - biopipen - Versions diffs - 0.32.3__py3-none-any.whl → 0.33.1__py3-none-any.whl - Mend

biopipen 0.32.3__py3-none-any.whl → 0.33.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (118)

biopipen/__init__.py +1 -1
biopipen/core/config.toml +6 -0
biopipen/core/filters.py +35 -23
biopipen/core/testing.py +6 -1
biopipen/ns/bam.py +39 -0
biopipen/ns/cellranger.py +5 -0
biopipen/ns/cellranger_pipeline.py +2 -2
biopipen/ns/cnvkit_pipeline.py +4 -1
biopipen/ns/delim.py +33 -27
biopipen/ns/protein.py +99 -0
biopipen/ns/scrna.py +428 -250
biopipen/ns/snp.py +16 -3
biopipen/ns/tcr.py +125 -1
biopipen/ns/vcf.py +34 -0
biopipen/ns/web.py +5 -1
biopipen/reports/scrna/SeuratClusterStats.svelte +1 -1
biopipen/reports/scrna/SeuratMap2Ref.svelte +15 -2
biopipen/reports/tcr/ClonalStats.svelte +15 -0
biopipen/reports/utils/misc.liq +20 -7
biopipen/scripts/bam/BamMerge.py +2 -2
biopipen/scripts/bam/BamSampling.py +4 -4
biopipen/scripts/bam/BamSort.py +141 -0
biopipen/scripts/bam/BamSplitChroms.py +10 -10
biopipen/scripts/bam/BamSubsetByBed.py +3 -3
biopipen/scripts/bam/CNVpytor.py +10 -10
biopipen/scripts/bam/ControlFREEC.py +11 -11
biopipen/scripts/bed/Bed2Vcf.py +5 -5
biopipen/scripts/bed/BedConsensus.py +5 -5
biopipen/scripts/bed/BedLiftOver.sh +6 -4
biopipen/scripts/bed/BedtoolsIntersect.py +4 -4
biopipen/scripts/bed/BedtoolsMakeWindows.py +3 -3
biopipen/scripts/bed/BedtoolsMerge.py +4 -4
biopipen/scripts/cellranger/CellRangerCount.py +20 -9
biopipen/scripts/cellranger/CellRangerSummary.R +20 -29
biopipen/scripts/cellranger/CellRangerVdj.py +8 -8
biopipen/scripts/cnvkit/CNVkitAccess.py +6 -6
biopipen/scripts/cnvkit/CNVkitAutobin.py +25 -18
biopipen/scripts/cnvkit/CNVkitBatch.py +5 -5
biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -2
biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +9 -5
biopipen/scripts/cnvkit/CNVkitHeatmap.py +4 -4
biopipen/scripts/cnvkit/CNVkitReference.py +2 -2
biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
biopipen/scripts/cnvkit/guess_baits.py +166 -93
biopipen/scripts/delim/SampleInfo.R +94 -148
biopipen/scripts/misc/Config2File.py +2 -2
biopipen/scripts/misc/Str2File.py +2 -2
biopipen/scripts/protein/MMCIF2PDB.py +33 -0
biopipen/scripts/protein/PDB2Fasta.py +60 -0
biopipen/scripts/protein/Prodigy.py +4 -4
biopipen/scripts/protein/RMSD.py +178 -0
biopipen/scripts/regulatory/MotifScan.py +8 -8
biopipen/scripts/scrna/CellCellCommunication.py +59 -22
biopipen/scripts/scrna/LoomTo10X.R +51 -0
biopipen/scripts/scrna/MarkersFinder.R +273 -654
biopipen/scripts/scrna/RadarPlots.R +73 -53
biopipen/scripts/scrna/SCP-plot.R +15202 -0
biopipen/scripts/scrna/ScVelo.py +0 -0
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +23 -31
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -54
biopipen/scripts/scrna/SeuratClusterStats-features.R +85 -403
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +32 -17
biopipen/scripts/scrna/SeuratClusterStats-stats.R +45 -239
biopipen/scripts/scrna/SeuratClusterStats.R +13 -19
biopipen/scripts/scrna/SeuratMap2Ref.R +16 -12
biopipen/scripts/scrna/SeuratPreparing.R +138 -81
biopipen/scripts/scrna/SlingShot.R +71 -0
biopipen/scripts/scrna/celltypist-wrapper.py +7 -6
biopipen/scripts/snp/Plink2GTMat.py +26 -11
biopipen/scripts/snp/PlinkFilter.py +7 -7
biopipen/scripts/snp/PlinkFromVcf.py +8 -5
biopipen/scripts/snp/PlinkSimulation.py +4 -4
biopipen/scripts/snp/PlinkUpdateName.py +4 -4
biopipen/scripts/stats/ChowTest.R +48 -22
biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
biopipen/scripts/tcr/ClonalStats.R +484 -0
biopipen/scripts/tcr/ScRepLoading.R +127 -0
biopipen/scripts/tcr/TCRDock.py +10 -6
biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
biopipen/scripts/vcf/BcftoolsAnnotate.py +8 -8
biopipen/scripts/vcf/BcftoolsFilter.py +3 -3
biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
biopipen/scripts/vcf/BcftoolsSort.py +4 -4
biopipen/scripts/vcf/BcftoolsView.py +5 -5
biopipen/scripts/vcf/Vcf2Bed.py +2 -2
biopipen/scripts/vcf/VcfAnno.py +11 -11
biopipen/scripts/vcf/VcfDownSample.sh +22 -10
biopipen/scripts/vcf/VcfFilter.py +5 -5
biopipen/scripts/vcf/VcfFix.py +7 -7
biopipen/scripts/vcf/VcfFix_utils.py +12 -3
biopipen/scripts/vcf/VcfIndex.py +3 -3
biopipen/scripts/vcf/VcfIntersect.py +3 -3
biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
biopipen/scripts/vcf/bcftools_utils.py +3 -3
biopipen/scripts/web/Download.py +8 -4
biopipen/scripts/web/DownloadList.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadBucket.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadFile.py +3 -3
biopipen/scripts/web/gcloud_common.py +1 -1
biopipen/utils/gsea.R +75 -35
biopipen/utils/misc.R +205 -7
biopipen/utils/misc.py +17 -8
biopipen/utils/reference.py +11 -11
biopipen/utils/repr.R +146 -0
biopipen/utils/vcf.py +1 -1
{biopipen-0.32.3.dist-info → biopipen-0.33.1.dist-info}/METADATA +8 -8
{biopipen-0.32.3.dist-info → biopipen-0.33.1.dist-info}/RECORD +115 -105
{biopipen-0.32.3.dist-info → biopipen-0.33.1.dist-info}/WHEEL +1 -1
biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -144
biopipen/scripts/scrna/SeuratPreparing-common.R +0 -467
biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +0 -204
{biopipen-0.32.3.dist-info → biopipen-0.33.1.dist-info}/entry_points.txt +0 -0

biopipen/scripts/cnvkit/CNVkitHeatmap.py CHANGED Viewed

@@ -4,9 +4,9 @@ from diot import Diot
 from biopipen.utils.misc import run_command, dict_to_cli_args
-segfiles = {{in.segfiles | repr}}  # pyright: ignore # noqa
+segfiles = {{in.segfiles | repr}}  # pyright: ignore # noqa  # noqa
 sample_sex = {{in.sample_sex | repr}}  # pyright: ignore
-outdir = {{out.outdir | repr}}  # pyright: ignore
+outdir: str = {{out.outdir | repr}}  # pyright: ignore
 cnvkit = {{envs.cnvkit | quote}}  # pyright: ignore
 convert = {{envs.convert | quote}}  # pyright: ignore
 convert_args = {{envs.convert_args | repr}}  # pyright: ignore
@@ -16,7 +16,7 @@ desaturate= {{ envs.desaturate | repr}}  # pyright: ignore
 male_reference= {{ envs.male_reference | repr}}  # pyright: ignore
 no_shift_xy= {{ envs.no_shift_xy | repr}}  # pyright: ignore
 order = {{envs.order | repr}}  # pyright: ignore
-cases = {{envs.cases | repr}}  # pyright: ignore
+cases: dict | None = {{envs.cases | repr}}  # pyright: ignore
 def parse_order(files, orderfile):
@@ -70,7 +70,7 @@ def do_case(name, case):
     args[""] = [cnvkit, "heatmap"]
     run_command(dict_to_cli_args(args, dashify=True), fg=True)
-    conv_args = dict(**conv_args, _=[pdffile, pngfile])
+    conv_args: dict = dict(**conv_args, _=[pdffile, pngfile])
     conv_args[""] = [convert]
     run_command(
         dict_to_cli_args(conv_args, dashify=True, prefix="-"),

biopipen/scripts/cnvkit/CNVkitReference.py CHANGED Viewed

@@ -1,12 +1,12 @@
 from pathlib import Path
 from biopipen.utils.misc import run_command, dict_to_cli_args
-covfiles = {{in.covfiles | repr}}  # pyright: ignore
+covfiles = {{in.covfiles | repr}}  # pyright: ignore  # noqa
 target_file = {{in.target_file | repr}}  # pyright: ignore
 antitarget_file = {{in.antitarget_file | repr}}  # pyright: ignore
 sample_sex = {{in.sample_sex | repr}}  # pyright: ignore
 outfile = {{out.outfile | quote}}  # pyright: ignore
-reffile = {{envs.ref | repr}}  # pyright: ignore
+reffile: str = {{envs.ref | quote}}  # pyright: ignore
 cnvkit = {{envs.cnvkit | quote}}  # pyright: ignore
 cluster = {{envs.cluster | repr}}  # pyright: ignore
 min_cluster_size = {{envs.min_cluster_size | repr}}  # pyright: ignore

biopipen/scripts/cnvkit/CNVkitScatter.py CHANGED Viewed

@@ -4,14 +4,14 @@ from diot import Diot
 from biopipen.utils.misc import run_command, dict_to_cli_args
-cnrfile = {{in.cnrfile | quote}}  # pyright: ignore
+cnrfile = {{in.cnrfile | quote}}  # pyright: ignore  # noqa
 cnsfile = {{in.cnsfile | quote}}  # pyright: ignore
 convert = {{envs.convert | quote}}  # pyright: ignore
 convert_args = {{envs.convert_args | repr}}  # pyright: ignore
 vcf = {{in.vcf | repr}}  # pyright: ignore
 sample_id = {{in.sample_id | repr}}  # pyright: ignore
 normal_id = {{in.normal_id | repr}}  # pyright: ignore
-outdir = {{out.outdir | quote}}  # pyright: ignore
+outdir: str = {{out.outdir | quote}}  # pyright: ignore
 cnvkit = {{envs.cnvkit | quote}}  # pyright: ignore
 chromosome = {{envs.chromosome | repr}}  # pyright: ignore
 gene = {{envs.gene | repr}}  # pyright: ignore
@@ -25,7 +25,7 @@ y_min = {{envs.y_min | repr}}  # pyright: ignore
 min_variant_depth = {{envs.min_variant_depth | repr}}  # pyright: ignore
 zygosity_freq = {{envs.zygosity_freq | repr}}  # pyright: ignore
 title = {{envs.title | repr}}  # pyright: ignore
-cases = {{envs.cases | repr}}  # pyright: ignore
+cases: dict | None = {{envs.cases | repr}}  # pyright: ignore
 def do_case(name, case):
@@ -50,7 +50,7 @@ def do_case(name, case):
     pdffile = Path(outdir).joinpath(f"{name}.heatmap.pdf")
     pngfile = Path(outdir).joinpath(f"{name}.heatmap.png")
-    args = dict(
+    args: dict = dict(
         **case,
         s=cnsfile,
         o=pdffile,
@@ -62,7 +62,7 @@ def do_case(name, case):
     args[""] = [cnvkit, "scatter"]
     run_command(dict_to_cli_args(args, dashify=True), fg=True)
-    conv_args = dict(**conv_args, _=[pdffile, pngfile])
+    conv_args: dict = dict(**conv_args, _=[pdffile, pngfile])
     conv_args[""] = [convert]
     run_command(
         dict_to_cli_args(conv_args, dashify=True, prefix="-"),

biopipen/scripts/cnvkit/CNVkitSegment.py CHANGED Viewed

@@ -2,11 +2,11 @@ from pathlib import Path
 from biopipen.utils.misc import run_command, dict_to_cli_args
-cnrfile = {{in.cnrfile | quote}}  # pyright: ignore
+cnrfile = {{in.cnrfile | quote}}  # pyright: ignore  # noqa
 vcf = {{in.vcf | repr}}  # pyright: ignore
 sample_id = {{in.sample_id | repr}}  # pyright: ignore
 normal_id = {{in.normal_id | repr}}  # pyright: ignore
-outfile = {{out.outfile | quote}}  # pyright: ignore
+outfile: str = {{out.outfile | quote}}  # pyright: ignore
 cnvkit = {{envs.cnvkit | quote}}  # pyright: ignore
 method = {{envs.method | quote}}  # pyright: ignore
 threshold = {{envs.threshold | repr}}  # pyright: ignore
@@ -21,7 +21,7 @@ zygosity_freq = {{envs.zygosity_freq | repr}}  # pyright: ignore
 def main():
-    args = dict(
+    args: dict = dict(
         o=outfile,
         d=Path(outfile).parent / "intermediate.rds",
         m=method,
@@ -39,8 +39,8 @@ def main():
         _=cnrfile,
     )
     args[""] = [cnvkit, "segment"]
-    args = dict_to_cli_args(args, dashify=True)
-    run_command(args, fg=True)
+    cmd_args = dict_to_cli_args(args, dashify=True)
+    run_command(cmd_args, fg=True)
 if __name__ == "__main__":

biopipen/scripts/cnvkit/guess_baits.py CHANGED Viewed

@@ -25,10 +25,10 @@ import sys
 import numpy as np
 import pandas as pd
-import cnvlib
-from cnvlib import parallel
-from cnvlib.descriptives import modal_location
-from skgenome import tabio, GenomicArray as GA
+import cnvlib  # type: ignore
+from cnvlib import parallel  # type: ignore
+from cnvlib.descriptives import modal_location  # type: ignore
+from skgenome import tabio, GenomicArray as GA  # type: ignore
 logging.basicConfig(level=logging.INFO, format="%(message)s")
@@ -36,11 +36,12 @@ logging.basicConfig(level=logging.INFO, format="%(message)s")
 # ___________________________________________
 # Guided method: guess from potential targets
 def filter_targets(target_bed, sample_bams, procs, fasta):
     """Check if each potential target has significant coverage."""
     try:
-        baits = tabio.read(target_bed, 'bed4')
-    except:
+        baits = tabio.read(target_bed, "bed4")
+    except:  # noqa
         raise RuntimeError("Targets must be in BED format; try skg_convert.py")
     logging.info("Loaded %d candidate regions from %s", len(baits), target_bed)
     # Loop over BAMs to calculate weighted averages of bin coverage depths
@@ -48,47 +49,46 @@ def filter_targets(target_bed, sample_bams, procs, fasta):
     for bam_fname in sample_bams:
         logging.info("Evaluating targets in %s", bam_fname)
         sample = cnvlib.do_coverage(target_bed, bam_fname, processes=procs, fasta=fasta)
-        assert len(sample) == len(baits), \
-                "%d != %d" % (len(sample), len(baits))
-        total_depths += sample['depth'].values
-    baits['depth'] = total_depths / len(sample_bams)
-    logging.info("Average candidate-target depth:\n%s",
-                 baits['depth'].describe())
+        assert len(sample) == len(baits), "%d != %d" % (len(sample), len(baits))
+        total_depths += sample["depth"].values
+    baits["depth"] = total_depths / len(sample_bams)
+    logging.info("Average candidate-target depth:\n%s", baits["depth"].describe())
     return baits
 # _________________________________________
 # Unguided method: guess from raw depths
-def scan_targets(access_bed, sample_bams, min_depth, min_gap, min_length,
-                 procs):
+def scan_targets(access_bed, sample_bams, min_depth, min_gap, min_length, procs):
     """Estimate baited regions from a genome-wide, per-base depth profile."""
     bait_chunks = []
     # ENH: context manager to call rm on bed chunks? with to_chunks as pool, ck?
-    logging.info("Scanning for enriched regions in:\n  %s",
-                 '\n  '.join(sample_bams))
+    logging.info("Scanning for enriched regions in:\n  %s", "\n  ".join(sample_bams))
     #  with futures.ProcessPoolExecutor(procs) as pool:
     with parallel.pick_pool(procs) as pool:
-        args_iter = ((bed_chunk, sample_bams,
-                    min_depth, min_gap, min_length)
-                    for bed_chunk in parallel.to_chunks(access_bed))
+        args_iter = (
+            (bed_chunk, sample_bams, min_depth, min_gap, min_length)
+            for bed_chunk in parallel.to_chunks(access_bed)
+        )
         for bed_chunk_fname, bait_chunk in pool.map(_scan_depth, args_iter):
             bait_chunks.append(bait_chunk)
             parallel.rm(bed_chunk_fname)
     baits = GA(pd.concat(bait_chunks))
-    baits['depth'] /= len(sample_bams)
+    baits["depth"] /= len(sample_bams)
     return baits
 def _scan_depth(args):
     """Wrapper for parallel map"""
     bed_fname, bam_fnames, min_depth, min_gap, min_length = args
-    regions = list(drop_small(merge_gaps(scan_depth(bed_fname, bam_fnames,
-                                                    min_depth),
-                                         min_gap),
-                              min_length))
-    result = pd.DataFrame.from_records(list(regions),
-                                       columns=regions[0]._fields)
+    regions = list(
+        drop_small(
+            merge_gaps(scan_depth(bed_fname, bam_fnames, min_depth), min_gap),
+            min_length,
+        )
+    )
+    result = pd.DataFrame.from_records(list(regions), columns=regions[0]._fields)
     return bed_fname, result
@@ -100,32 +100,42 @@ def scan_depth(bed_fname, bam_fnames, min_depth):
     tuple
         Region coordinates (0-indexed, half-open): chromosome name, start, end
     """
-    Region = collections.namedtuple('Region', 'chromosome start end depth')
+    Region = collections.namedtuple("Region", "chromosome start end depth")
     nsamples = len(bam_fnames)
     if nsamples == 1:
         def get_depth(depths):
             return int(depths[0])
     else:
         min_depth *= nsamples
         # NB: samtools emits additional BAMs' depths as trailing columns
         def get_depth(depths):
             return sum(map(int, depths))
-    proc = subprocess.Popen([SAMTOOLS, 'depth',
-                             '-Q',  '1',  # Skip pseudogenes
-                             '-b', bed_fname,
-                            ] + bam_fnames,
-                            stdout=subprocess.PIPE,
-                            encoding='utf-8',
-                            shell=False)
+    proc = subprocess.Popen(
+        [
+            SAMTOOLS,
+            "depth",
+            "-Q",
+            "1",  # Skip pseudogenes
+            "-b",
+            bed_fname,
+        ]
+        + bam_fnames,
+        stdout=subprocess.PIPE,
+        encoding="utf-8",
+        shell=False,
+    )
     # Detect runs of >= min_depth; emit their coordinates
     chrom = start = depths = None
-    for line in proc.stdout:
-        fields = line.split('\t')
+    for line in proc.stdout:  # type: ignore
+        fields = line.split("\t")
         depth = get_depth(fields[2:])
-        is_enriched = (depth >= min_depth)
+        is_enriched = depth >= min_depth
         if start is None:
             if is_enriched:
                 # Entering a new captured region
@@ -137,7 +147,7 @@ def scan_depth(bed_fname, bam_fnames, min_depth):
                 continue
         elif is_enriched and fields[0] == chrom:
             # Still in a captured region -- extend it
-            depths.append(depth)
+            depths.append(depth)   # type: ignore
         else:
             # Exiting a captured region
             # Update target region boundaries
@@ -146,10 +156,12 @@ def scan_depth(bed_fname, bam_fnames, min_depth):
             ok_dp_idx = np.nonzero(darr >= half_depth)[0]
             start_idx = ok_dp_idx[0]
             end_idx = ok_dp_idx[-1] + 1
-            yield Region(chrom,
-                            start + start_idx,
-                            start + end_idx,
-                            darr[start_idx:end_idx].mean())
+            yield Region(
+                chrom,
+                start + start_idx,
+                start + end_idx,
+                darr[start_idx:end_idx].mean(),
+            )
             chrom = start = depths = None
@@ -170,75 +182,129 @@ def merge_gaps(regions, min_gap):
 def drop_small(regions, min_length):
     """Merge small gaps and filter by minimum length."""
-    return (reg for reg in regions
-            if reg.end - reg.start >= min_length)
+    return (reg for reg in regions if reg.end - reg.start >= min_length)
 # ___________________________________________
 # Shared
 def normalize_depth_log2_filter(baits, min_depth, enrich_ratio=0.1):
     """Calculate normalized depth, add log2 column, filter by enrich_ratio."""
     # Normalize depths to a neutral value of 1.0
-    dp_mode = modal_location(baits.data.loc[baits['depth'] > min_depth,
-                                            'depth'].values)
-    norm_depth = baits['depth'] / dp_mode
+    dp_mode = modal_location(baits.data.loc[baits["depth"] > min_depth, "depth"].values)
+    norm_depth = baits["depth"] / dp_mode
     # Drop low-coverage targets
-    keep_idx = (norm_depth >= enrich_ratio)
-    logging.info("Keeping %d/%d bins with coverage depth >= %f, modal depth %f",
-                 keep_idx.sum(), len(keep_idx), dp_mode * enrich_ratio, dp_mode)
+    keep_idx = norm_depth >= enrich_ratio
+    logging.info(
+        "Keeping %d/%d bins with coverage depth >= %f, modal depth %f",
+        keep_idx.sum(),
+        len(keep_idx),
+        dp_mode * enrich_ratio,
+        dp_mode,
+    )
     return baits[keep_idx]
-SAMTOOLS = 'samtools'
+SAMTOOLS = "samtools"
-if __name__ == '__main__':
+if __name__ == "__main__":
     AP = argparse.ArgumentParser(description=__doc__)
-    AP.add_argument('sample_bams', nargs='+',
-                    help="""Sample BAM file(s) to test for target coverage.""")
-    AP.add_argument('-o', '--output', metavar='FILENAME',
-                    help="""The inferred targets, in BED format.""")
-    AP.add_argument('-c', '--coverage', metavar='FILENAME',
-                    help="""Filename to output average coverage depths in .cnn
-                    format.""")
-    AP.add_argument('-p', '--processes', metavar='CPU',
-                    nargs='?', type=int, const=0, default=1,
-                    help="""Number of subprocesses to segment in parallel.
+    AP.add_argument(
+        "sample_bams",
+        nargs="+",
+        help="""Sample BAM file(s) to test for target coverage.""",
+    )
+    AP.add_argument(
+        "-o",
+        "--output",
+        metavar="FILENAME",
+        help="""The inferred targets, in BED format.""",
+    )
+    AP.add_argument(
+        "-c",
+        "--coverage",
+        metavar="FILENAME",
+        help="""Filename to output average coverage depths in .cnn
+                    format.""",
+    )
+    AP.add_argument(
+        "-p",
+        "--processes",
+        metavar="CPU",
+        nargs="?",
+        type=int,
+        const=0,
+        default=1,
+        help="""Number of subprocesses to segment in parallel.
                     If given without an argument, use the maximum number
-                    of available CPUs. [Default: use 1 process]""")
-    AP.add_argument('-f', '--fasta', metavar="FILENAME",
-            help="Reference genome, FASTA format (e.g. UCSC hg19.fa)")
-    AP.add_argument('-s', '--samtools', metavar="SAMTOOLS",
-            help="Path to samtools", default="samtools")
+                    of available CPUs. [Default: use 1 process]""",
+    )
+    AP.add_argument(
+        "-f",
+        "--fasta",
+        metavar="FILENAME",
+        help="Reference genome, FASTA format (e.g. UCSC hg19.fa)",
+    )
+    AP.add_argument(
+        "-s",
+        "--samtools",
+        metavar="SAMTOOLS",
+        help="Path to samtools",
+        default="samtools",
+    )
     AP_x = AP.add_mutually_exclusive_group(required=True)
-    AP_x.add_argument('-t', '--targets', metavar='TARGET_BED',
-                    help="""Potentially targeted genomic regions, e.g. all known
+    AP_x.add_argument(
+        "-t",
+        "--targets",
+        metavar="TARGET_BED",
+        help="""Potentially targeted genomic regions, e.g. all known
                     exons in the reference genome, in BED format. Each of these
                     regions will be tested as a whole for enrichment. (Faster
-                    method)""")
-    AP_x.add_argument('-a', '--access', metavar='ACCESS_BED',
-                    # default="../data/access-5k-mappable.grch37.bed",
-                    help="""Sequencing-accessible genomic regions (e.g. from
+                    method)""",
+    )
+    AP_x.add_argument(
+        "-a",
+        "--access",
+        metavar="ACCESS_BED",
+        # default="../data/access-5k-mappable.grch37.bed",
+        help="""Sequencing-accessible genomic regions (e.g. from
                     'cnvkit.py access'), or known genic regions in the reference
                     genome, in BED format. All bases will be tested for
-                    enrichment. (Slower method)""")
+                    enrichment. (Slower method)""",
+    )
     AP_target = AP.add_argument_group("With --targets only")
-    AP_target.add_argument('-d', '--min-depth', metavar='DEPTH',
-                    type=int, default=5,
-                    help="""Minimum sequencing read depth to accept as captured.
-                    [Default: %(default)s]""")
+    AP_target.add_argument(
+        "-d",
+        "--min-depth",
+        metavar="DEPTH",
+        type=int,
+        default=5,
+        help="""Minimum sequencing read depth to accept as captured.
+                    [Default: %(default)s]""",
+    )
     AP_access = AP.add_argument_group("With --access only")
-    AP_access.add_argument('-g', '--min-gap', metavar='GAP_SIZE',
-                    type=int, default=25,
-                    help="""Merge regions separated by gaps smaller than this.
-                    [Default: %(default)s]""")
-    AP_access.add_argument('-l', '--min-length', metavar='TARGET_SIZE',
-                    type=int, default=50,
-                    help="""Minimum region length to accept as captured.
-                    [Default: %(default)s]""")
+    AP_access.add_argument(
+        "-g",
+        "--min-gap",
+        metavar="GAP_SIZE",
+        type=int,
+        default=25,
+        help="""Merge regions separated by gaps smaller than this.
+                    [Default: %(default)s]""",
+    )
+    AP_access.add_argument(
+        "-l",
+        "--min-length",
+        metavar="TARGET_SIZE",
+        type=int,
+        default=50,
+        help="""Minimum region length to accept as captured.
+                    [Default: %(default)s]""",
+    )
     args = AP.parse_args()
     SAMTOOLS = args.samtools
@@ -247,13 +313,20 @@ if __name__ == '__main__':
         args.processes = None
     if args.targets:
-        baits = filter_targets(args.targets, args.sample_bams, args.processes, args.fasta)
+        baits = filter_targets(
+            args.targets, args.sample_bams, args.processes, args.fasta
+        )
     else:
-        baits = scan_targets(args.access, args.sample_bams,
-                             0.5 * args.min_depth,  # More sensitive 1st pass
-                             args.min_gap, args.min_length, args.processes)
+        baits = scan_targets(
+            args.access,
+            args.sample_bams,
+            0.5 * args.min_depth,  # More sensitive 1st pass
+            args.min_gap,
+            args.min_length,
+            args.processes,
+        )
     baits = normalize_depth_log2_filter(baits, args.min_depth)
-    tabio.write(baits, args.output or sys.stdout, 'bed')
+    tabio.write(baits, args.output or sys.stdout, "bed")
     if args.coverage:
-        baits['log2'] = np.log2(baits['depth'] / baits['depth'].median())
-        tabio.write(baits, args.coverage, 'tab')
+        baits["log2"] = np.log2(baits["depth"] / baits["depth"].median())
+        tabio.write(baits, args.coverage, "tab")

biopipen 0.32.3__py3-none-any.whl → 0.33.1__py3-none-any.whl

Potentially problematic release.

biopipen 0.32.3__py3-none-any.whl → 0.33.1__py3-none-any.whl