PyPI - biopipen - Versions diffs - 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl - Mend

biopipen 0.21.0py3-none-any.whl → 0.34.26py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (290) hide show

biopipen/__init__.py +1 -1
biopipen/core/config.toml +28 -0
biopipen/core/filters.py +79 -4
biopipen/core/proc.py +12 -3
biopipen/core/testing.py +75 -3
biopipen/ns/bam.py +148 -6
biopipen/ns/bed.py +75 -0
biopipen/ns/cellranger.py +186 -0
biopipen/ns/cellranger_pipeline.py +126 -0
biopipen/ns/cnv.py +19 -3
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/cnvkit_pipeline.py +20 -12
biopipen/ns/delim.py +34 -35
biopipen/ns/gene.py +68 -23
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +39 -14
biopipen/ns/plot.py +304 -1
biopipen/ns/protein.py +183 -0
biopipen/ns/regulatory.py +290 -0
biopipen/ns/rnaseq.py +142 -5
biopipen/ns/scrna.py +2053 -473
biopipen/ns/scrna_metabolic_landscape.py +228 -382
biopipen/ns/snp.py +659 -0
biopipen/ns/stats.py +484 -0
biopipen/ns/tcr.py +683 -98
biopipen/ns/vcf.py +236 -2
biopipen/ns/web.py +97 -6
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/common.svelte +15 -0
biopipen/reports/protein/ProdigySummary.svelte +16 -0
biopipen/reports/scrna/CellsDistribution.svelte +4 -39
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna/MarkersFinder.svelte +6 -126
biopipen/reports/scrna/MetaMarkers.svelte +3 -75
biopipen/reports/scrna/RadarPlots.svelte +4 -20
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
biopipen/reports/snp/PlinkCallRate.svelte +24 -0
biopipen/reports/snp/PlinkFreq.svelte +18 -0
biopipen/reports/snp/PlinkHWE.svelte +18 -0
biopipen/reports/snp/PlinkHet.svelte +18 -0
biopipen/reports/snp/PlinkIBD.svelte +18 -0
biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
biopipen/reports/tcr/ClonalStats.svelte +16 -0
biopipen/reports/tcr/CloneResidency.svelte +3 -93
biopipen/reports/tcr/Immunarch.svelte +4 -155
biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
biopipen/reports/tcr/TESSA.svelte +11 -28
biopipen/reports/utils/misc.liq +22 -7
biopipen/scripts/bam/BamMerge.py +11 -15
biopipen/scripts/bam/BamSampling.py +90 -0
biopipen/scripts/bam/BamSort.py +141 -0
biopipen/scripts/bam/BamSplitChroms.py +10 -10
biopipen/scripts/bam/BamSubsetByBed.py +38 -0
biopipen/scripts/bam/CNAClinic.R +41 -5
biopipen/scripts/bam/CNVpytor.py +153 -54
biopipen/scripts/bam/ControlFREEC.py +13 -14
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/bed/Bed2Vcf.py +5 -5
biopipen/scripts/bed/BedConsensus.py +5 -5
biopipen/scripts/bed/BedLiftOver.sh +6 -4
biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
biopipen/scripts/bed/BedtoolsMerge.py +4 -4
biopipen/scripts/cellranger/CellRangerCount.py +138 -0
biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
biopipen/scripts/cnv/AneuploidyScore.R +55 -20
biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
biopipen/scripts/cnv/TMADScore.R +25 -9
biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
biopipen/scripts/cnvkit/guess_baits.py +166 -93
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +116 -118
biopipen/scripts/gene/GeneNameConversion.R +67 -0
biopipen/scripts/gene/GenePromoters.R +61 -0
biopipen/scripts/gsea/Enrichr.R +5 -5
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/GSEA.R +2 -2
biopipen/scripts/gsea/PreRank.R +5 -5
biopipen/scripts/misc/Config2File.py +2 -2
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/misc/Shell.sh +15 -0
biopipen/scripts/misc/Str2File.py +2 -2
biopipen/scripts/plot/Heatmap.R +3 -3
biopipen/scripts/plot/Manhattan.R +147 -0
biopipen/scripts/plot/QQPlot.R +146 -0
biopipen/scripts/plot/ROC.R +88 -0
biopipen/scripts/plot/Scatter.R +112 -0
biopipen/scripts/plot/VennDiagram.R +5 -9
biopipen/scripts/protein/MMCIF2PDB.py +33 -0
biopipen/scripts/protein/PDB2Fasta.py +60 -0
biopipen/scripts/protein/Prodigy.py +119 -0
biopipen/scripts/protein/ProdigySummary.R +140 -0
biopipen/scripts/protein/RMSD.py +178 -0
biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
biopipen/scripts/regulatory/MotifScan.py +159 -0
biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
biopipen/scripts/regulatory/motifs-common.R +324 -0
biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
biopipen/scripts/rnaseq/Simulation.R +21 -0
biopipen/scripts/rnaseq/UnitConversion.R +325 -54
biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
biopipen/scripts/scrna/CellCellCommunication.py +150 -0
biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
biopipen/scripts/scrna/CellSNPLite.py +30 -0
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
biopipen/scripts/scrna/CellsDistribution.R +456 -167
biopipen/scripts/scrna/DimPlots.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
biopipen/scripts/scrna/ExprImputation.R +7 -0
biopipen/scripts/scrna/LoomTo10X.R +51 -0
biopipen/scripts/scrna/MQuad.py +25 -0
biopipen/scripts/scrna/MarkersFinder.R +679 -400
biopipen/scripts/scrna/MetaMarkers.R +265 -161
biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
biopipen/scripts/scrna/RadarPlots.R +355 -134
biopipen/scripts/scrna/ScFGSEA.R +298 -100
biopipen/scripts/scrna/ScSimulation.R +65 -0
biopipen/scripts/scrna/ScVelo.py +617 -0
biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
biopipen/scripts/scrna/SeuratClustering.R +36 -233
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
biopipen/scripts/scrna/SeuratPreparing.R +223 -173
biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
biopipen/scripts/scrna/SeuratTo10X.R +27 -0
biopipen/scripts/scrna/Slingshot.R +65 -0
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
biopipen/scripts/scrna/scvelo_paga.py +313 -0
biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
biopipen/scripts/snp/MatrixEQTL.R +217 -0
biopipen/scripts/snp/Plink2GTMat.py +148 -0
biopipen/scripts/snp/PlinkCallRate.R +199 -0
biopipen/scripts/snp/PlinkFilter.py +100 -0
biopipen/scripts/snp/PlinkFreq.R +291 -0
biopipen/scripts/snp/PlinkFromVcf.py +81 -0
biopipen/scripts/snp/PlinkHWE.R +85 -0
biopipen/scripts/snp/PlinkHet.R +96 -0
biopipen/scripts/snp/PlinkIBD.R +196 -0
biopipen/scripts/snp/PlinkSimulation.py +124 -0
biopipen/scripts/snp/PlinkUpdateName.py +124 -0
biopipen/scripts/stats/ChowTest.R +146 -0
biopipen/scripts/stats/DiffCoexpr.R +152 -0
biopipen/scripts/stats/LiquidAssoc.R +135 -0
biopipen/scripts/stats/Mediation.R +108 -0
biopipen/scripts/stats/MetaPvalue.R +130 -0
biopipen/scripts/stats/MetaPvalue1.R +74 -0
biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
biopipen/scripts/tcr/Attach2Seurat.R +3 -2
biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
biopipen/scripts/tcr/CDR3Clustering.R +343 -0
biopipen/scripts/tcr/ClonalStats.R +526 -0
biopipen/scripts/tcr/CloneResidency.R +255 -131
biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
biopipen/scripts/tcr/GIANA/query.py +164 -162
biopipen/scripts/tcr/Immunarch-basic.R +31 -9
biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
biopipen/scripts/tcr/Immunarch.R +63 -11
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
biopipen/scripts/tcr/SampleDiversity.R +1 -1
biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
biopipen/scripts/tcr/ScRepLoading.R +166 -0
biopipen/scripts/tcr/TCRClusterStats.R +176 -22
biopipen/scripts/tcr/TCRDock.py +110 -0
biopipen/scripts/tcr/TESSA.R +102 -118
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/tcr/immunarch-patched.R +142 -0
biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
biopipen/scripts/vcf/BcftoolsSort.py +113 -0
biopipen/scripts/vcf/BcftoolsView.py +73 -0
biopipen/scripts/vcf/TruvariBench.sh +14 -7
biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
biopipen/scripts/vcf/TruvariConsistency.R +1 -1
biopipen/scripts/vcf/Vcf2Bed.py +2 -2
biopipen/scripts/vcf/VcfAnno.py +11 -11
biopipen/scripts/vcf/VcfDownSample.sh +22 -10
biopipen/scripts/vcf/VcfFilter.py +5 -5
biopipen/scripts/vcf/VcfFix.py +7 -7
biopipen/scripts/vcf/VcfFix_utils.py +13 -4
biopipen/scripts/vcf/VcfIndex.py +3 -3
biopipen/scripts/vcf/VcfIntersect.py +3 -3
biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
biopipen/scripts/vcf/bcftools_utils.py +52 -0
biopipen/scripts/web/Download.py +8 -4
biopipen/scripts/web/DownloadList.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
biopipen/scripts/web/gcloud_common.py +49 -0
biopipen/utils/gene.py +108 -60
biopipen/utils/misc.py +146 -20
biopipen/utils/reference.py +64 -20
biopipen/utils/reporter.py +177 -0
biopipen/utils/vcf.py +1 -1
biopipen-0.34.26.dist-info/METADATA +27 -0
biopipen-0.34.26.dist-info/RECORD +292 -0
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
biopipen/ns/bcftools.py +0 -111
biopipen/ns/scrna_basic.py +0 -255
biopipen/reports/delim/SampleInfo.svelte +0 -36
biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
biopipen/reports/scrna/ScFGSEA.svelte +0 -35
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
biopipen/scripts/gene/GeneNameConversion.py +0 -66
biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
biopipen/scripts/scrna/ExprImpution.R +0 -7
biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
biopipen/scripts/scrna/Write10X.R +0 -11
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
biopipen/scripts/tcr/TCRClustering.R +0 -280
biopipen/utils/common_docstrs.py +0 -61
biopipen/utils/gene.R +0 -49
biopipen/utils/gsea.R +0 -193
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -114
biopipen/utils/mutate_helpers.R +0 -433
biopipen/utils/plot.R +0 -173
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -115
biopipen-0.21.0.dist-info/METADATA +0 -22
biopipen-0.21.0.dist-info/RECORD +0 -218

biopipen/scripts/vcf/BcftoolsSort.py ADDED Viewed

@@ -0,0 +1,113 @@
+from typing import Literal
+from pathlib import Path, PosixPath  # noqa: F401
+from biopipen.utils.misc import run_command, logger
+from biopipen.scripts.vcf.bcftools_utils import run_bcftools
+# The `{{ ... }}` placeholders are filled in by the template engine before the
+# script runs; until then these lines are not valid Python.
+infile: str = {{in.infile | quote}}  # pyright: ignore # noqa: E999
+outfile: str = {{out.outfile | quote}}  # pyright: ignore
+envs: dict = {{envs | dict | repr}}  # pyright: ignore
+# Intermediate files (dumped header, reheadered VCF) are written next to the output
+outdir = Path(outfile).parent
+# Pop the options this script handles itself; whatever remains in `envs` is
+# handed to run_bcftools() at the end of the script.
+bcftools = envs.pop("bcftools")
+tabix = envs.pop("tabix")
+# NOTE(review): ncores is removed from envs but not referenced again in this script
+ncores = envs.pop("ncores")
+gz = envs.pop("gz")
+index = envs.pop("index")
+# File whose first column lists chromosome names; enables the reheader step below
+chrsize = envs.pop("chrsize")
+# Policy for VCF contigs missing from the chrsize file (see match_contigs)
+notfound = envs.pop("notfound")
+if chrsize:
+    class Contig:
+        def __init__(self, name: str, length: str):
+            self.name = name
+            self.length = length
+        def __str__(self) -> str:
+            return f"##contig=<ID={self.name},length={self.length}>"
+    def parse_header(header_file: Path) -> tuple[list[str], dict[str, Contig]]:
+        hlines = []
+        ctgs = {}
+        with open(header_file) as fh:
+            for line in fh:
+                if line.startswith("##contig"):
+                    ctg = line.strip().split("##contig=<ID=")[1].split(",length=")
+                    ctgs[ctg[0]] = Contig(ctg[0], ctg[1].replace(">", ""))
+                else:
+                    hlines.append(line.strip())
+        return hlines, ctgs
+    def match_contigs(
+        ctgs: dict[str, Contig],
+        chroms: list[str],
+        notfound: Literal["error", "remove", "start", "end"],
+    ) -> list[str]:
+        if (
+            ctgs
+            and chroms
+            and all(chrom.startswith("chr") for chrom in chroms)
+            and not any(chrom.startswith("chr") for chrom in ctgs)
+        ):
+            logger.warning(
+                "Removing 'chr' prefix from chromosomes in envs.chrsize file, "
+                "because the input VCF file does not have 'chr' prefix."
+            )
+            chroms = [chrom[3:] for chrom in chroms]
+        new_ctgs = []
+        for chrom in chroms:
+            if chrom in ctgs:
+                new_ctgs.append(str(ctgs[chrom]))
+                del ctgs[chrom]
+        if ctgs:
+            if notfound == "error":
+                raise ValueError(
+                    "Chromosomes not found in envs.chrsize file: "
+                    f"{', '.join(ctgs.keys())}"
+                )
+            elif notfound == "start":
+                new_ctgs = [str(ctg) for ctg in ctgs.values()] + new_ctgs
+            elif notfound == "end":
+                new_ctgs = new_ctgs + [str(ctg) for ctg in ctgs.values()]
+        return new_ctgs
+    chroms = []
+    with Path(chrsize).expanduser().open() as fh:
+        for line in fh:
+            chrom = line.strip().split()[0]
+            chroms.append(chrom)
+    header_file = outdir / "header.txt"
+    run_command(f'{bcftools} view -h {infile} > {header_file}', fg=True)
+    header_lines, contigs = parse_header(header_file)
+    new_contigs = match_contigs(contigs, chroms, notfound=notfound)
+    header_lines = [header_lines[0], *new_contigs, *header_lines[1:]]
+    reheader_file = outdir / "reheader.txt"
+    with open(reheader_file, "w") as fh:
+        fh.writelines([f"{line}\n" for line in header_lines])
+    reheader_vcf = outdir / f"{Path(infile).stem}_reheader.vcf"
+    run_command([
+        bcftools, "reheader",
+        "--header", reheader_file,
+        "-o", reheader_vcf,
+        infile
+    ], fg=True)
+    infile = str(reheader_vcf)
+envs[""] = [bcftools, "sort"]
+envs["_"] = infile
+envs["o"] = outfile
+if index and not gz:
+    logger.warning("Forcing envs.gz to True because envs.index is True.")
+    gz = True
+if "O" not in envs and "output-type" not in envs and "output_type" not in envs:
+    envs["O"] = "z" if gz else "v"
+run_bcftools(envs, bcftools=bcftools, index=index, tabix=tabix)

biopipen/scripts/vcf/BcftoolsView.py ADDED Viewed

@@ -0,0 +1,73 @@
+from contextlib import suppress
+# In case there are paths passed to envs
+from pathlib import PosixPath  # noqa: F401
+from biopipen.utils.misc import logger
+from biopipen.utils.reference import tabix_index
+from biopipen.scripts.vcf.bcftools_utils import run_bcftools
+# The `{{ ... }}` placeholders are filled in by the template engine before the
+# script runs; until then these lines are not valid Python.
+infile: str = {{in.infile | quote}}  # pyright: ignore # noqa: E999
+regions_file: str = {{in.regions_file | quote}}  # pyright: ignore
+samples_file: str = {{in.samples_file | quote}}  # pyright: ignore
+outfile: str = {{out.outfile | quote}}  # pyright: ignore
+envs: dict = {{envs | dict | repr}}  # pyright: ignore
+# Pop the options this script handles itself; whatever remains in `envs` is
+# handed to run_bcftools() at the end of the script.
+bcftools = envs.pop("bcftools")
+tabix = envs.pop("tabix")
+# Passed on to bcftools as `threads` further below
+ncores = envs.pop("ncores")
+gz = envs.pop("gz")
+index = envs.pop("index")
+if regions_file:
+    if "R" in envs or "regions_file" in envs or "regions-file" in envs:
+        logger.warning(
+            r"Ignoring envs\[regions_file/regions-file/R] "
+            "because in.regionsfile is provided."
+        )
+        with suppress(KeyError):
+            del envs["regions_file"]
+        with suppress(KeyError):
+            del envs["regions-file"]
+        with suppress(KeyError):
+            del envs["R"]
+elif "R" in envs or "regions_file" in envs or "regions-file" in envs:
+    regions_file = (
+        envs.pop("regions_file", None)
+        or envs.pop("regions-file", None)
+        or envs.pop("R", None)
+    )
+if samples_file:
+    if "S" in envs or "samples_file" in envs or "samples-file" in envs:
+        logger.warning(
+            "Ignoring envs[samples_file/samples-file/S] "
+            "because in.samples_file is provided."
+        )
+        with suppress(KeyError):
+            del envs["samples_file"]
+        with suppress(KeyError):
+            del envs["samples-file"]
+        with suppress(KeyError):
+            del envs["S"]
+elif "S" in envs or "samples_file" in envs or "samples-file" in envs:
+    samples_file = (
+        envs.pop("samples_file", None)
+        or envs.pop("samples-file", None)
+        or envs.pop("S", None)
+    )
+if index and not gz:
+    logger.warning("Forcing envs.gz to True because envs.index is True.")
+    gz = True
+if "O" not in envs and "output-type" not in envs and "output_type" not in envs:
+    envs["O"] = "z" if gz else "v"
+envs[""] = [bcftools, "view"]
+envs["_"] = tabix_index(infile, "vcf", tabix=tabix)
+envs["o"] = outfile
+envs["threads"] = ncores
+envs["regions_file"] = regions_file
+envs["samples_file"] = samples_file
+run_bcftools(envs, bcftools=bcftools, index=index, tabix=tabix)

biopipen/scripts/vcf/TruvariBench.sh CHANGED Viewed

@@ -1,13 +1,15 @@
+# shellcheck disable=SC1083
 compvcf={{in.compvcf | quote}}
 basevcf={{in.basevcf | quote}}
 outdir={{out.outdir | quote}}
 truvari={{envs.truvari | quote}}
 ref={{envs.ref | quote}}
 refdist={{envs.refdist | quote}}
-pctsim={{envs.pctsim | quote}}
+pctseq={{envs.pctseq | quote}}
 pctsize={{envs.pctsize | quote}}
 pctovl={{envs.pctovl | quote}}
 sizemax={{envs.sizemax | default: 50000 | quote}}
+# shellcheck disable=SC1054
 {% if envs.typeignore %}
 typeignore="--typeignore"
 {% else %}
@@ -15,20 +17,25 @@ typeignore=""
 {% endif %}
 {% if envs.multimatch %}
 multimatch="--multimatch"
+# shellcheck disable=SC1009
 {% else %}
 multimatch=""
+# shellcheck disable=SC1073
 {% endif %}
 rm -rf $outdir
-$truvari bench \
-    -c "$compvcf" \
-    -b "$basevcf" \
-    -f "$ref" \
+cmd="$truvari bench \
+    -c '$compvcf' \
+    -b '$basevcf' \
+    -f '$ref' \
     --refdist $refdist \
-    --pctsim $pctsim \
+    --pctseq $pctseq \
     --pctsize $pctsize \
     --pctovl $pctovl \
     --sizemax $sizemax \
     $typeignore \
     $multimatch \
-    -o $outdir
+    -o $outdir"
+echo "$cmd"
+eval "$cmd"

biopipen/scripts/vcf/TruvariBenchSummary.R CHANGED Viewed

@@ -1,11 +1,7 @@
-source("{{biopipen_dir}}/utils/misc.R")
-source("{{biopipen_dir}}/utils/plot.R")
-library(ggprism)
 library(rjson)
+library(rlang)
 library(dplyr)
-theme_set(theme_prism(axis_text_angle = 90))
+library(plotthis)
 indirs = {{in.indirs | r}}
 outdir = {{out.outdir | r}}
@@ -17,7 +13,7 @@ read_summary = function() {
     summaries = NULL
     for (indir in indirs) {
-        summary = fromJSON(file=file.path(indir, "summary.txt"))
+        summary = fromJSON(file=file.path(indir, "summary.json"))
         summary$gt_matrix = NULL
         summary$Sample = sub(".truvari_bench", "", basename(indir), fixed=T)
         summaries = bind_rows(summaries, summary)
@@ -39,14 +35,21 @@ get_devpars = function() {
 plot_summary = function(col) {
     outfile = file.path(outdir, paste0(col, ".png"))
-    plotGG(
+    p <- plotthis::BarPlot(
         summaries,
-        "col",
-        list(mapping = aes_string(x = "Sample", y = bQuote(col), fill = "Sample")),
-        devpars = get_devpars(),
-        outfile = outfile
+        x = "Sample",
+        y = col,
+        x_text_angle = 90
+    )
+    devpars <- get_devpars()
+    png(
+        filename = outfile,
+        width = devpars$width,
+        height = devpars$height,
+        res = devpars$res
     )
+    print(p)
+    dev.off()
 }
 main = function() {

biopipen/scripts/vcf/TruvariConsistency.R CHANGED Viewed

@@ -1,4 +1,4 @@
-source("{{biopipen_dir}}/utils/plot.R")
+{{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
 # to compile the expressions
 library(ComplexHeatmap)

biopipen/scripts/vcf/Vcf2Bed.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from cyvcf2 import VCF, Variant
-infile = {{in.infile | quote}}  # pyright: ignore
-outfile = {{out.outfile | quote}}  # pyright: ignore
+infile: str = {{in.infile | quote}}  # pyright: ignore  # noqa: E999
+outfile: str = {{out.outfile | quote}}  # pyright: ignore
 # vcf, default 1
 inbase = {{envs.inbase | int}}  # pyright: ignore
 # bed, default 0

biopipen/scripts/vcf/VcfAnno.py CHANGED Viewed

@@ -2,22 +2,22 @@ from os import path
 from biopipen.utils.misc import run_command, dict_to_cli_args
-infile = {{in.infile | quote}}  # pyright: ignore
-outfile = {{out.outfile | quote}}  # pyright: ignore
-joboutdir = {{job.outdir | quote}}  # pyright: ignore
+infile: str = {{in.infile | quote}}  # pyright: ignore  # noqa
+outfile: str = {{out.outfile | quote}}  # pyright: ignore
+joboutdir: str = {{job.outdir | quote}}  # pyright: ignore
 vcfanno = {{envs.vcfanno | quote}}  # pyright: ignore
-ncores = {{envs.ncores | repr}}  # pyright: ignore
-args = {{envs.args | repr}}  # pyright: ignore
+ncores: int = {{envs.ncores | repr}}  # pyright: ignore
+args: dict = {{envs.args | dict}}  # pyright: ignore
-{% set conf = envs.conffile or in.conffile %}
-{% if conf | isinstance: dict %}
+{% set conf = envs.conffile or in.conffile %}  # pyright: ignore  # noqa
+{% if conf | isinstance: dict %}  # pyright: ignore  # noqa
 conffile = path.join(joboutdir, "config.toml")
-conf = {{ conf | toml | quote }}
+conf: str = {{ conf | toml | quote }}  # pyright: ignore  # noqa
 with open(conffile, "w") as f:
     f.write(conf)
-{% else %}
-conffile = {{conf | quote}}
-{% endif %}
+{% else %}  # pyright: ignore  # noqa
+conffile = {{conf | quote}}  # pyright: ignore  # noqa
+{% endif %}  # pyright: ignore  # noqa
 args["p"] = ncores
 args["_"] = [conffile, infile]

biopipen/scripts/vcf/VcfDownSample.sh CHANGED Viewed

@@ -1,25 +1,37 @@
+# shellcheck disable=SC2148
+# shellcheck disable=SC2036
+# shellcheck disable=SC2030
+# shellcheck disable=SC1083
+# shellcheck disable=SC2288
 infile={{in.infile | quote}}
 outfile={{out.outfile | quote}}
 n={{envs.n}}
+# shellcheck disable=SC2031
 if [[ $infile == *.gz ]]; then
-    outfile=$(echo $outfile | sed -r "s/\.gz$//")
-    nheader=$(zcat $infile | head -n 9999 | grep "^#" | wc -l | cut -d' ' -f1)
+    outfile=$(echo "$outfile" | sed -r "s/\.gz$//")
+    # shellcheck disable=SC2126
+    nheader=$(zcat "$infile" | head -n 9999 | grep "^#" | wc -l | cut -d' ' -f1)
     if [[ ! $n -gt 1 ]]; then
-        nrows=$(zcat $infile | wc -l | cut -d' ' -f1)
+        nrows=$(zcat "$infile" | wc -l | cut -d' ' -f1)
+        # shellcheck disable=SC2004
         nvars=$(($nrows - $nheader))
         n=$(echo "$nvars * $n" | bc)
     fi
-    zcat $infile | head -n $nheader > $outfile
-    zcat $infile | tail -n +$(($nheader + 1)) | shuf -n $n | LC_ALL=C sort -k1,1V -k2,2n >> $outfile
-    bgzip $outfile
+    zcat "$infile" | head -n "$nheader" > "$outfile"
+    # shellcheck disable=SC2004
+    zcat "$infile" | tail -n +$(($nheader + 1)) | shuf -n "$n" | LC_ALL=C sort -k1,1V -k2,2n >> "$outfile"
+    bgzip "$outfile"
 else
-    nheader=$(head -n 9999 $infile | grep "^#" | wc -l | cut -d' ' -f1)
+    # shellcheck disable=SC2126
+    nheader=$(head -n 9999 "$infile" | grep "^#" | wc -l | cut -d' ' -f1)
     if [[ ! $n -gt 1 ]]; then
-        nrows=$(wc -l $infile | cut -d' ' -f1)
+        nrows=$(wc -l "$infile" | cut -d' ' -f1)
+        # shellcheck disable=SC2004
         nvars=$(($nrows - $nheader))
         n=$(echo "$nvars * $n" | bc)
     fi
-    head -n $nheader $infile > $outfile
-    tail -n +$(($nheader + 1)) $infile | shuf -n $n | LC_ALL=C sort -k1,1V -k2,2n >> $outfile
+    head -n "$nheader" "$infile" > "$outfile"
+    # shellcheck disable=SC2004
+    tail -n +$(($nheader + 1)) "$infile" | shuf -n "$n" | LC_ALL=C sort -k1,1V -k2,2n >> "$outfile"
 fi

biopipen/scripts/vcf/VcfFilter.py CHANGED Viewed

@@ -1,13 +1,13 @@
 from cyvcf2 import VCF, Writer, Variant
-infile = {{in.invcf | repr}}  # pyright: ignore
-outfile = {{out.outfile | repr}}  # pyright: ignore
+infile: str = {{in.invcf | quote}}  # pyright: ignore  # noqa: E999
+outfile: str = {{out.outfile | quote}}  # pyright: ignore
-{{envs.helper}}
+{{envs.helper}}  # pyright: ignore  # noqa: E999
 keep = {{envs.keep | repr}}  # pyright: ignore
-filters = {{envs.filters | repr}}  # pyright: ignore
-filter_descs = {{envs.filter_descs | repr}}  # pyright: ignore
+filters: dict = {{envs.filters | repr}}  # pyright: ignore
+filter_descs: dict = {{envs.filter_descs | repr}}  # pyright: ignore
 # builtin filters
 BUILTIN_FILTERS = {}

biopipen/scripts/vcf/VcfFix.py CHANGED Viewed

@@ -7,17 +7,17 @@ from biopipen.scripts.vcf.VcfFix_utils import (  # noqa: F401
     HeaderContig,
     HeaderGeneral,
     Fields,
-    Info,
-    Format,
-    Alt,
-    Filter,
-    Sample,
-    Samples,
+    # Info,
+    # Format,
+    # Alt,
+    # Filter,
+    # Sample,
+    # Samples,
     Variant,
 )
 from biopipen.scripts.vcf.VcfFix_utils import fix_vcffile
-infile = {{in.infile | quote}}  # pyright: ignore
+infile = {{in.infile | quote}}  # pyright: ignore  # noqa: E999
 instem = {{in.infile | stem | quote}}  # pyright: ignore
 outfile = {{out.outfile | quote}}  # pyright: ignore

biopipen/scripts/vcf/VcfFix_utils.py CHANGED Viewed

@@ -1,6 +1,15 @@
 import re
 import gzip
-from biopipen.utils.vcf import *  # noqa: F401, F403
+from biopipen.utils.vcf import (
+    HeaderInfo,
+    HeaderFormat,
+    HeaderFilter,
+    HeaderContig,
+    HeaderGeneral,
+    Fields,
+    Variant,
+    HeaderItem,
+)
 def line_to_obj(line: str):
@@ -41,7 +50,7 @@ def handle_obj(obj, fixes: dict):
         regex = fix.get("regex")
         if regex:
-            if not re.search(regex, obj.raw):
+            if not re.search(regex, obj.raw):  # type: ignore
                 continue
             return fix["fix"](obj.raw if kind is None else obj)
@@ -63,11 +72,11 @@ def fix_vcffile(vcffile, outfile, fixes):
         else:
             modify_fixes.append(fix)
-    inopen = gzip.open if vcffile.endswith(".gz") else open
+    inopen = gzip.open if str(vcffile).endswith(".gz") else open
     with inopen(vcffile, "rt") as fin, open(outfile, "w") as fout:
         for line in fin:
             obj = line_to_obj(line)
-            out = handle_obj(obj, modify_fixes)
+            out = handle_obj(obj, modify_fixes)  # type: ignore
             if obj.kind == "fields":
                 for fix in header_append_fixes:
                     fout.write(str(fix["fix"](None)).rstrip("\n") + "\n")

biopipen/scripts/vcf/VcfIndex.py CHANGED Viewed

@@ -4,10 +4,10 @@ from os import path
 from biopipen.utils.reference import tabix_index
 from biopipen.utils.misc import run_command
-infile = {{in.infile | repr}}  # pyright: ignore
-outfile = Path({{out.outfile | repr}})  # pyright: ignore
+infile: str = {{in.infile | quote}}  # pyright: ignore  # noqa
+outfile = Path({{out.outfile | quote}})  # pyright: ignore
 outidx = {{out.outidx | repr}}  # pyright: ignore
-tabix = {{envs.tabix | repr}}  # pyright: ignore
+tabix: str = {{envs.tabix | repr}}  # pyright: ignore
 ncores = {{envs.ncores | repr}}  # pyright: ignore
 outfile_with_index = tabix_index(infile, "vcf", outfile.parent, tabix)

biopipen/scripts/vcf/VcfIntersect.py CHANGED Viewed

@@ -1,8 +1,8 @@
 from biopipen.utils.misc import run_command, dict_to_cli_args
-infile1 = {{in.infile1 | repr}}  # pyright: ignore
-infile2 = {{in.infile2 | repr}}  # pyright: ignore
-outfile = {{out.outfile | repr}}  # pyright: ignore
+infile1: str = {{in.infile1 | quote}}  # pyright: ignore  # noqa
+infile2 = {{in.infile2 | quote}}  # pyright: ignore
+outfile = {{out.outfile | quote}}  # pyright: ignore
 bcftools = {{envs.bcftools | repr}}  # pyright: ignore
 gz = {{envs.gz | repr}}  # pyright: ignore
 index = {{envs.index | repr}}  # pyright: ignore

biopipen/scripts/vcf/VcfLiftOver.sh CHANGED Viewed

@@ -1,3 +1,5 @@
+# shellcheck disable=SC2148
+# shellcheck disable=SC1083
 invcf={{ in.invcf | quote }}
 outvcf={{ out.outvcf | quote }}
 rejfile={{ job.outdir | joinpaths: "rejected.vcf" | quote }}
@@ -6,12 +8,15 @@ chain={{ envs.chain | quote }}
 reffa={{ envs.reffa | quote }}
 args={{ envs.args | dict_to_cli_args: join=True }}
+# shellcheck disable=SC2154
 refdict="${reffa%.fa}.dict"
 if [[ ! -e "$refdict" ]]; then
     echo "Sequence dictionary does not exist: $refdict" 1>&2
     exit 1
 fi
+# shellcheck disable=SC2154
+# shellcheck disable=SC2086
 $gatk LiftoverVcf \
     $args \
     --INPUT "$invcf" \

biopipen/scripts/vcf/VcfSplitSamples.py CHANGED Viewed

@@ -3,12 +3,12 @@ import shlex
 import concurrent.futures
 from subprocess import Popen, check_output
-infile = {{in.infile | repr}}  # pyright: ignore
-outdir = {{out.outdir | repr}}  # pyright: ignore
-bcftools = {{envs.bcftools | repr}}  # pyright: ignore
+infile: str = {{in.infile | quote}}  # pyright: ignore  # noqa
+outdir: str = {{out.outdir | quote}}  # pyright: ignore
+bcftools: str = {{envs.bcftools | repr}}  # pyright: ignore
 gz = {{envs.gz | repr}}  # pyright: ignore
 index = {{envs.index | repr}}  # pyright: ignore
-ncores = {{envs.ncores | int}}  # pyright: ignore
+ncores: int = {{envs.ncores | int}}  # pyright: ignore
 private = {{envs.private | repr}}  # pyright: ignore
 if index:

biopipen/scripts/vcf/bcftools_utils.py ADDED Viewed

@@ -0,0 +1,52 @@
+"""Utilities for bcftools"""
+from biopipen.utils.misc import run_command, dict_to_cli_args
+from biopipen.utils.reference import tabix_index
+def bcftools_version(bcftools: str) -> tuple[int, ...]:
+    """Get the version of bcftools
+    Only the leading dotted-numeric part of the reported version is used,
+    so a suffixed version string such as `1.20-5-gabcdef` yields `(1, 20)`
+    instead of failing on the non-numeric remainder.
+    Args:
+        bcftools (str): Path to bcftools
+    Returns:
+        tuple[int, ...]: The version of bcftools, e.g. `(1, 20)`
+    Raises:
+        ValueError: If the first output line holds no numeric version
+    """
+    import re
+    version_text = (
+        run_command([bcftools, "version"], stdout="return")
+        .splitlines()[0]  # bcftools 1.20  # type: ignore
+        .replace("bcftools", "")
+        .strip()  # 1.20
+    )
+    # Keep only the numeric prefix; anything after it (build suffixes and
+    # the like) cannot be converted with int().
+    matched = re.match(r"[0-9]+(?:\.[0-9]+)*", version_text)
+    if matched is None:
+        raise ValueError(
+            f"Cannot parse bcftools version from: {version_text!r}"
+        )
+    return tuple(int(part) for part in matched.group().split("."))
+def run_bcftools(
+    args: dict,
+    bcftools: str,  # TODO: get from the first argument of args
+    index: bool,
+    tabix: str
+) -> None:
+    """Run bcftools with the given arguments
+    When indexing is requested, bcftools 1.20+ writes the index itself via
+    `write_index=tbi`; for older versions the output given by `args["o"]`
+    is indexed with tabix after the command has finished.
+    The passed-in `args` dict is left unmodified.
+    Args:
+        args: Arguments to pass to bcftools
+        bcftools (str): Path to bcftools
+        index (bool): Whether to index the output
+        tabix (str): Path to tabix
+    """
+    # Work on a shallow copy so the index option added below does not leak
+    # into the caller's dict.
+    args = dict(args)
+    # The version is only queried when an index is actually requested.
+    native_index = index and bcftools_version(bcftools) >= (1, 20)
+    if native_index:
+        # requires bcftools 1.20+
+        # '--write-index tbi' does not work,
+        # it has to be '--write-index=tbi'
+        args["write_index=tbi"] = True
+    run_command(dict_to_cli_args(args, dashify=True), fg=True)
+    if index and not native_index:
+        # Older bcftools: build the index separately on the output file.
+        tabix_index(args["o"], "vcf", tmpdir=False, tabix=tabix)

biopipen/scripts/web/Download.py CHANGED Viewed

@@ -2,13 +2,13 @@ from pathlib import Path
 from biopipen.utils.misc import run_command, dict_to_cli_args
-url = {{in.url | repr}}  # pyright: ignore
-outfile = Path({{out.outfile | repr}})  # pyright: ignore
+url = {{in.url | quote}}  # pyright: ignore # noqa
+outfile = Path({{out.outfile | quote}})  # pyright: ignore
 tool = {{envs.tool | repr}}  # pyright: ignore
 wget = {{envs.wget | repr}}  # pyright: ignore
 aria2c = {{envs.aria2c | repr}}  # pyright: ignore
 ncores = {{envs.ncores | repr}}  # pyright: ignore
-args = {{envs.args | dict}}  # pyright: ignore
+args: dict = {{envs.args | dict}}  # pyright: ignore
 if tool == "wget":
     args["_"] = url
@@ -28,4 +28,8 @@ elif tool == "aria2c":
 else: # use python
     import urllib
-    urllib.urlretrieve(url, outfile)
+    try:
+        urllib.urlretrieve(url, outfile)  # type: ignore
+    except AttributeError:
+        urllib.request.urlretrieve(url, outfile)  # type: ignore

biopipen/scripts/web/DownloadList.py CHANGED Viewed

@@ -2,13 +2,13 @@ from pathlib import Path
 from biopipen.utils.misc import run_command, dict_to_cli_args
-urlfile = {{in.urlfile | repr}}  # pyright: ignore
-outdir = Path({{out.outdir | repr}})  # pyright: ignore
+urlfile: str = {{in.urlfile | quote}}  # pyright: ignore  # noqa
+outdir = Path({{out.outdir | quote}})  # pyright: ignore
 tool = {{envs.tool | repr}}  # pyright: ignore
 wget = {{envs.wget | repr}}  # pyright: ignore
 aria2c = {{envs.aria2c | repr}}  # pyright: ignore
 ncores = {{envs.ncores | repr}}  # pyright: ignore
-args = {{envs.args | repr}}  # pyright: ignore
+args: dict = {{envs.args | repr}}  # pyright: ignore
 if tool == "wget":
     args["i"] = urlfile
@@ -26,10 +26,10 @@ elif tool == "aria2c":
     run_command(dict_to_cli_args(args, dashify=True), fg=True)
 else: # use python
-    import urllib
+    from urllib.request import urlretrieve
     from urllib.parse import urlparse
     with open(urlfile, "r") as furl:
         for i, url in enumerate(furl.readlines()):
             parsed = urlparse(url)
             path = Path(parsed.path)
-            urllib.urlretrieve(url, f"{path.stem}-{i}{path.suffix}")
+            urlretrieve(url, f"{path.stem}-{i}{path.suffix}")

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl