biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +28 -0
- biopipen/core/filters.py +79 -4
- biopipen/core/proc.py +12 -3
- biopipen/core/testing.py +75 -3
- biopipen/ns/bam.py +148 -6
- biopipen/ns/bed.py +75 -0
- biopipen/ns/cellranger.py +186 -0
- biopipen/ns/cellranger_pipeline.py +126 -0
- biopipen/ns/cnv.py +19 -3
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/cnvkit_pipeline.py +20 -12
- biopipen/ns/delim.py +34 -35
- biopipen/ns/gene.py +68 -23
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +39 -14
- biopipen/ns/plot.py +304 -1
- biopipen/ns/protein.py +183 -0
- biopipen/ns/regulatory.py +290 -0
- biopipen/ns/rnaseq.py +142 -5
- biopipen/ns/scrna.py +2053 -473
- biopipen/ns/scrna_metabolic_landscape.py +228 -382
- biopipen/ns/snp.py +659 -0
- biopipen/ns/stats.py +484 -0
- biopipen/ns/tcr.py +683 -98
- biopipen/ns/vcf.py +236 -2
- biopipen/ns/web.py +97 -6
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
- biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
- biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/common.svelte +15 -0
- biopipen/reports/protein/ProdigySummary.svelte +16 -0
- biopipen/reports/scrna/CellsDistribution.svelte +4 -39
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna/MarkersFinder.svelte +6 -126
- biopipen/reports/scrna/MetaMarkers.svelte +3 -75
- biopipen/reports/scrna/RadarPlots.svelte +4 -20
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
- biopipen/reports/snp/PlinkCallRate.svelte +24 -0
- biopipen/reports/snp/PlinkFreq.svelte +18 -0
- biopipen/reports/snp/PlinkHWE.svelte +18 -0
- biopipen/reports/snp/PlinkHet.svelte +18 -0
- biopipen/reports/snp/PlinkIBD.svelte +18 -0
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
- biopipen/reports/tcr/ClonalStats.svelte +16 -0
- biopipen/reports/tcr/CloneResidency.svelte +3 -93
- biopipen/reports/tcr/Immunarch.svelte +4 -155
- biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
- biopipen/reports/tcr/TESSA.svelte +11 -28
- biopipen/reports/utils/misc.liq +22 -7
- biopipen/scripts/bam/BamMerge.py +11 -15
- biopipen/scripts/bam/BamSampling.py +90 -0
- biopipen/scripts/bam/BamSort.py +141 -0
- biopipen/scripts/bam/BamSplitChroms.py +10 -10
- biopipen/scripts/bam/BamSubsetByBed.py +38 -0
- biopipen/scripts/bam/CNAClinic.R +41 -5
- biopipen/scripts/bam/CNVpytor.py +153 -54
- biopipen/scripts/bam/ControlFREEC.py +13 -14
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/bed/Bed2Vcf.py +5 -5
- biopipen/scripts/bed/BedConsensus.py +5 -5
- biopipen/scripts/bed/BedLiftOver.sh +6 -4
- biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
- biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
- biopipen/scripts/bed/BedtoolsMerge.py +4 -4
- biopipen/scripts/cellranger/CellRangerCount.py +138 -0
- biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
- biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
- biopipen/scripts/cnv/AneuploidyScore.R +55 -20
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
- biopipen/scripts/cnv/TMADScore.R +25 -9
- biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
- biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
- biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
- biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
- biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
- biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
- biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
- biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
- biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
- biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
- biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
- biopipen/scripts/cnvkit/guess_baits.py +166 -93
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +116 -118
- biopipen/scripts/gene/GeneNameConversion.R +67 -0
- biopipen/scripts/gene/GenePromoters.R +61 -0
- biopipen/scripts/gsea/Enrichr.R +5 -5
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/GSEA.R +2 -2
- biopipen/scripts/gsea/PreRank.R +5 -5
- biopipen/scripts/misc/Config2File.py +2 -2
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/misc/Shell.sh +15 -0
- biopipen/scripts/misc/Str2File.py +2 -2
- biopipen/scripts/plot/Heatmap.R +3 -3
- biopipen/scripts/plot/Manhattan.R +147 -0
- biopipen/scripts/plot/QQPlot.R +146 -0
- biopipen/scripts/plot/ROC.R +88 -0
- biopipen/scripts/plot/Scatter.R +112 -0
- biopipen/scripts/plot/VennDiagram.R +5 -9
- biopipen/scripts/protein/MMCIF2PDB.py +33 -0
- biopipen/scripts/protein/PDB2Fasta.py +60 -0
- biopipen/scripts/protein/Prodigy.py +119 -0
- biopipen/scripts/protein/ProdigySummary.R +140 -0
- biopipen/scripts/protein/RMSD.py +178 -0
- biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
- biopipen/scripts/regulatory/MotifScan.py +159 -0
- biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
- biopipen/scripts/regulatory/motifs-common.R +324 -0
- biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
- biopipen/scripts/rnaseq/Simulation.R +21 -0
- biopipen/scripts/rnaseq/UnitConversion.R +325 -54
- biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
- biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
- biopipen/scripts/scrna/CellCellCommunication.py +150 -0
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
- biopipen/scripts/scrna/CellSNPLite.py +30 -0
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
- biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
- biopipen/scripts/scrna/CellsDistribution.R +456 -167
- biopipen/scripts/scrna/DimPlots.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
- biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
- biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
- biopipen/scripts/scrna/ExprImputation.R +7 -0
- biopipen/scripts/scrna/LoomTo10X.R +51 -0
- biopipen/scripts/scrna/MQuad.py +25 -0
- biopipen/scripts/scrna/MarkersFinder.R +679 -400
- biopipen/scripts/scrna/MetaMarkers.R +265 -161
- biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
- biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
- biopipen/scripts/scrna/RadarPlots.R +355 -134
- biopipen/scripts/scrna/ScFGSEA.R +298 -100
- biopipen/scripts/scrna/ScSimulation.R +65 -0
- biopipen/scripts/scrna/ScVelo.py +617 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
- biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
- biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
- biopipen/scripts/scrna/SeuratClustering.R +36 -233
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
- biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
- biopipen/scripts/scrna/SeuratPreparing.R +223 -173
- biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
- biopipen/scripts/scrna/SeuratTo10X.R +27 -0
- biopipen/scripts/scrna/Slingshot.R +65 -0
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
- biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
- biopipen/scripts/scrna/scvelo_paga.py +313 -0
- biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
- biopipen/scripts/snp/MatrixEQTL.R +217 -0
- biopipen/scripts/snp/Plink2GTMat.py +148 -0
- biopipen/scripts/snp/PlinkCallRate.R +199 -0
- biopipen/scripts/snp/PlinkFilter.py +100 -0
- biopipen/scripts/snp/PlinkFreq.R +291 -0
- biopipen/scripts/snp/PlinkFromVcf.py +81 -0
- biopipen/scripts/snp/PlinkHWE.R +85 -0
- biopipen/scripts/snp/PlinkHet.R +96 -0
- biopipen/scripts/snp/PlinkIBD.R +196 -0
- biopipen/scripts/snp/PlinkSimulation.py +124 -0
- biopipen/scripts/snp/PlinkUpdateName.py +124 -0
- biopipen/scripts/stats/ChowTest.R +146 -0
- biopipen/scripts/stats/DiffCoexpr.R +152 -0
- biopipen/scripts/stats/LiquidAssoc.R +135 -0
- biopipen/scripts/stats/Mediation.R +108 -0
- biopipen/scripts/stats/MetaPvalue.R +130 -0
- biopipen/scripts/stats/MetaPvalue1.R +74 -0
- biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
- biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
- biopipen/scripts/tcr/Attach2Seurat.R +3 -2
- biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
- biopipen/scripts/tcr/CDR3Clustering.R +343 -0
- biopipen/scripts/tcr/ClonalStats.R +526 -0
- biopipen/scripts/tcr/CloneResidency.R +255 -131
- biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
- biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
- biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
- biopipen/scripts/tcr/GIANA/query.py +164 -162
- biopipen/scripts/tcr/Immunarch-basic.R +31 -9
- biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
- biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
- biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
- biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
- biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
- biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
- biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
- biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
- biopipen/scripts/tcr/Immunarch.R +63 -11
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
- biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
- biopipen/scripts/tcr/SampleDiversity.R +1 -1
- biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
- biopipen/scripts/tcr/ScRepLoading.R +166 -0
- biopipen/scripts/tcr/TCRClusterStats.R +176 -22
- biopipen/scripts/tcr/TCRDock.py +110 -0
- biopipen/scripts/tcr/TESSA.R +102 -118
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/tcr/immunarch-patched.R +142 -0
- biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
- biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
- biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
- biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
- biopipen/scripts/vcf/BcftoolsSort.py +113 -0
- biopipen/scripts/vcf/BcftoolsView.py +73 -0
- biopipen/scripts/vcf/TruvariBench.sh +14 -7
- biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
- biopipen/scripts/vcf/TruvariConsistency.R +1 -1
- biopipen/scripts/vcf/Vcf2Bed.py +2 -2
- biopipen/scripts/vcf/VcfAnno.py +11 -11
- biopipen/scripts/vcf/VcfDownSample.sh +22 -10
- biopipen/scripts/vcf/VcfFilter.py +5 -5
- biopipen/scripts/vcf/VcfFix.py +7 -7
- biopipen/scripts/vcf/VcfFix_utils.py +13 -4
- biopipen/scripts/vcf/VcfIndex.py +3 -3
- biopipen/scripts/vcf/VcfIntersect.py +3 -3
- biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
- biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
- biopipen/scripts/vcf/bcftools_utils.py +52 -0
- biopipen/scripts/web/Download.py +8 -4
- biopipen/scripts/web/DownloadList.py +5 -5
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
- biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
- biopipen/scripts/web/gcloud_common.py +49 -0
- biopipen/utils/gene.py +108 -60
- biopipen/utils/misc.py +146 -20
- biopipen/utils/reference.py +64 -20
- biopipen/utils/reporter.py +177 -0
- biopipen/utils/vcf.py +1 -1
- biopipen-0.34.26.dist-info/METADATA +27 -0
- biopipen-0.34.26.dist-info/RECORD +292 -0
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
- biopipen/ns/bcftools.py +0 -111
- biopipen/ns/scrna_basic.py +0 -255
- biopipen/reports/delim/SampleInfo.svelte +0 -36
- biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
- biopipen/reports/scrna/ScFGSEA.svelte +0 -35
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
- biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
- biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
- biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
- biopipen/scripts/gene/GeneNameConversion.py +0 -66
- biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
- biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
- biopipen/scripts/scrna/ExprImpution.R +0 -7
- biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
- biopipen/scripts/scrna/Write10X.R +0 -11
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
- biopipen/scripts/tcr/TCRClustering.R +0 -280
- biopipen/utils/common_docstrs.py +0 -61
- biopipen/utils/gene.R +0 -49
- biopipen/utils/gsea.R +0 -193
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -114
- biopipen/utils/mutate_helpers.R +0 -433
- biopipen/utils/plot.R +0 -173
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -115
- biopipen-0.21.0.dist-info/METADATA +0 -22
- biopipen-0.21.0.dist-info/RECORD +0 -218
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
from pathlib import Path, PosixPath # noqa: F401
|
|
3
|
+
|
|
4
|
+
from biopipen.utils.misc import run_command, logger
|
|
5
|
+
from biopipen.scripts.vcf.bcftools_utils import run_bcftools
|
|
6
|
+
|
|
7
|
+
# Job input/output and process options. The `{{ ... }}` placeholders are
# template expressions that are rendered before this script is executed.
infile: str = {{in.infile | quote}} # pyright: ignore # noqa: E999
outfile: str = {{out.outfile | quote}} # pyright: ignore
envs: dict = {{envs | dict | repr}} # pyright: ignore

# Intermediate files (extracted header, rewritten header, reheadered VCF) are
# written next to the output file.
outdir = Path(outfile).parent
# Pop the options that this script handles itself, so that whatever is left in
# `envs` is what gets handed to run_bcftools() at the end of the script.
bcftools = envs.pop("bcftools")
tabix = envs.pop("tabix")
# Removed from envs; not referenced again in this script.
ncores = envs.pop("ncores")
gz = envs.pop("gz")
index = envs.pop("index")
chrsize = envs.pop("chrsize")
notfound = envs.pop("notfound")
|
|
19
|
+
|
|
20
|
+
if chrsize:
|
|
21
|
+
class Contig:
|
|
22
|
+
def __init__(self, name: str, length: str):
|
|
23
|
+
self.name = name
|
|
24
|
+
self.length = length
|
|
25
|
+
|
|
26
|
+
def __str__(self) -> str:
|
|
27
|
+
return f"##contig=<ID={self.name},length={self.length}>"
|
|
28
|
+
|
|
29
|
+
def parse_header(header_file: Path) -> tuple[list[str], dict[str, Contig]]:
|
|
30
|
+
hlines = []
|
|
31
|
+
ctgs = {}
|
|
32
|
+
with open(header_file) as fh:
|
|
33
|
+
for line in fh:
|
|
34
|
+
if line.startswith("##contig"):
|
|
35
|
+
ctg = line.strip().split("##contig=<ID=")[1].split(",length=")
|
|
36
|
+
ctgs[ctg[0]] = Contig(ctg[0], ctg[1].replace(">", ""))
|
|
37
|
+
else:
|
|
38
|
+
hlines.append(line.strip())
|
|
39
|
+
return hlines, ctgs
|
|
40
|
+
|
|
41
|
+
def match_contigs(
|
|
42
|
+
ctgs: dict[str, Contig],
|
|
43
|
+
chroms: list[str],
|
|
44
|
+
notfound: Literal["error", "remove", "start", "end"],
|
|
45
|
+
) -> list[str]:
|
|
46
|
+
if (
|
|
47
|
+
ctgs
|
|
48
|
+
and chroms
|
|
49
|
+
and all(chrom.startswith("chr") for chrom in chroms)
|
|
50
|
+
and not any(chrom.startswith("chr") for chrom in ctgs)
|
|
51
|
+
):
|
|
52
|
+
logger.warning(
|
|
53
|
+
"Removing 'chr' prefix from chromosomes in envs.chrsize file, "
|
|
54
|
+
"because the input VCF file does not have 'chr' prefix."
|
|
55
|
+
)
|
|
56
|
+
chroms = [chrom[3:] for chrom in chroms]
|
|
57
|
+
|
|
58
|
+
new_ctgs = []
|
|
59
|
+
for chrom in chroms:
|
|
60
|
+
if chrom in ctgs:
|
|
61
|
+
new_ctgs.append(str(ctgs[chrom]))
|
|
62
|
+
del ctgs[chrom]
|
|
63
|
+
|
|
64
|
+
if ctgs:
|
|
65
|
+
if notfound == "error":
|
|
66
|
+
raise ValueError(
|
|
67
|
+
"Chromosomes not found in envs.chrsize file: "
|
|
68
|
+
f"{', '.join(ctgs.keys())}"
|
|
69
|
+
)
|
|
70
|
+
elif notfound == "start":
|
|
71
|
+
new_ctgs = [str(ctg) for ctg in ctgs.values()] + new_ctgs
|
|
72
|
+
elif notfound == "end":
|
|
73
|
+
new_ctgs = new_ctgs + [str(ctg) for ctg in ctgs.values()]
|
|
74
|
+
|
|
75
|
+
return new_ctgs
|
|
76
|
+
|
|
77
|
+
chroms = []
|
|
78
|
+
with Path(chrsize).expanduser().open() as fh:
|
|
79
|
+
for line in fh:
|
|
80
|
+
chrom = line.strip().split()[0]
|
|
81
|
+
chroms.append(chrom)
|
|
82
|
+
|
|
83
|
+
header_file = outdir / "header.txt"
|
|
84
|
+
run_command(f'{bcftools} view -h {infile} > {header_file}', fg=True)
|
|
85
|
+
header_lines, contigs = parse_header(header_file)
|
|
86
|
+
new_contigs = match_contigs(contigs, chroms, notfound=notfound)
|
|
87
|
+
header_lines = [header_lines[0], *new_contigs, *header_lines[1:]]
|
|
88
|
+
reheader_file = outdir / "reheader.txt"
|
|
89
|
+
with open(reheader_file, "w") as fh:
|
|
90
|
+
fh.writelines([f"{line}\n" for line in header_lines])
|
|
91
|
+
|
|
92
|
+
reheader_vcf = outdir / f"{Path(infile).stem}_reheader.vcf"
|
|
93
|
+
run_command([
|
|
94
|
+
bcftools, "reheader",
|
|
95
|
+
"--header", reheader_file,
|
|
96
|
+
"-o", reheader_vcf,
|
|
97
|
+
infile
|
|
98
|
+
], fg=True)
|
|
99
|
+
|
|
100
|
+
infile = str(reheader_vcf)
|
|
101
|
+
|
|
102
|
+
# Fill in the sub-command, the input and the output for run_bcftools().
# NOTE(review): the "" and "_" keys are presumably the command prefix and the
# positional argument as understood by run_bcftools() - confirm there.
envs.update({"": [bcftools, "sort"], "_": infile, "o": outfile})

# Indexing was requested for an uncompressed output: switch compression on.
if index and not gz:
    logger.warning("Forcing envs.gz to True because envs.index is True.")
    gz = True

# Only choose the output type when none of its spellings was given in envs.
output_type_keys = ("O", "output-type", "output_type")
if not any(key in envs for key in output_type_keys):
    if gz:
        envs["O"] = "z"
    else:
        envs["O"] = "v"

run_bcftools(envs, bcftools=bcftools, index=index, tabix=tabix)
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from contextlib import suppress
|
|
2
|
+
# In case there are paths passed to envs
|
|
3
|
+
from pathlib import PosixPath # noqa: F401
|
|
4
|
+
|
|
5
|
+
from biopipen.utils.misc import logger
|
|
6
|
+
from biopipen.utils.reference import tabix_index
|
|
7
|
+
from biopipen.scripts.vcf.bcftools_utils import run_bcftools
|
|
8
|
+
|
|
9
|
+
infile: str = {{in.infile | quote}} # pyright: ignore # noqa: #999
|
|
10
|
+
regions_file: str = {{in.regions_file | quote}} # pyright: ignore
|
|
11
|
+
samples_file: str = {{in.samples_file | quote}} # pyright: ignore
|
|
12
|
+
outfile: str = {{out.outfile | quote}} # pyright: ignore
|
|
13
|
+
envs: dict = {{envs | dict | repr}} # pyright: ignore
|
|
14
|
+
|
|
15
|
+
bcftools = envs.pop("bcftools")
|
|
16
|
+
tabix = envs.pop("tabix")
|
|
17
|
+
ncores = envs.pop("ncores")
|
|
18
|
+
gz = envs.pop("gz")
|
|
19
|
+
index = envs.pop("index")
|
|
20
|
+
|
|
21
|
+
if regions_file:
|
|
22
|
+
if "R" in envs or "regions_file" in envs or "regions-file" in envs:
|
|
23
|
+
logger.warning(
|
|
24
|
+
r"Ignoring envs\[regions_file/regions-file/R] "
|
|
25
|
+
"because in.regionsfile is provided."
|
|
26
|
+
)
|
|
27
|
+
with suppress(KeyError):
|
|
28
|
+
del envs["regions_file"]
|
|
29
|
+
with suppress(KeyError):
|
|
30
|
+
del envs["regions-file"]
|
|
31
|
+
with suppress(KeyError):
|
|
32
|
+
del envs["R"]
|
|
33
|
+
elif "R" in envs or "regions_file" in envs or "regions-file" in envs:
|
|
34
|
+
regions_file = (
|
|
35
|
+
envs.pop("regions_file", None)
|
|
36
|
+
or envs.pop("regions-file", None)
|
|
37
|
+
or envs.pop("R", None)
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
if samples_file:
|
|
41
|
+
if "S" in envs or "samples_file" in envs or "samples-file" in envs:
|
|
42
|
+
logger.warning(
|
|
43
|
+
"Ignoring envs[samples_file/samples-file/S] "
|
|
44
|
+
"because in.samples_file is provided."
|
|
45
|
+
)
|
|
46
|
+
with suppress(KeyError):
|
|
47
|
+
del envs["samples_file"]
|
|
48
|
+
with suppress(KeyError):
|
|
49
|
+
del envs["samples-file"]
|
|
50
|
+
with suppress(KeyError):
|
|
51
|
+
del envs["S"]
|
|
52
|
+
elif "S" in envs or "samples_file" in envs or "samples-file" in envs:
|
|
53
|
+
samples_file = (
|
|
54
|
+
envs.pop("samples_file", None)
|
|
55
|
+
or envs.pop("samples-file", None)
|
|
56
|
+
or envs.pop("S", None)
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
if index and not gz:
|
|
60
|
+
logger.warning("Forcing envs.gz to True because envs.index is True.")
|
|
61
|
+
gz = True
|
|
62
|
+
|
|
63
|
+
if "O" not in envs and "output-type" not in envs and "output_type" not in envs:
|
|
64
|
+
envs["O"] = "z" if gz else "v"
|
|
65
|
+
|
|
66
|
+
envs[""] = [bcftools, "view"]
|
|
67
|
+
envs["_"] = tabix_index(infile, "vcf", tabix=tabix)
|
|
68
|
+
envs["o"] = outfile
|
|
69
|
+
envs["threads"] = ncores
|
|
70
|
+
envs["regions_file"] = regions_file
|
|
71
|
+
envs["samples_file"] = samples_file
|
|
72
|
+
|
|
73
|
+
run_bcftools(envs, bcftools=bcftools, index=index, tabix=tabix)
|
|
@@ -1,13 +1,15 @@
|
|
|
1
|
+
# shellcheck disable=SC1083
|
|
1
2
|
compvcf={{in.compvcf | quote}}
|
|
2
3
|
basevcf={{in.basevcf | quote}}
|
|
3
4
|
outdir={{out.outdir | quote}}
|
|
4
5
|
truvari={{envs.truvari | quote}}
|
|
5
6
|
ref={{envs.ref | quote}}
|
|
6
7
|
refdist={{envs.refdist | quote}}
|
|
7
|
-
|
|
8
|
+
pctseq={{envs.pctseq | quote}}
|
|
8
9
|
pctsize={{envs.pctsize | quote}}
|
|
9
10
|
pctovl={{envs.pctovl | quote}}
|
|
10
11
|
sizemax={{envs.sizemax | default: 50000 | quote}}
|
|
12
|
+
# shellcheck disable=SC1054
|
|
11
13
|
{% if envs.typeignore %}
|
|
12
14
|
typeignore="--typeignore"
|
|
13
15
|
{% else %}
|
|
@@ -15,20 +17,25 @@ typeignore=""
|
|
|
15
17
|
{% endif %}
|
|
16
18
|
{% if envs.multimatch %}
|
|
17
19
|
multimatch="--multimatch"
|
|
20
|
+
# shellcheck disable=SC1009
|
|
18
21
|
{% else %}
|
|
19
22
|
multimatch=""
|
|
23
|
+
# shellcheck disable=SC1073
|
|
20
24
|
{% endif %}
|
|
21
25
|
|
|
22
26
|
rm -rf $outdir
|
|
23
|
-
$truvari bench \
|
|
24
|
-
-c
|
|
25
|
-
-b
|
|
26
|
-
-f
|
|
27
|
+
cmd="$truvari bench \
|
|
28
|
+
-c '$compvcf' \
|
|
29
|
+
-b '$basevcf' \
|
|
30
|
+
-f '$ref' \
|
|
27
31
|
--refdist $refdist \
|
|
28
|
-
--
|
|
32
|
+
--pctseq $pctseq \
|
|
29
33
|
--pctsize $pctsize \
|
|
30
34
|
--pctovl $pctovl \
|
|
31
35
|
--sizemax $sizemax \
|
|
32
36
|
$typeignore \
|
|
33
37
|
$multimatch \
|
|
34
|
-
-o $outdir
|
|
38
|
+
-o $outdir"
|
|
39
|
+
|
|
40
|
+
echo "$cmd"
|
|
41
|
+
eval "$cmd"
|
|
@@ -1,11 +1,7 @@
|
|
|
1
|
-
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
-
source("{{biopipen_dir}}/utils/plot.R")
|
|
3
|
-
|
|
4
|
-
library(ggprism)
|
|
5
1
|
library(rjson)
|
|
2
|
+
library(rlang)
|
|
6
3
|
library(dplyr)
|
|
7
|
-
|
|
8
|
-
theme_set(theme_prism(axis_text_angle = 90))
|
|
4
|
+
library(plotthis)
|
|
9
5
|
|
|
10
6
|
indirs = {{in.indirs | r}}
|
|
11
7
|
outdir = {{out.outdir | r}}
|
|
@@ -17,7 +13,7 @@ read_summary = function() {
|
|
|
17
13
|
|
|
18
14
|
summaries = NULL
|
|
19
15
|
for (indir in indirs) {
|
|
20
|
-
summary = fromJSON(file=file.path(indir, "summary.
|
|
16
|
+
summary = fromJSON(file=file.path(indir, "summary.json"))
|
|
21
17
|
summary$gt_matrix = NULL
|
|
22
18
|
summary$Sample = sub(".truvari_bench", "", basename(indir), fixed=T)
|
|
23
19
|
summaries = bind_rows(summaries, summary)
|
|
@@ -39,14 +35,21 @@ get_devpars = function() {
|
|
|
39
35
|
|
|
40
36
|
plot_summary = function(col) {
|
|
41
37
|
outfile = file.path(outdir, paste0(col, ".png"))
|
|
42
|
-
|
|
38
|
+
p <- plotthis::BarPlot(
|
|
43
39
|
summaries,
|
|
44
|
-
"
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
40
|
+
x = "Sample",
|
|
41
|
+
y = col,
|
|
42
|
+
x_text_angle = 90
|
|
43
|
+
)
|
|
44
|
+
devpars <- get_devpars()
|
|
45
|
+
png(
|
|
46
|
+
filename = outfile,
|
|
47
|
+
width = devpars$width,
|
|
48
|
+
height = devpars$height,
|
|
49
|
+
res = devpars$res
|
|
49
50
|
)
|
|
51
|
+
print(p)
|
|
52
|
+
dev.off()
|
|
50
53
|
}
|
|
51
54
|
|
|
52
55
|
main = function() {
|
biopipen/scripts/vcf/Vcf2Bed.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from cyvcf2 import VCF, Variant
|
|
2
2
|
|
|
3
|
-
infile = {{in.infile | quote}} # pyright: ignore
|
|
4
|
-
outfile = {{out.outfile | quote}} # pyright: ignore
|
|
3
|
+
infile: str = {{in.infile | quote}} # pyright: ignore # noqa: E999
|
|
4
|
+
outfile: str = {{out.outfile | quote}} # pyright: ignore
|
|
5
5
|
# vcf, default 1
|
|
6
6
|
inbase = {{envs.inbase | int}} # pyright: ignore
|
|
7
7
|
# bed, default 0
|
biopipen/scripts/vcf/VcfAnno.py
CHANGED
|
@@ -2,22 +2,22 @@ from os import path
|
|
|
2
2
|
|
|
3
3
|
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
4
4
|
|
|
5
|
-
infile = {{in.infile | quote}} # pyright: ignore
|
|
6
|
-
outfile = {{out.outfile | quote}} # pyright: ignore
|
|
7
|
-
joboutdir = {{job.outdir | quote}} # pyright: ignore
|
|
5
|
+
infile: str = {{in.infile | quote}} # pyright: ignore # noqa
|
|
6
|
+
outfile: str = {{out.outfile | quote}} # pyright: ignore
|
|
7
|
+
joboutdir: str = {{job.outdir | quote}} # pyright: ignore
|
|
8
8
|
vcfanno = {{envs.vcfanno | quote}} # pyright: ignore
|
|
9
|
-
ncores = {{envs.ncores | repr}} # pyright: ignore
|
|
10
|
-
args = {{envs.args |
|
|
9
|
+
ncores: int = {{envs.ncores | repr}} # pyright: ignore
|
|
10
|
+
args: dict = {{envs.args | dict}} # pyright: ignore
|
|
11
11
|
|
|
12
|
-
{% set conf = envs.conffile or in.conffile %}
|
|
13
|
-
{% if conf | isinstance: dict %}
|
|
12
|
+
{% set conf = envs.conffile or in.conffile %} # pyright: ignore # noqa
|
|
13
|
+
{% if conf | isinstance: dict %} # pyright: ignore # noqa
|
|
14
14
|
conffile = path.join(joboutdir, "config.toml")
|
|
15
|
-
conf = {{ conf | toml | quote }}
|
|
15
|
+
conf: str = {{ conf | toml | quote }} # pyright: ignore # noqa
|
|
16
16
|
with open(conffile, "w") as f:
|
|
17
17
|
f.write(conf)
|
|
18
|
-
{% else %}
|
|
19
|
-
conffile = {{conf | quote}}
|
|
20
|
-
{% endif %}
|
|
18
|
+
{% else %} # pyright: ignore # noqa
|
|
19
|
+
conffile = {{conf | quote}} # pyright: ignore # noqa
|
|
20
|
+
{% endif %} # pyright: ignore # noqa
|
|
21
21
|
|
|
22
22
|
args["p"] = ncores
|
|
23
23
|
args["_"] = [conffile, infile]
|
|
@@ -1,25 +1,37 @@
|
|
|
1
|
+
# shellcheck disable=SC2148
|
|
2
|
+
# shellcheck disable=SC2036
|
|
3
|
+
# shellcheck disable=SC2030
|
|
4
|
+
# shellcheck disable=SC1083
|
|
5
|
+
# shellcheck disable=SC2288
|
|
1
6
|
infile={{in.infile | quote}}
|
|
2
7
|
outfile={{out.outfile | quote}}
|
|
3
8
|
n={{envs.n}}
|
|
4
9
|
|
|
10
|
+
# shellcheck disable=SC2031
|
|
5
11
|
if [[ $infile == *.gz ]]; then
|
|
6
|
-
outfile=$(echo $outfile | sed -r "s/\.gz$//")
|
|
7
|
-
|
|
12
|
+
outfile=$(echo "$outfile" | sed -r "s/\.gz$//")
|
|
13
|
+
# shellcheck disable=SC2126
|
|
14
|
+
nheader=$(zcat "$infile" | head -n 9999 | grep "^#" | wc -l | cut -d' ' -f1)
|
|
8
15
|
if [[ ! $n -gt 1 ]]; then
|
|
9
|
-
nrows=$(zcat $infile | wc -l | cut -d' ' -f1)
|
|
16
|
+
nrows=$(zcat "$infile" | wc -l | cut -d' ' -f1)
|
|
17
|
+
# shellcheck disable=SC2004
|
|
10
18
|
nvars=$(($nrows - $nheader))
|
|
11
19
|
n=$(echo "$nvars * $n" | bc)
|
|
12
20
|
fi
|
|
13
|
-
zcat $infile | head -n $nheader > $outfile
|
|
14
|
-
|
|
15
|
-
|
|
21
|
+
zcat "$infile" | head -n "$nheader" > "$outfile"
|
|
22
|
+
# shellcheck disable=SC2004
|
|
23
|
+
zcat "$infile" | tail -n +$(($nheader + 1)) | shuf -n "$n" | LC_ALL=C sort -k1,1V -k2,2n >> "$outfile"
|
|
24
|
+
bgzip "$outfile"
|
|
16
25
|
else
|
|
17
|
-
|
|
26
|
+
# shellcheck disable=SC2126
|
|
27
|
+
nheader=$(head -n 9999 "$infile" | grep "^#" | wc -l | cut -d' ' -f1)
|
|
18
28
|
if [[ ! $n -gt 1 ]]; then
|
|
19
|
-
nrows=$(wc -l $infile | cut -d' ' -f1)
|
|
29
|
+
nrows=$(wc -l "$infile" | cut -d' ' -f1)
|
|
30
|
+
# shellcheck disable=SC2004
|
|
20
31
|
nvars=$(($nrows - $nheader))
|
|
21
32
|
n=$(echo "$nvars * $n" | bc)
|
|
22
33
|
fi
|
|
23
|
-
head -n $nheader $infile > $outfile
|
|
24
|
-
|
|
34
|
+
head -n "$nheader" "$infile" > "$outfile"
|
|
35
|
+
# shellcheck disable=SC2004
|
|
36
|
+
tail -n +$(($nheader + 1)) "$infile" | shuf -n "$n" | LC_ALL=C sort -k1,1V -k2,2n >> "$outfile"
|
|
25
37
|
fi
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
from cyvcf2 import VCF, Writer, Variant
|
|
2
2
|
|
|
3
|
-
infile = {{in.invcf |
|
|
4
|
-
outfile = {{out.outfile |
|
|
3
|
+
infile: str = {{in.invcf | quote}} # pyright: ignore # noqa: E999
|
|
4
|
+
outfile: str = {{out.outfile | quote}} # pyright: ignore
|
|
5
5
|
|
|
6
|
-
{{envs.helper}}
|
|
6
|
+
{{envs.helper}} # pyright: ignore # noqa: E999
|
|
7
7
|
|
|
8
8
|
keep = {{envs.keep | repr}} # pyright: ignore
|
|
9
|
-
filters = {{envs.filters | repr}} # pyright: ignore
|
|
10
|
-
filter_descs = {{envs.filter_descs | repr}} # pyright: ignore
|
|
9
|
+
filters: dict = {{envs.filters | repr}} # pyright: ignore
|
|
10
|
+
filter_descs: dict = {{envs.filter_descs | repr}} # pyright: ignore
|
|
11
11
|
|
|
12
12
|
# builtin filters
|
|
13
13
|
BUILTIN_FILTERS = {}
|
biopipen/scripts/vcf/VcfFix.py
CHANGED
|
@@ -7,17 +7,17 @@ from biopipen.scripts.vcf.VcfFix_utils import ( # noqa: F401
|
|
|
7
7
|
HeaderContig,
|
|
8
8
|
HeaderGeneral,
|
|
9
9
|
Fields,
|
|
10
|
-
Info,
|
|
11
|
-
Format,
|
|
12
|
-
Alt,
|
|
13
|
-
Filter,
|
|
14
|
-
Sample,
|
|
15
|
-
Samples,
|
|
10
|
+
# Info,
|
|
11
|
+
# Format,
|
|
12
|
+
# Alt,
|
|
13
|
+
# Filter,
|
|
14
|
+
# Sample,
|
|
15
|
+
# Samples,
|
|
16
16
|
Variant,
|
|
17
17
|
)
|
|
18
18
|
from biopipen.scripts.vcf.VcfFix_utils import fix_vcffile
|
|
19
19
|
|
|
20
|
-
infile = {{in.infile | quote}} # pyright: ignore
|
|
20
|
+
infile = {{in.infile | quote}} # pyright: ignore # noqa: E999
|
|
21
21
|
instem = {{in.infile | stem | quote}} # pyright: ignore
|
|
22
22
|
outfile = {{out.outfile | quote}} # pyright: ignore
|
|
23
23
|
|
|
@@ -1,6 +1,15 @@
|
|
|
1
1
|
import re
|
|
2
2
|
import gzip
|
|
3
|
-
from biopipen.utils.vcf import
|
|
3
|
+
from biopipen.utils.vcf import (
|
|
4
|
+
HeaderInfo,
|
|
5
|
+
HeaderFormat,
|
|
6
|
+
HeaderFilter,
|
|
7
|
+
HeaderContig,
|
|
8
|
+
HeaderGeneral,
|
|
9
|
+
Fields,
|
|
10
|
+
Variant,
|
|
11
|
+
HeaderItem,
|
|
12
|
+
)
|
|
4
13
|
|
|
5
14
|
|
|
6
15
|
def line_to_obj(line: str):
|
|
@@ -41,7 +50,7 @@ def handle_obj(obj, fixes: dict):
|
|
|
41
50
|
|
|
42
51
|
regex = fix.get("regex")
|
|
43
52
|
if regex:
|
|
44
|
-
if not re.search(regex, obj.raw):
|
|
53
|
+
if not re.search(regex, obj.raw): # type: ignore
|
|
45
54
|
continue
|
|
46
55
|
|
|
47
56
|
return fix["fix"](obj.raw if kind is None else obj)
|
|
@@ -63,11 +72,11 @@ def fix_vcffile(vcffile, outfile, fixes):
|
|
|
63
72
|
else:
|
|
64
73
|
modify_fixes.append(fix)
|
|
65
74
|
|
|
66
|
-
inopen = gzip.open if vcffile.endswith(".gz") else open
|
|
75
|
+
inopen = gzip.open if str(vcffile).endswith(".gz") else open
|
|
67
76
|
with inopen(vcffile, "rt") as fin, open(outfile, "w") as fout:
|
|
68
77
|
for line in fin:
|
|
69
78
|
obj = line_to_obj(line)
|
|
70
|
-
out = handle_obj(obj, modify_fixes)
|
|
79
|
+
out = handle_obj(obj, modify_fixes) # type: ignore
|
|
71
80
|
if obj.kind == "fields":
|
|
72
81
|
for fix in header_append_fixes:
|
|
73
82
|
fout.write(str(fix["fix"](None)).rstrip("\n") + "\n")
|
biopipen/scripts/vcf/VcfIndex.py
CHANGED
|
@@ -4,10 +4,10 @@ from os import path
|
|
|
4
4
|
from biopipen.utils.reference import tabix_index
|
|
5
5
|
from biopipen.utils.misc import run_command
|
|
6
6
|
|
|
7
|
-
infile = {{in.infile |
|
|
8
|
-
outfile = Path({{out.outfile |
|
|
7
|
+
infile: str = {{in.infile | quote}} # pyright: ignore # noqa
|
|
8
|
+
outfile = Path({{out.outfile | quote}}) # pyright: ignore
|
|
9
9
|
outidx = {{out.outidx | repr}} # pyright: ignore
|
|
10
|
-
tabix = {{envs.tabix | repr}} # pyright: ignore
|
|
10
|
+
tabix: str = {{envs.tabix | repr}} # pyright: ignore
|
|
11
11
|
ncores = {{envs.ncores | repr}} # pyright: ignore
|
|
12
12
|
|
|
13
13
|
outfile_with_index = tabix_index(infile, "vcf", outfile.parent, tabix)
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
2
2
|
|
|
3
|
-
infile1 = {{in.infile1 |
|
|
4
|
-
infile2 = {{in.infile2 |
|
|
5
|
-
outfile = {{out.outfile |
|
|
3
|
+
infile1: str = {{in.infile1 | quote}} # pyright: ignore # noqa
|
|
4
|
+
infile2 = {{in.infile2 | quote}} # pyright: ignore
|
|
5
|
+
outfile = {{out.outfile | quote}} # pyright: ignore
|
|
6
6
|
bcftools = {{envs.bcftools | repr}} # pyright: ignore
|
|
7
7
|
gz = {{envs.gz | repr}} # pyright: ignore
|
|
8
8
|
index = {{envs.index | repr}} # pyright: ignore
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# shellcheck disable=SC2148
|
|
2
|
+
# shellcheck disable=SC1083
|
|
1
3
|
invcf={{ in.invcf | quote }}
|
|
2
4
|
outvcf={{ out.outvcf | quote }}
|
|
3
5
|
rejfile={{ job.outdir | joinpaths: "rejected.vcf" | quote }}
|
|
@@ -6,12 +8,15 @@ chain={{ envs.chain | quote }}
|
|
|
6
8
|
reffa={{ envs.reffa | quote }}
|
|
7
9
|
args={{ envs.args | dict_to_cli_args: join=True }}
|
|
8
10
|
|
|
11
|
+
# shellcheck disable=SC2154
|
|
9
12
|
refdict="${reffa%.fa}.dict"
|
|
10
13
|
if [[ ! -e "$refdict" ]]; then
|
|
11
14
|
echo "Sequence dictionary does not exist: $refdict" 1>&2
|
|
12
15
|
exit 1
|
|
13
16
|
fi
|
|
14
17
|
|
|
18
|
+
# shellcheck disable=SC2154
|
|
19
|
+
# shellcheck disable=SC2086
|
|
15
20
|
$gatk LiftoverVcf \
|
|
16
21
|
$args \
|
|
17
22
|
--INPUT "$invcf" \
|
|
@@ -3,12 +3,12 @@ import shlex
|
|
|
3
3
|
import concurrent.futures
|
|
4
4
|
from subprocess import Popen, check_output
|
|
5
5
|
|
|
6
|
-
infile = {{in.infile |
|
|
7
|
-
outdir = {{out.outdir |
|
|
8
|
-
bcftools = {{envs.bcftools | repr}} # pyright: ignore
|
|
6
|
+
infile: str = {{in.infile | quote}} # pyright: ignore # noqa
|
|
7
|
+
outdir: str = {{out.outdir | quote}} # pyright: ignore
|
|
8
|
+
bcftools: str = {{envs.bcftools | repr}} # pyright: ignore
|
|
9
9
|
gz = {{envs.gz | repr}} # pyright: ignore
|
|
10
10
|
index = {{envs.index | repr}} # pyright: ignore
|
|
11
|
-
ncores = {{envs.ncores | int}} # pyright: ignore
|
|
11
|
+
ncores: int = {{envs.ncores | int}} # pyright: ignore
|
|
12
12
|
private = {{envs.private | repr}} # pyright: ignore
|
|
13
13
|
|
|
14
14
|
if index:
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Utilities for bcftools"""
|
|
2
|
+
|
|
3
|
+
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
4
|
+
from biopipen.utils.reference import tabix_index
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def bcftools_version(bcftools: str) -> tuple[int, ...]:
|
|
8
|
+
"""Get the version of bcftools
|
|
9
|
+
|
|
10
|
+
Args:
|
|
11
|
+
bcftools (str): Path to bcftools
|
|
12
|
+
|
|
13
|
+
Returns:
|
|
14
|
+
tuple[int, ...]: The version of bcftools
|
|
15
|
+
"""
|
|
16
|
+
bversion = (
|
|
17
|
+
run_command([bcftools, "version"], stdout="return")
|
|
18
|
+
.splitlines()[0] # bcftools 1.20 # type: ignore
|
|
19
|
+
.replace("bcftools", "")
|
|
20
|
+
.strip() # 1.20
|
|
21
|
+
.split(".")
|
|
22
|
+
)
|
|
23
|
+
return tuple(map(int, bversion))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def run_bcftools(
|
|
27
|
+
args: dict,
|
|
28
|
+
bcftools: str, # TODO: get from the first argument of args
|
|
29
|
+
index: bool,
|
|
30
|
+
tabix: str
|
|
31
|
+
) -> None:
|
|
32
|
+
"""Run bcftools with the given arguments
|
|
33
|
+
|
|
34
|
+
Args:
|
|
35
|
+
args: Arguments to pass to bcftools
|
|
36
|
+
bcftools (str): Path to bcftools
|
|
37
|
+
index (bool): Whether to index the output
|
|
38
|
+
tabix (str): Path to tabix
|
|
39
|
+
"""
|
|
40
|
+
if not index:
|
|
41
|
+
run_command(dict_to_cli_args(args, dashify=True), fg=True)
|
|
42
|
+
else:
|
|
43
|
+
bversion = bcftools_version(bcftools)
|
|
44
|
+
if bversion >= (1, 20):
|
|
45
|
+
# requires bcftools 1.20+
|
|
46
|
+
# '--write-index tbi' not working
|
|
47
|
+
# it has to be '--write-index=tbi'
|
|
48
|
+
args["write_index=tbi"] = True
|
|
49
|
+
run_command(dict_to_cli_args(args, dashify=True), fg=True)
|
|
50
|
+
else:
|
|
51
|
+
run_command(dict_to_cli_args(args, dashify=True), fg=True)
|
|
52
|
+
tabix_index(args["o"], "vcf", tmpdir=False, tabix=tabix)
|
biopipen/scripts/web/Download.py
CHANGED
|
@@ -2,13 +2,13 @@ from pathlib import Path
|
|
|
2
2
|
|
|
3
3
|
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
4
4
|
|
|
5
|
-
url = {{in.url |
|
|
6
|
-
outfile = Path({{out.outfile |
|
|
5
|
+
url = {{in.url | quote}} # pyright: ignore # noqa
|
|
6
|
+
outfile = Path({{out.outfile | quote}}) # pyright: ignore
|
|
7
7
|
tool = {{envs.tool | repr}} # pyright: ignore
|
|
8
8
|
wget = {{envs.wget | repr}} # pyright: ignore
|
|
9
9
|
aria2c = {{envs.aria2c | repr}} # pyright: ignore
|
|
10
10
|
ncores = {{envs.ncores | repr}} # pyright: ignore
|
|
11
|
-
args = {{envs.args | dict}} # pyright: ignore
|
|
11
|
+
args: dict = {{envs.args | dict}} # pyright: ignore
|
|
12
12
|
|
|
13
13
|
if tool == "wget":
|
|
14
14
|
args["_"] = url
|
|
@@ -28,4 +28,8 @@ elif tool == "aria2c":
|
|
|
28
28
|
|
|
29
29
|
else: # use python
|
|
30
30
|
import urllib
|
|
31
|
-
|
|
31
|
+
|
|
32
|
+
try:
|
|
33
|
+
urllib.urlretrieve(url, outfile) # type: ignore
|
|
34
|
+
except AttributeError:
|
|
35
|
+
urllib.request.urlretrieve(url, outfile) # type: ignore
|
|
@@ -2,13 +2,13 @@ from pathlib import Path
|
|
|
2
2
|
|
|
3
3
|
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
4
4
|
|
|
5
|
-
urlfile = {{in.urlfile |
|
|
6
|
-
outdir = Path({{out.outdir |
|
|
5
|
+
urlfile: str = {{in.urlfile | quote}} # pyright: ignore # noqa
|
|
6
|
+
outdir = Path({{out.outdir | quote}}) # pyright: ignore
|
|
7
7
|
tool = {{envs.tool | repr}} # pyright: ignore
|
|
8
8
|
wget = {{envs.wget | repr}} # pyright: ignore
|
|
9
9
|
aria2c = {{envs.aria2c | repr}} # pyright: ignore
|
|
10
10
|
ncores = {{envs.ncores | repr}} # pyright: ignore
|
|
11
|
-
args = {{envs.args | repr}} # pyright: ignore
|
|
11
|
+
args: dict = {{envs.args | repr}} # pyright: ignore
|
|
12
12
|
|
|
13
13
|
if tool == "wget":
|
|
14
14
|
args["i"] = urlfile
|
|
@@ -26,10 +26,10 @@ elif tool == "aria2c":
|
|
|
26
26
|
run_command(dict_to_cli_args(args, dashify=True), fg=True)
|
|
27
27
|
|
|
28
28
|
else: # use python
|
|
29
|
-
import
|
|
29
|
+
from urllib.request import urlretrieve
|
|
30
30
|
from urllib.parse import urlparse
|
|
31
31
|
with open(urlfile, "r") as furl:
|
|
32
32
|
for i, url in enumerate(furl.readlines()):
|
|
33
33
|
parsed = urlparse(url)
|
|
34
34
|
path = Path(parsed.path)
|
|
35
|
-
|
|
35
|
+
urlretrieve(url, f"{path.stem}-{i}{path.suffix}")
|