biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +28 -0
- biopipen/core/filters.py +79 -4
- biopipen/core/proc.py +12 -3
- biopipen/core/testing.py +75 -3
- biopipen/ns/bam.py +148 -6
- biopipen/ns/bed.py +75 -0
- biopipen/ns/cellranger.py +186 -0
- biopipen/ns/cellranger_pipeline.py +126 -0
- biopipen/ns/cnv.py +19 -3
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/cnvkit_pipeline.py +20 -12
- biopipen/ns/delim.py +34 -35
- biopipen/ns/gene.py +68 -23
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +39 -14
- biopipen/ns/plot.py +304 -1
- biopipen/ns/protein.py +183 -0
- biopipen/ns/regulatory.py +290 -0
- biopipen/ns/rnaseq.py +142 -5
- biopipen/ns/scrna.py +2053 -473
- biopipen/ns/scrna_metabolic_landscape.py +228 -382
- biopipen/ns/snp.py +659 -0
- biopipen/ns/stats.py +484 -0
- biopipen/ns/tcr.py +683 -98
- biopipen/ns/vcf.py +236 -2
- biopipen/ns/web.py +97 -6
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
- biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
- biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/common.svelte +15 -0
- biopipen/reports/protein/ProdigySummary.svelte +16 -0
- biopipen/reports/scrna/CellsDistribution.svelte +4 -39
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna/MarkersFinder.svelte +6 -126
- biopipen/reports/scrna/MetaMarkers.svelte +3 -75
- biopipen/reports/scrna/RadarPlots.svelte +4 -20
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
- biopipen/reports/snp/PlinkCallRate.svelte +24 -0
- biopipen/reports/snp/PlinkFreq.svelte +18 -0
- biopipen/reports/snp/PlinkHWE.svelte +18 -0
- biopipen/reports/snp/PlinkHet.svelte +18 -0
- biopipen/reports/snp/PlinkIBD.svelte +18 -0
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
- biopipen/reports/tcr/ClonalStats.svelte +16 -0
- biopipen/reports/tcr/CloneResidency.svelte +3 -93
- biopipen/reports/tcr/Immunarch.svelte +4 -155
- biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
- biopipen/reports/tcr/TESSA.svelte +11 -28
- biopipen/reports/utils/misc.liq +22 -7
- biopipen/scripts/bam/BamMerge.py +11 -15
- biopipen/scripts/bam/BamSampling.py +90 -0
- biopipen/scripts/bam/BamSort.py +141 -0
- biopipen/scripts/bam/BamSplitChroms.py +10 -10
- biopipen/scripts/bam/BamSubsetByBed.py +38 -0
- biopipen/scripts/bam/CNAClinic.R +41 -5
- biopipen/scripts/bam/CNVpytor.py +153 -54
- biopipen/scripts/bam/ControlFREEC.py +13 -14
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/bed/Bed2Vcf.py +5 -5
- biopipen/scripts/bed/BedConsensus.py +5 -5
- biopipen/scripts/bed/BedLiftOver.sh +6 -4
- biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
- biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
- biopipen/scripts/bed/BedtoolsMerge.py +4 -4
- biopipen/scripts/cellranger/CellRangerCount.py +138 -0
- biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
- biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
- biopipen/scripts/cnv/AneuploidyScore.R +55 -20
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
- biopipen/scripts/cnv/TMADScore.R +25 -9
- biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
- biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
- biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
- biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
- biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
- biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
- biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
- biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
- biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
- biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
- biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
- biopipen/scripts/cnvkit/guess_baits.py +166 -93
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +116 -118
- biopipen/scripts/gene/GeneNameConversion.R +67 -0
- biopipen/scripts/gene/GenePromoters.R +61 -0
- biopipen/scripts/gsea/Enrichr.R +5 -5
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/GSEA.R +2 -2
- biopipen/scripts/gsea/PreRank.R +5 -5
- biopipen/scripts/misc/Config2File.py +2 -2
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/misc/Shell.sh +15 -0
- biopipen/scripts/misc/Str2File.py +2 -2
- biopipen/scripts/plot/Heatmap.R +3 -3
- biopipen/scripts/plot/Manhattan.R +147 -0
- biopipen/scripts/plot/QQPlot.R +146 -0
- biopipen/scripts/plot/ROC.R +88 -0
- biopipen/scripts/plot/Scatter.R +112 -0
- biopipen/scripts/plot/VennDiagram.R +5 -9
- biopipen/scripts/protein/MMCIF2PDB.py +33 -0
- biopipen/scripts/protein/PDB2Fasta.py +60 -0
- biopipen/scripts/protein/Prodigy.py +119 -0
- biopipen/scripts/protein/ProdigySummary.R +140 -0
- biopipen/scripts/protein/RMSD.py +178 -0
- biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
- biopipen/scripts/regulatory/MotifScan.py +159 -0
- biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
- biopipen/scripts/regulatory/motifs-common.R +324 -0
- biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
- biopipen/scripts/rnaseq/Simulation.R +21 -0
- biopipen/scripts/rnaseq/UnitConversion.R +325 -54
- biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
- biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
- biopipen/scripts/scrna/CellCellCommunication.py +150 -0
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
- biopipen/scripts/scrna/CellSNPLite.py +30 -0
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
- biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
- biopipen/scripts/scrna/CellsDistribution.R +456 -167
- biopipen/scripts/scrna/DimPlots.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
- biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
- biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
- biopipen/scripts/scrna/ExprImputation.R +7 -0
- biopipen/scripts/scrna/LoomTo10X.R +51 -0
- biopipen/scripts/scrna/MQuad.py +25 -0
- biopipen/scripts/scrna/MarkersFinder.R +679 -400
- biopipen/scripts/scrna/MetaMarkers.R +265 -161
- biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
- biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
- biopipen/scripts/scrna/RadarPlots.R +355 -134
- biopipen/scripts/scrna/ScFGSEA.R +298 -100
- biopipen/scripts/scrna/ScSimulation.R +65 -0
- biopipen/scripts/scrna/ScVelo.py +617 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
- biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
- biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
- biopipen/scripts/scrna/SeuratClustering.R +36 -233
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
- biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
- biopipen/scripts/scrna/SeuratPreparing.R +223 -173
- biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
- biopipen/scripts/scrna/SeuratTo10X.R +27 -0
- biopipen/scripts/scrna/Slingshot.R +65 -0
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
- biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
- biopipen/scripts/scrna/scvelo_paga.py +313 -0
- biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
- biopipen/scripts/snp/MatrixEQTL.R +217 -0
- biopipen/scripts/snp/Plink2GTMat.py +148 -0
- biopipen/scripts/snp/PlinkCallRate.R +199 -0
- biopipen/scripts/snp/PlinkFilter.py +100 -0
- biopipen/scripts/snp/PlinkFreq.R +291 -0
- biopipen/scripts/snp/PlinkFromVcf.py +81 -0
- biopipen/scripts/snp/PlinkHWE.R +85 -0
- biopipen/scripts/snp/PlinkHet.R +96 -0
- biopipen/scripts/snp/PlinkIBD.R +196 -0
- biopipen/scripts/snp/PlinkSimulation.py +124 -0
- biopipen/scripts/snp/PlinkUpdateName.py +124 -0
- biopipen/scripts/stats/ChowTest.R +146 -0
- biopipen/scripts/stats/DiffCoexpr.R +152 -0
- biopipen/scripts/stats/LiquidAssoc.R +135 -0
- biopipen/scripts/stats/Mediation.R +108 -0
- biopipen/scripts/stats/MetaPvalue.R +130 -0
- biopipen/scripts/stats/MetaPvalue1.R +74 -0
- biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
- biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
- biopipen/scripts/tcr/Attach2Seurat.R +3 -2
- biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
- biopipen/scripts/tcr/CDR3Clustering.R +343 -0
- biopipen/scripts/tcr/ClonalStats.R +526 -0
- biopipen/scripts/tcr/CloneResidency.R +255 -131
- biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
- biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
- biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
- biopipen/scripts/tcr/GIANA/query.py +164 -162
- biopipen/scripts/tcr/Immunarch-basic.R +31 -9
- biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
- biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
- biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
- biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
- biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
- biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
- biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
- biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
- biopipen/scripts/tcr/Immunarch.R +63 -11
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
- biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
- biopipen/scripts/tcr/SampleDiversity.R +1 -1
- biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
- biopipen/scripts/tcr/ScRepLoading.R +166 -0
- biopipen/scripts/tcr/TCRClusterStats.R +176 -22
- biopipen/scripts/tcr/TCRDock.py +110 -0
- biopipen/scripts/tcr/TESSA.R +102 -118
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/tcr/immunarch-patched.R +142 -0
- biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
- biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
- biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
- biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
- biopipen/scripts/vcf/BcftoolsSort.py +113 -0
- biopipen/scripts/vcf/BcftoolsView.py +73 -0
- biopipen/scripts/vcf/TruvariBench.sh +14 -7
- biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
- biopipen/scripts/vcf/TruvariConsistency.R +1 -1
- biopipen/scripts/vcf/Vcf2Bed.py +2 -2
- biopipen/scripts/vcf/VcfAnno.py +11 -11
- biopipen/scripts/vcf/VcfDownSample.sh +22 -10
- biopipen/scripts/vcf/VcfFilter.py +5 -5
- biopipen/scripts/vcf/VcfFix.py +7 -7
- biopipen/scripts/vcf/VcfFix_utils.py +13 -4
- biopipen/scripts/vcf/VcfIndex.py +3 -3
- biopipen/scripts/vcf/VcfIntersect.py +3 -3
- biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
- biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
- biopipen/scripts/vcf/bcftools_utils.py +52 -0
- biopipen/scripts/web/Download.py +8 -4
- biopipen/scripts/web/DownloadList.py +5 -5
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
- biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
- biopipen/scripts/web/gcloud_common.py +49 -0
- biopipen/utils/gene.py +108 -60
- biopipen/utils/misc.py +146 -20
- biopipen/utils/reference.py +64 -20
- biopipen/utils/reporter.py +177 -0
- biopipen/utils/vcf.py +1 -1
- biopipen-0.34.26.dist-info/METADATA +27 -0
- biopipen-0.34.26.dist-info/RECORD +292 -0
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
- biopipen/ns/bcftools.py +0 -111
- biopipen/ns/scrna_basic.py +0 -255
- biopipen/reports/delim/SampleInfo.svelte +0 -36
- biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
- biopipen/reports/scrna/ScFGSEA.svelte +0 -35
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
- biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
- biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
- biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
- biopipen/scripts/gene/GeneNameConversion.py +0 -66
- biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
- biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
- biopipen/scripts/scrna/ExprImpution.R +0 -7
- biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
- biopipen/scripts/scrna/Write10X.R +0 -11
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
- biopipen/scripts/tcr/TCRClustering.R +0 -280
- biopipen/utils/common_docstrs.py +0 -61
- biopipen/utils/gene.R +0 -49
- biopipen/utils/gsea.R +0 -193
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -114
- biopipen/utils/mutate_helpers.R +0 -433
- biopipen/utils/plot.R +0 -173
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -115
- biopipen-0.21.0.dist-info/METADATA +0 -22
- biopipen-0.21.0.dist-info/RECORD +0 -218
biopipen/scripts/bam/CNVpytor.py
CHANGED
|
@@ -1,26 +1,26 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
|
|
3
|
+
import warnings
|
|
3
4
|
import pandas
|
|
4
|
-
from
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from diot import Diot # pyright: ignore
|
|
5
7
|
from biopipen.utils.reference import bam_index
|
|
6
|
-
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
8
|
+
from biopipen.utils.misc import run_command, dict_to_cli_args, logger
|
|
7
9
|
|
|
8
|
-
bamfile = {{in.bamfile | quote}} # pyright: ignore
|
|
9
|
-
snpfile = {{in.snpfile |
|
|
10
|
+
bamfile: str = {{in.bamfile | quote}} # pyright: ignore # noqa
|
|
11
|
+
snpfile: str = {{in.snpfile | quote}} # pyright: ignore
|
|
10
12
|
outdir = Path({{out.outdir | quote}}) # pyright: ignore
|
|
11
|
-
cnvpytor = {{envs.cnvpytor | quote}} # pyright: ignore
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
ncores = {{envs.ncores | int}} # pyright: ignore
|
|
13
|
+
cnvpytor: str = {{envs.cnvpytor | quote}} # pyright: ignore
|
|
14
|
+
samtools: str = {{envs.samtools | quote}} # pyright: ignore
|
|
15
|
+
ncores: int = {{envs.ncores | int}} # pyright: ignore
|
|
15
16
|
refdir = {{envs.refdir | quote}} # pyright: ignore
|
|
16
17
|
genome = {{envs.genome | quote}} # pyright: ignore
|
|
17
|
-
chrsize = {{envs.chrsize | quote}} # pyright: ignore
|
|
18
|
-
filters = {{envs.filters | repr}} # pyright: ignore
|
|
19
|
-
args = {{envs | repr}} # pyright: ignore
|
|
18
|
+
chrsize: str = {{envs.chrsize | quote}} # pyright: ignore
|
|
19
|
+
filters: dict = {{envs.filters | repr}} # pyright: ignore
|
|
20
|
+
args: Diot = {{envs | repr}} # pyright: ignore
|
|
20
21
|
|
|
21
22
|
del args['cnvpytor']
|
|
22
23
|
del args['ncores']
|
|
23
|
-
del args['cnvnator2vcf']
|
|
24
24
|
del args['samtools']
|
|
25
25
|
del args['refdir']
|
|
26
26
|
del args['genome']
|
|
@@ -28,7 +28,7 @@ del args['chrsize']
|
|
|
28
28
|
del args['filters']
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
bamfile = bam_index(bamfile, outdir, samtools, ncores)
|
|
31
|
+
bamfile: Path = bam_index(bamfile, str(outdir), samtools, ncores=ncores)
|
|
32
32
|
|
|
33
33
|
NOSNP_COLS = [
|
|
34
34
|
"CNVtype",
|
|
@@ -236,47 +236,138 @@ def load_chrsize():
|
|
|
236
236
|
yield chrom, int(size)
|
|
237
237
|
|
|
238
238
|
|
|
239
|
-
def
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
{
|
|
245
|
-
"": cnvpytor2vcf,
|
|
246
|
-
"reference": genome,
|
|
247
|
-
"_": [infile, refdir],
|
|
248
|
-
},
|
|
249
|
-
prefix="-",
|
|
250
|
-
),
|
|
251
|
-
stdout="return",
|
|
252
|
-
)
|
|
253
|
-
if fix:
|
|
254
|
-
unfixedfile.write_text(stdout)
|
|
239
|
+
def parse_chrom(chrom, chromdir):
|
|
240
|
+
file = Path(chromdir) / f"{chrom}.fa"
|
|
241
|
+
if not file.exists():
|
|
242
|
+
warnings.warn(f"Chromosome file not found in refdir: {chrom}")
|
|
243
|
+
return ""
|
|
255
244
|
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
245
|
+
seq = ""
|
|
246
|
+
with open(file) as f:
|
|
247
|
+
for line in f:
|
|
248
|
+
line = line.strip()
|
|
249
|
+
if not line:
|
|
250
|
+
continue
|
|
251
|
+
if line.startswith(">"):
|
|
252
|
+
seq = ""
|
|
253
|
+
else:
|
|
254
|
+
seq += line
|
|
255
|
+
return seq
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def cnvpytor2vcf(infile, snp):
|
|
259
|
+
# snp: in case to be used in the future
|
|
260
|
+
outfile = Path(infile).with_suffix(f".vcf")
|
|
261
|
+
# stdout = run_command(
|
|
262
|
+
# dict_to_cli_args(
|
|
263
|
+
# {
|
|
264
|
+
# "": cnvnator2vcf,
|
|
265
|
+
# "reference": genome,
|
|
266
|
+
# "_": [infile, refdir],
|
|
267
|
+
# },
|
|
268
|
+
# prefix="-",
|
|
269
|
+
# ),
|
|
270
|
+
# stdout="return",
|
|
271
|
+
# )
|
|
272
|
+
## command hangs
|
|
273
|
+
with open(infile) as fin, open(outfile, "w") as fout:
|
|
274
|
+
fout.write("##fileformat=VCFv4.2\n")
|
|
275
|
+
fout.write(f"##fileDate={datetime.now().strftime('%Y%m%d')}\n")
|
|
276
|
+
fout.write(f"##reference={genome}\n")
|
|
277
|
+
fout.write(f"##source=CNVpytor\n")
|
|
267
278
|
for chrom, size in load_chrsize():
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
279
|
+
fout.write(f"##contig=<ID={chrom},length={size}>\n")
|
|
280
|
+
fout.write('##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">\n')
|
|
281
|
+
fout.write('##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">\n')
|
|
282
|
+
fout.write('##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">\n')
|
|
283
|
+
fout.write('##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">\n')
|
|
284
|
+
fout.write('##INFO=<ID=natorRD,Number=1,Type=Float,Description="Normalized RD">\n')
|
|
285
|
+
fout.write('##INFO=<ID=natorP1,Number=1,Type=Float,Description="e-val by t-test">\n')
|
|
286
|
+
fout.write('##INFO=<ID=natorP2,Number=1,Type=Float,Description="e-val by Gaussian tail">\n')
|
|
287
|
+
fout.write('##INFO=<ID=natorP3,Number=1,Type=Float,Description="e-val by t-test (middle)">\n')
|
|
288
|
+
fout.write('##INFO=<ID=natorP4,Number=1,Type=Float,Description="e-val by Gaussian tail (middle)">\n')
|
|
289
|
+
fout.write('##INFO=<ID=natorQ0,Number=1,Type=Float,Description="Fraction of reads with 0 mapping quality">\n')
|
|
290
|
+
fout.write('##INFO=<ID=natorPE,Number=1,Type=Integer,Description="Number of paired-ends support the event">\n')
|
|
291
|
+
fout.write('##INFO=<ID=SAMPLES,Number=.,Type=String,Description="Sample genotyped to have the variant">\n')
|
|
292
|
+
fout.write('##ALT=<ID=DEL,Description="Deletion">\n')
|
|
293
|
+
fout.write('##ALT=<ID=DUP,Description="Duplication">\n')
|
|
294
|
+
fout.write('##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n')
|
|
295
|
+
fout.write('##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events">\n')
|
|
296
|
+
fout.write('##FORMAT=<ID=PE,Number=1,Type=String,Description="Number of paired-ends that support the event">\n')
|
|
297
|
+
fout.write(f"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{bamfile.stem}\n")
|
|
298
|
+
prev_chrom, chrom_seq, count = "", "", 0
|
|
299
|
+
for line in fin:
|
|
300
|
+
# type, coor, length, rd, p1, p2, p3, p4, q0, pe = line.strip("\n").split()
|
|
301
|
+
items = line.strip("\n").split()
|
|
302
|
+
type, coor, length = items[:3]
|
|
303
|
+
rd = float(items[3]) if len(items) > 3 else False
|
|
304
|
+
p1 = items[4] if len(items) > 4 else ""
|
|
305
|
+
p2 = items[5] if len(items) > 5 else ""
|
|
306
|
+
p3 = items[6] if len(items) > 6 else ""
|
|
307
|
+
p4 = items[7] if len(items) > 7 else ""
|
|
308
|
+
q0 = items[8] if len(items) > 8 else ""
|
|
309
|
+
pe = items[9] if len(items) > 9 else ""
|
|
310
|
+
chrom, pos = coor.split(":")
|
|
311
|
+
start, end = pos.split("-")
|
|
312
|
+
start, end = int(start), int(end)
|
|
313
|
+
is_del = type == "deletion"
|
|
314
|
+
is_dup = type == "duplication"
|
|
315
|
+
|
|
316
|
+
if not is_del and not is_dup:
|
|
317
|
+
warnings.warn(f"Skipping unrecognized CNV type: {type}")
|
|
318
|
+
continue
|
|
276
319
|
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
320
|
+
if chrom != prev_chrom:
|
|
321
|
+
chrom_seq = parse_chrom(chrom, refdir)
|
|
322
|
+
prev_chrom = chrom
|
|
323
|
+
|
|
324
|
+
count += 1
|
|
325
|
+
info = f"END={end}"
|
|
326
|
+
info += f";SVTYPE=DEL;SVLEN=-{length}" if is_del else f";SVTYPE=DUP;SVLEN={length}"
|
|
327
|
+
info += ";IMPRECISE"
|
|
328
|
+
info += f";natorRD={rd}" if rd is not False else ""
|
|
329
|
+
info += f";natorP1={p1}" if p1 else ""
|
|
330
|
+
info += f";natorP2={p2}" if p2 else ""
|
|
331
|
+
info += f";natorP3={p3}" if p3 else ""
|
|
332
|
+
info += f";natorP4={p4}" if p4 else ""
|
|
333
|
+
info += f";natorQ0={q0}" if q0 else ""
|
|
334
|
+
info += f";natorPE={pe}" if pe else ""
|
|
335
|
+
|
|
336
|
+
gt = "GT"
|
|
337
|
+
if rd is not False:
|
|
338
|
+
gt += ":CN"
|
|
339
|
+
gt += ":PE" if pe else ""
|
|
340
|
+
gt += "\t"
|
|
341
|
+
if is_del and rd < 0.25:
|
|
342
|
+
gt += "1/1:0"
|
|
343
|
+
elif is_del and rd >= 0.25:
|
|
344
|
+
gt += "0/1:1"
|
|
345
|
+
elif rd <= 1.75:
|
|
346
|
+
gt += "0/1:2"
|
|
347
|
+
elif rd > 1.75 and rd <= 2.25:
|
|
348
|
+
gt += "1/1:2"
|
|
349
|
+
elif rd > 2.25:
|
|
350
|
+
gt += f"./2:{rd:.0f}"
|
|
351
|
+
else:
|
|
352
|
+
gt = "GT:PE\t./." if pe else "GT\t./."
|
|
353
|
+
|
|
354
|
+
gt += f":{pe}" if pe else ""
|
|
355
|
+
else:
|
|
356
|
+
gt += "\t./."
|
|
357
|
+
|
|
358
|
+
fout.write("\t".join(
|
|
359
|
+
[
|
|
360
|
+
chrom,
|
|
361
|
+
str(start),
|
|
362
|
+
f"CNVpytor_{'del_' if is_del else 'dup_'}{count}",
|
|
363
|
+
chrom_seq[start - 1] if start < len(chrom_seq) else "N",
|
|
364
|
+
"<DEL>" if is_del else "<DUP>",
|
|
365
|
+
".",
|
|
366
|
+
"PASS",
|
|
367
|
+
info,
|
|
368
|
+
gt,
|
|
369
|
+
]
|
|
370
|
+
) + "\n")
|
|
280
371
|
|
|
281
372
|
|
|
282
373
|
def do_case():
|
|
@@ -290,7 +381,7 @@ def do_case():
|
|
|
290
381
|
rootfile = outdir / "file.pytor"
|
|
291
382
|
case["j"] = case.get("j", ncores)
|
|
292
383
|
|
|
293
|
-
|
|
384
|
+
logger.info("Reading depth signals ...")
|
|
294
385
|
run_command(
|
|
295
386
|
dict_to_cli_args(
|
|
296
387
|
{
|
|
@@ -305,7 +396,7 @@ def do_case():
|
|
|
305
396
|
fg=True,
|
|
306
397
|
)
|
|
307
398
|
|
|
308
|
-
|
|
399
|
+
logger.info("Predicting CNVs ...")
|
|
309
400
|
run_command(
|
|
310
401
|
dict_to_cli_args(
|
|
311
402
|
{
|
|
@@ -314,6 +405,7 @@ def do_case():
|
|
|
314
405
|
"his": binsizes,
|
|
315
406
|
},
|
|
316
407
|
prefix="-",
|
|
408
|
+
dup_key=False,
|
|
317
409
|
),
|
|
318
410
|
fg=True,
|
|
319
411
|
)
|
|
@@ -326,6 +418,7 @@ def do_case():
|
|
|
326
418
|
"partition": binsizes,
|
|
327
419
|
},
|
|
328
420
|
prefix="-",
|
|
421
|
+
dup_key=False,
|
|
329
422
|
),
|
|
330
423
|
fg=True,
|
|
331
424
|
)
|
|
@@ -336,6 +429,7 @@ def do_case():
|
|
|
336
429
|
mask_snps = snp.pop("mask_snps", True)
|
|
337
430
|
baf_nomask = snp.pop("baf_nomask", False)
|
|
338
431
|
|
|
432
|
+
logger.info("Importing SNP data ...")
|
|
339
433
|
run_command(
|
|
340
434
|
dict_to_cli_args(
|
|
341
435
|
{
|
|
@@ -350,6 +444,7 @@ def do_case():
|
|
|
350
444
|
)
|
|
351
445
|
|
|
352
446
|
if mask_snps:
|
|
447
|
+
logger.info("Masking 1000 Genome SNPs ...")
|
|
353
448
|
run_command(
|
|
354
449
|
dict_to_cli_args(
|
|
355
450
|
{
|
|
@@ -362,6 +457,7 @@ def do_case():
|
|
|
362
457
|
fg=True,
|
|
363
458
|
)
|
|
364
459
|
|
|
460
|
+
logger.info("Calculating BAF histograms ...")
|
|
365
461
|
run_command(
|
|
366
462
|
dict_to_cli_args(
|
|
367
463
|
{
|
|
@@ -375,8 +471,9 @@ def do_case():
|
|
|
375
471
|
fg=True,
|
|
376
472
|
)
|
|
377
473
|
|
|
378
|
-
|
|
474
|
+
logger.info("Predicting CNV regions using joint caller ...")
|
|
379
475
|
for binsize in binsizes:
|
|
476
|
+
logger.info(f"- binsize: {binsize}")
|
|
380
477
|
outfile = outdir / f"calls{'.combined' if snp is not False else ''}.{binsize}.tsv"
|
|
381
478
|
outfile_filtered = outdir / f"calls{'.combined' if snp is not False else ''}.{binsize}.filtered.tsv"
|
|
382
479
|
run_command(
|
|
@@ -392,6 +489,7 @@ def do_case():
|
|
|
392
489
|
stdout=outfile,
|
|
393
490
|
)
|
|
394
491
|
|
|
492
|
+
logger.info(" Converting to other formats ...")
|
|
395
493
|
cnvpytor2other(outfile, bool(snp), "gff")
|
|
396
494
|
cnvpytor2other(outfile, bool(snp), "bed")
|
|
397
495
|
cnvpytor2vcf(outfile, bool(snp))
|
|
@@ -424,6 +522,7 @@ def do_case():
|
|
|
424
522
|
cnvpytor2vcf(outfile_filtered, bool(snp))
|
|
425
523
|
|
|
426
524
|
# plots
|
|
525
|
+
logger.info(" Plotting ...")
|
|
427
526
|
manplot = outdir / f"manhattan.{binsize}.png"
|
|
428
527
|
run_command(
|
|
429
528
|
dict_to_cli_args(
|
|
@@ -1,24 +1,23 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import glob
|
|
3
|
-
import rtoml
|
|
4
3
|
import shutil
|
|
5
|
-
from diot import Diot
|
|
4
|
+
from diot import Diot # type: ignore
|
|
6
5
|
from biopipen.utils.misc import dict_to_cli_args, run_command
|
|
7
6
|
|
|
8
|
-
bamfile = {{ in.bamfile |
|
|
9
|
-
snpfile = {{ in.snpfile |
|
|
10
|
-
outdir = {{ out.outdir |
|
|
11
|
-
freec = {{ envs.freec | repr }} # pyright: ignore
|
|
7
|
+
bamfile = {{ in.bamfile | quote }} # pyright: ignore # noqa
|
|
8
|
+
snpfile = {{ in.snpfile | quote }} # pyright: ignore
|
|
9
|
+
outdir = {{ out.outdir | quote }} # pyright: ignore
|
|
10
|
+
freec: str = {{ envs.freec | repr }} # pyright: ignore
|
|
12
11
|
ncores = {{ envs.ncores | repr }} # pyright: ignore
|
|
13
12
|
bedtools = {{ envs.bedtools | repr }} # pyright: ignore
|
|
14
13
|
sambamba = {{ envs.sambamba | repr }} # pyright: ignore
|
|
15
14
|
samtools = {{ envs.samtools | repr }} # pyright: ignore
|
|
16
15
|
tabix = {{ envs.tabix | repr }} # pyright: ignore
|
|
17
|
-
rscript = {{ envs.rscript | repr }} # pyright: ignore
|
|
18
|
-
ref = {{ envs.ref |
|
|
19
|
-
refdir = {{ envs.refdir |
|
|
16
|
+
rscript: str = {{ envs.rscript | repr }} # pyright: ignore
|
|
17
|
+
ref = {{ envs.ref | quote }} # pyright: ignore
|
|
18
|
+
refdir = {{ envs.refdir | quote }} # pyright: ignore
|
|
20
19
|
binsize = {{ envs.binsize | repr }} # pyright: ignore
|
|
21
|
-
args = {{ envs.args |
|
|
20
|
+
args = {{ envs.args | dict }} # pyright: ignore
|
|
22
21
|
|
|
23
22
|
chrLenFile = f"{ref}.fai"
|
|
24
23
|
if snpfile:
|
|
@@ -33,7 +32,7 @@ if snpfile:
|
|
|
33
32
|
}
|
|
34
33
|
),
|
|
35
34
|
stdout="return",
|
|
36
|
-
).strip().splitlines()
|
|
35
|
+
).strip().splitlines() # type: ignore
|
|
37
36
|
|
|
38
37
|
kept_seqs = []
|
|
39
38
|
with open(chrLenFile, "r") as fin, open(chrLenFile2, "w") as fout:
|
|
@@ -79,7 +78,7 @@ config.BAF |= Diot(
|
|
|
79
78
|
|
|
80
79
|
os.makedirs(f"{outdir}/FREEC-output", exist_ok=True)
|
|
81
80
|
|
|
82
|
-
config_ini =
|
|
81
|
+
config_ini = config.to_toml().replace('"', "") # type: ignore
|
|
83
82
|
|
|
84
83
|
with open(configfile, "w") as fconf:
|
|
85
84
|
fconf.write(config_ini)
|
|
@@ -92,7 +91,7 @@ run_command(
|
|
|
92
91
|
|
|
93
92
|
# plot cnvs
|
|
94
93
|
# get makeGraph.R
|
|
95
|
-
freec_path = os.path.realpath(shutil.which(freec).strip())
|
|
94
|
+
freec_path = os.path.realpath(shutil.which(freec).strip()) # type: ignore
|
|
96
95
|
mkgraph = os.path.join(os.path.dirname(freec_path), "makeGraph.R")
|
|
97
96
|
if not os.path.exists(mkgraph):
|
|
98
97
|
raise RuntimeError("makeGraph.R not found")
|
|
@@ -102,7 +101,7 @@ try:
|
|
|
102
101
|
except IndexError:
|
|
103
102
|
raise RuntimeError("Control-FREEC failed to run") from None
|
|
104
103
|
|
|
105
|
-
rscript_path = os.path.realpath(shutil.which(rscript).strip())
|
|
104
|
+
rscript_path = os.path.realpath(shutil.which(rscript).strip()) # type: ignore
|
|
106
105
|
rpath = os.path.join(os.path.dirname(rscript_path), "R")
|
|
107
106
|
|
|
108
107
|
plotcmd = f"cat {mkgraph} | R --slave --args {config.general.ploidy} {ratiofile}"
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from pathlib import PosixPath # type: ignore # noqa
|
|
2
|
+
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
3
|
+
from biopipen.utils.reference import bam_index
|
|
4
|
+
|
|
5
|
+
bamfile: str = {{ in.bamfile | quote }} # pyright: ignore # noqa
|
|
6
|
+
outfile: str = {{ out.outfile | quote }} # pyright: ignore # noqa
|
|
7
|
+
envs: dict = {{envs | attr: "to_dict" | call}} # pyright: ignore # noqa
|
|
8
|
+
ncores = envs.pop("ncores")
|
|
9
|
+
samtools = envs.pop("samtools")
|
|
10
|
+
should_index = envs.pop("index")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def run_samtools(infile):
|
|
14
|
+
cmd = [
|
|
15
|
+
samtools,
|
|
16
|
+
"view",
|
|
17
|
+
"-b",
|
|
18
|
+
"--threads",
|
|
19
|
+
str(ncores),
|
|
20
|
+
"-o",
|
|
21
|
+
outfile,
|
|
22
|
+
] + dict_to_cli_args(envs, dashify=True) + [infile]
|
|
23
|
+
|
|
24
|
+
run_command(cmd, fg=True)
|
|
25
|
+
if should_index:
|
|
26
|
+
bam_index(outfile, tool="samtools", samtools=samtools, ncores=ncores)
|
|
27
|
+
|
|
28
|
+
return outfile
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
if __name__ == "__main__":
|
|
32
|
+
infile = bam_index(bamfile, tool="samtools", samtools=samtools, ncores=ncores)
|
|
33
|
+
run_samtools(infile)
|
biopipen/scripts/bed/Bed2Vcf.py
CHANGED
|
@@ -8,14 +8,14 @@ from pysam import FastaFile
|
|
|
8
8
|
|
|
9
9
|
from biopipen.utils.misc import run_command
|
|
10
10
|
|
|
11
|
-
inbed = {{in.inbed | quote}} # pyright: ignore
|
|
12
|
-
outvcf = {{out.outvcf | quote}} # pyright: ignore
|
|
13
|
-
tmpoutvcf = {{out.outvcf | append: ".tmp" | quote}} # pyright: ignore
|
|
11
|
+
inbed: str = {{in.inbed | quote}} # pyright: ignore # noqa
|
|
12
|
+
outvcf: str = {{out.outvcf | quote}} # pyright: ignore
|
|
13
|
+
tmpoutvcf: str = {{out.outvcf | str | append: ".tmp" | quote}} # pyright: ignore
|
|
14
14
|
joboutdir = Path({{job.outdir | quote}}) # pyright: ignore
|
|
15
|
-
ref = {{envs.ref | quote}} # pyright: ignore
|
|
15
|
+
ref: str = {{envs.ref | quote}} # pyright: ignore
|
|
16
16
|
headers = {{envs.headers | repr}} # pyright: ignore
|
|
17
17
|
infos = {{envs.infos | repr}} # pyright: ignore
|
|
18
|
-
base = {{envs.base | int}} # pyright: ignore
|
|
18
|
+
base: int = {{envs.base | int}} # pyright: ignore
|
|
19
19
|
formats = {{envs.formats | repr}} # pyright: ignore
|
|
20
20
|
index = {{envs.index | repr}} # pyright: ignore
|
|
21
21
|
bcftools = {{envs.bcftools | quote}} # pyright: ignore
|
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import sys
|
|
2
2
|
from math import ceil
|
|
3
|
-
from pathlib import Path
|
|
3
|
+
from pathlib import Path, PosixPath # noqa: F401
|
|
4
4
|
|
|
5
5
|
from biopipen.utils.misc import run_command
|
|
6
6
|
|
|
7
|
-
bedfiles = {{in.bedfiles |
|
|
8
|
-
outfile = Path({{out.outbed |
|
|
7
|
+
bedfiles: list[Path] = {{in.bedfiles | each: as_path}} # pyright: ignore # noqa
|
|
8
|
+
outfile = Path({{out.outbed | quote}}) # pyright: ignore
|
|
9
9
|
bedtools_path = {{envs.bedtools | repr}} # pyright: ignore
|
|
10
|
-
cutoff = {{envs.cutoff | repr}} # pyright: ignore
|
|
10
|
+
cutoff: float = {{envs.cutoff | repr}} # pyright: ignore
|
|
11
11
|
distance = {{envs.distance | repr}} # pyright: ignore
|
|
12
12
|
chrsize = {{envs.chrsize | repr}} # pyright: ignore
|
|
13
|
-
bedfiles = [Path(bedfile) for bedfile in bedfiles]
|
|
13
|
+
# bedfiles = [Path(bedfile) for bedfile in bedfiles]
|
|
14
14
|
# In case there are duplicated stems
|
|
15
15
|
stems = [f"{bedfile.stem}__{i}" for i, bedfile in enumerate(bedfiles)]
|
|
16
16
|
|
|
@@ -1,11 +1,13 @@
|
|
|
1
|
+
# shellcheck disable=SC2148,SC1083
|
|
1
2
|
inbed={{ in.inbed | quote }}
|
|
2
3
|
outbed={{ out.outbed | quote }}
|
|
3
4
|
rejfile={{ job.outdir | joinpaths: "rejected.bed" | quote }}
|
|
4
5
|
liftover={{ envs.liftover | quote }}
|
|
5
6
|
chain={{ envs.chain | quote }}
|
|
6
7
|
|
|
8
|
+
# shellcheck disable=SC2154
|
|
7
9
|
$liftover \
|
|
8
|
-
$inbed \
|
|
9
|
-
$chain \
|
|
10
|
-
$outbed \
|
|
11
|
-
$rejfile
|
|
10
|
+
"$inbed" \
|
|
11
|
+
"$chain" \
|
|
12
|
+
"$outbed" \
|
|
13
|
+
"$rejfile"
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from biopipen.utils.misc import run_command, dict_to_cli_args, logger
|
|
3
|
+
|
|
4
|
+
afile = Path({{in.afile | quote}}) # pyright: ignore # noqa: #999
|
|
5
|
+
bfile = Path({{in.bfile | quote}}) # pyright: ignore
|
|
6
|
+
outfile: str = {{out.outfile | quote}} # pyright: ignore
|
|
7
|
+
envs: dict = {{envs | dict}} # pyright: ignore
|
|
8
|
+
|
|
9
|
+
bedtools = envs.pop("bedtools")
|
|
10
|
+
sort = envs.pop("sort")
|
|
11
|
+
chrsize = envs.pop("chrsize")
|
|
12
|
+
postcmd = envs.pop("postcmd", None)
|
|
13
|
+
outdir = Path(outfile).parent
|
|
14
|
+
|
|
15
|
+
if chrsize and "g" in envs:
|
|
16
|
+
logger.warning("Ignoring envs.g because envs.chrsize is provided.")
|
|
17
|
+
envs["g"] = Path(chrsize).expanduser()
|
|
18
|
+
elif chrsize:
|
|
19
|
+
envs["g"] = Path(chrsize).expanduser()
|
|
20
|
+
|
|
21
|
+
if sort:
|
|
22
|
+
afile_sorted = outdir / f"{afile.stem}_sorted{afile.suffix}"
|
|
23
|
+
bfile_sorted = outdir / f"{bfile.stem}_sorted{bfile.suffix}"
|
|
24
|
+
run_command(
|
|
25
|
+
[bedtools, "sort", "-g", envs["g"], "-i", afile],
|
|
26
|
+
stdout=afile_sorted,
|
|
27
|
+
)
|
|
28
|
+
run_command(
|
|
29
|
+
[bedtools, "sort", "-g", envs["g"], "-i", bfile],
|
|
30
|
+
stdout=bfile_sorted,
|
|
31
|
+
)
|
|
32
|
+
afile = afile_sorted
|
|
33
|
+
bfile = bfile_sorted
|
|
34
|
+
|
|
35
|
+
envs[""] = [bedtools, "intersect"]
|
|
36
|
+
envs["a"] = afile
|
|
37
|
+
envs["b"] = bfile
|
|
38
|
+
envs.setdefault("sorted", True)
|
|
39
|
+
|
|
40
|
+
if envs["sorted"] and not "g" in envs:
|
|
41
|
+
raise ValueError("envs.g is required or manullay set envs.sorted to False.")
|
|
42
|
+
|
|
43
|
+
if postcmd:
|
|
44
|
+
ofile = Path(outfile).with_suffix(".prior.bt")
|
|
45
|
+
run_command(dict_to_cli_args(envs, prefix="-"), stdout=ofile)
|
|
46
|
+
postcmd_file = outdir / "_postcmd.sh"
|
|
47
|
+
postcmd_file.write_text(postcmd)
|
|
48
|
+
run_command(
|
|
49
|
+
["bash", postcmd_file],
|
|
50
|
+
env={"infile": ofile, "outfile": outfile, "outdir": outdir},
|
|
51
|
+
fg=True,
|
|
52
|
+
)
|
|
53
|
+
else:
|
|
54
|
+
run_command(dict_to_cli_args(envs, prefix="-"), stdout=outfile)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from biopipen.utils.misc import run_command, logger
|
|
3
|
+
|
|
4
|
+
infile = Path({{in.afile | quote}}) # pyright: ignore # noqa: #999
|
|
5
|
+
outfile = Path({{in.bfile | quote}}) # pyright: ignore
|
|
6
|
+
bedtools: str = {{envs.bedtools | quote}} # pyright: ignore
|
|
7
|
+
window = {{envs.window | repr}} # pyright: ignore
|
|
8
|
+
step = {{envs.step | repr}} # pyright: ignore
|
|
9
|
+
nwin = {{envs.nwin | repr}} # pyright: ignore
|
|
10
|
+
reverse = {{envs.reverse | repr}} # pyright: ignore
|
|
11
|
+
name = {{envs.name | repr}} # pyright: ignore
|
|
12
|
+
|
|
13
|
+
if nwin is None and window is None:
|
|
14
|
+
raise ValueError("Either `nwin` or `window` should be provided.")
|
|
15
|
+
|
|
16
|
+
if nwin is not None and window is not None:
|
|
17
|
+
raise ValueError("Either `nwin` or `window` should be provided, not both.")
|
|
18
|
+
|
|
19
|
+
# detect if infile is a genome size file or a bed file
|
|
20
|
+
with infile.open() as f:
|
|
21
|
+
line = f.readline().strip()
|
|
22
|
+
if len(line.split("\t")) > 2:
|
|
23
|
+
is_bed = True
|
|
24
|
+
else:
|
|
25
|
+
is_bed = False
|
|
26
|
+
|
|
27
|
+
if is_bed:
|
|
28
|
+
logger.info("BED file is detected as input.")
|
|
29
|
+
cmd = [bedtools, "makewindows", "-b", infile]
|
|
30
|
+
else:
|
|
31
|
+
logger.info("Genome size file is detected as input.")
|
|
32
|
+
cmd = [bedtools, "makewindows", "-g", infile]
|
|
33
|
+
|
|
34
|
+
if nwin:
|
|
35
|
+
cmd.extend(["-n", nwin])
|
|
36
|
+
elif step is not None:
|
|
37
|
+
cmd.extend(["-w", window, "-s", step])
|
|
38
|
+
else:
|
|
39
|
+
cmd.extend(["-w", window])
|
|
40
|
+
|
|
41
|
+
if reverse:
|
|
42
|
+
cmd.append("-reverse")
|
|
43
|
+
|
|
44
|
+
if name != "none":
|
|
45
|
+
cmd.extend(["-name", name])
|
|
46
|
+
|
|
47
|
+
run_command(cmd, stdout=outfile)
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
from biopipen.utils import run_command, dict_to_cli_args
|
|
1
|
+
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
2
2
|
|
|
3
|
-
inbed = {{in.inbed |
|
|
4
|
-
outbed = {{out.outbed |
|
|
5
|
-
envs = {{envs |
|
|
3
|
+
inbed = {{in.inbed | quote}} # pyright: ignore # noqa: #999
|
|
4
|
+
outbed = {{out.outbed | quote}} # pyright: ignore
|
|
5
|
+
envs: dict = {{envs | dict}} # pyright: ignore
|
|
6
6
|
bedtools = envs.pop("bedtools", "bedtools")
|
|
7
7
|
|
|
8
8
|
envs[""] = [bedtools, "merge"]
|