biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +28 -0
- biopipen/core/filters.py +79 -4
- biopipen/core/proc.py +12 -3
- biopipen/core/testing.py +75 -3
- biopipen/ns/bam.py +148 -6
- biopipen/ns/bed.py +75 -0
- biopipen/ns/cellranger.py +186 -0
- biopipen/ns/cellranger_pipeline.py +126 -0
- biopipen/ns/cnv.py +19 -3
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/cnvkit_pipeline.py +20 -12
- biopipen/ns/delim.py +34 -35
- biopipen/ns/gene.py +68 -23
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +39 -14
- biopipen/ns/plot.py +304 -1
- biopipen/ns/protein.py +183 -0
- biopipen/ns/regulatory.py +290 -0
- biopipen/ns/rnaseq.py +142 -5
- biopipen/ns/scrna.py +2053 -473
- biopipen/ns/scrna_metabolic_landscape.py +228 -382
- biopipen/ns/snp.py +659 -0
- biopipen/ns/stats.py +484 -0
- biopipen/ns/tcr.py +683 -98
- biopipen/ns/vcf.py +236 -2
- biopipen/ns/web.py +97 -6
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
- biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
- biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/common.svelte +15 -0
- biopipen/reports/protein/ProdigySummary.svelte +16 -0
- biopipen/reports/scrna/CellsDistribution.svelte +4 -39
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna/MarkersFinder.svelte +6 -126
- biopipen/reports/scrna/MetaMarkers.svelte +3 -75
- biopipen/reports/scrna/RadarPlots.svelte +4 -20
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
- biopipen/reports/snp/PlinkCallRate.svelte +24 -0
- biopipen/reports/snp/PlinkFreq.svelte +18 -0
- biopipen/reports/snp/PlinkHWE.svelte +18 -0
- biopipen/reports/snp/PlinkHet.svelte +18 -0
- biopipen/reports/snp/PlinkIBD.svelte +18 -0
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
- biopipen/reports/tcr/ClonalStats.svelte +16 -0
- biopipen/reports/tcr/CloneResidency.svelte +3 -93
- biopipen/reports/tcr/Immunarch.svelte +4 -155
- biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
- biopipen/reports/tcr/TESSA.svelte +11 -28
- biopipen/reports/utils/misc.liq +22 -7
- biopipen/scripts/bam/BamMerge.py +11 -15
- biopipen/scripts/bam/BamSampling.py +90 -0
- biopipen/scripts/bam/BamSort.py +141 -0
- biopipen/scripts/bam/BamSplitChroms.py +10 -10
- biopipen/scripts/bam/BamSubsetByBed.py +38 -0
- biopipen/scripts/bam/CNAClinic.R +41 -5
- biopipen/scripts/bam/CNVpytor.py +153 -54
- biopipen/scripts/bam/ControlFREEC.py +13 -14
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/bed/Bed2Vcf.py +5 -5
- biopipen/scripts/bed/BedConsensus.py +5 -5
- biopipen/scripts/bed/BedLiftOver.sh +6 -4
- biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
- biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
- biopipen/scripts/bed/BedtoolsMerge.py +4 -4
- biopipen/scripts/cellranger/CellRangerCount.py +138 -0
- biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
- biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
- biopipen/scripts/cnv/AneuploidyScore.R +55 -20
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
- biopipen/scripts/cnv/TMADScore.R +25 -9
- biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
- biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
- biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
- biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
- biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
- biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
- biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
- biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
- biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
- biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
- biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
- biopipen/scripts/cnvkit/guess_baits.py +166 -93
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +116 -118
- biopipen/scripts/gene/GeneNameConversion.R +67 -0
- biopipen/scripts/gene/GenePromoters.R +61 -0
- biopipen/scripts/gsea/Enrichr.R +5 -5
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/GSEA.R +2 -2
- biopipen/scripts/gsea/PreRank.R +5 -5
- biopipen/scripts/misc/Config2File.py +2 -2
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/misc/Shell.sh +15 -0
- biopipen/scripts/misc/Str2File.py +2 -2
- biopipen/scripts/plot/Heatmap.R +3 -3
- biopipen/scripts/plot/Manhattan.R +147 -0
- biopipen/scripts/plot/QQPlot.R +146 -0
- biopipen/scripts/plot/ROC.R +88 -0
- biopipen/scripts/plot/Scatter.R +112 -0
- biopipen/scripts/plot/VennDiagram.R +5 -9
- biopipen/scripts/protein/MMCIF2PDB.py +33 -0
- biopipen/scripts/protein/PDB2Fasta.py +60 -0
- biopipen/scripts/protein/Prodigy.py +119 -0
- biopipen/scripts/protein/ProdigySummary.R +140 -0
- biopipen/scripts/protein/RMSD.py +178 -0
- biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
- biopipen/scripts/regulatory/MotifScan.py +159 -0
- biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
- biopipen/scripts/regulatory/motifs-common.R +324 -0
- biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
- biopipen/scripts/rnaseq/Simulation.R +21 -0
- biopipen/scripts/rnaseq/UnitConversion.R +325 -54
- biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
- biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
- biopipen/scripts/scrna/CellCellCommunication.py +150 -0
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
- biopipen/scripts/scrna/CellSNPLite.py +30 -0
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
- biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
- biopipen/scripts/scrna/CellsDistribution.R +456 -167
- biopipen/scripts/scrna/DimPlots.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
- biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
- biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
- biopipen/scripts/scrna/ExprImputation.R +7 -0
- biopipen/scripts/scrna/LoomTo10X.R +51 -0
- biopipen/scripts/scrna/MQuad.py +25 -0
- biopipen/scripts/scrna/MarkersFinder.R +679 -400
- biopipen/scripts/scrna/MetaMarkers.R +265 -161
- biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
- biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
- biopipen/scripts/scrna/RadarPlots.R +355 -134
- biopipen/scripts/scrna/ScFGSEA.R +298 -100
- biopipen/scripts/scrna/ScSimulation.R +65 -0
- biopipen/scripts/scrna/ScVelo.py +617 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
- biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
- biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
- biopipen/scripts/scrna/SeuratClustering.R +36 -233
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
- biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
- biopipen/scripts/scrna/SeuratPreparing.R +223 -173
- biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
- biopipen/scripts/scrna/SeuratTo10X.R +27 -0
- biopipen/scripts/scrna/Slingshot.R +65 -0
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
- biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
- biopipen/scripts/scrna/scvelo_paga.py +313 -0
- biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
- biopipen/scripts/snp/MatrixEQTL.R +217 -0
- biopipen/scripts/snp/Plink2GTMat.py +148 -0
- biopipen/scripts/snp/PlinkCallRate.R +199 -0
- biopipen/scripts/snp/PlinkFilter.py +100 -0
- biopipen/scripts/snp/PlinkFreq.R +291 -0
- biopipen/scripts/snp/PlinkFromVcf.py +81 -0
- biopipen/scripts/snp/PlinkHWE.R +85 -0
- biopipen/scripts/snp/PlinkHet.R +96 -0
- biopipen/scripts/snp/PlinkIBD.R +196 -0
- biopipen/scripts/snp/PlinkSimulation.py +124 -0
- biopipen/scripts/snp/PlinkUpdateName.py +124 -0
- biopipen/scripts/stats/ChowTest.R +146 -0
- biopipen/scripts/stats/DiffCoexpr.R +152 -0
- biopipen/scripts/stats/LiquidAssoc.R +135 -0
- biopipen/scripts/stats/Mediation.R +108 -0
- biopipen/scripts/stats/MetaPvalue.R +130 -0
- biopipen/scripts/stats/MetaPvalue1.R +74 -0
- biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
- biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
- biopipen/scripts/tcr/Attach2Seurat.R +3 -2
- biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
- biopipen/scripts/tcr/CDR3Clustering.R +343 -0
- biopipen/scripts/tcr/ClonalStats.R +526 -0
- biopipen/scripts/tcr/CloneResidency.R +255 -131
- biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
- biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
- biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
- biopipen/scripts/tcr/GIANA/query.py +164 -162
- biopipen/scripts/tcr/Immunarch-basic.R +31 -9
- biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
- biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
- biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
- biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
- biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
- biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
- biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
- biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
- biopipen/scripts/tcr/Immunarch.R +63 -11
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
- biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
- biopipen/scripts/tcr/SampleDiversity.R +1 -1
- biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
- biopipen/scripts/tcr/ScRepLoading.R +166 -0
- biopipen/scripts/tcr/TCRClusterStats.R +176 -22
- biopipen/scripts/tcr/TCRDock.py +110 -0
- biopipen/scripts/tcr/TESSA.R +102 -118
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/tcr/immunarch-patched.R +142 -0
- biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
- biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
- biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
- biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
- biopipen/scripts/vcf/BcftoolsSort.py +113 -0
- biopipen/scripts/vcf/BcftoolsView.py +73 -0
- biopipen/scripts/vcf/TruvariBench.sh +14 -7
- biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
- biopipen/scripts/vcf/TruvariConsistency.R +1 -1
- biopipen/scripts/vcf/Vcf2Bed.py +2 -2
- biopipen/scripts/vcf/VcfAnno.py +11 -11
- biopipen/scripts/vcf/VcfDownSample.sh +22 -10
- biopipen/scripts/vcf/VcfFilter.py +5 -5
- biopipen/scripts/vcf/VcfFix.py +7 -7
- biopipen/scripts/vcf/VcfFix_utils.py +13 -4
- biopipen/scripts/vcf/VcfIndex.py +3 -3
- biopipen/scripts/vcf/VcfIntersect.py +3 -3
- biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
- biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
- biopipen/scripts/vcf/bcftools_utils.py +52 -0
- biopipen/scripts/web/Download.py +8 -4
- biopipen/scripts/web/DownloadList.py +5 -5
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
- biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
- biopipen/scripts/web/gcloud_common.py +49 -0
- biopipen/utils/gene.py +108 -60
- biopipen/utils/misc.py +146 -20
- biopipen/utils/reference.py +64 -20
- biopipen/utils/reporter.py +177 -0
- biopipen/utils/vcf.py +1 -1
- biopipen-0.34.26.dist-info/METADATA +27 -0
- biopipen-0.34.26.dist-info/RECORD +292 -0
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
- biopipen/ns/bcftools.py +0 -111
- biopipen/ns/scrna_basic.py +0 -255
- biopipen/reports/delim/SampleInfo.svelte +0 -36
- biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
- biopipen/reports/scrna/ScFGSEA.svelte +0 -35
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
- biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
- biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
- biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
- biopipen/scripts/gene/GeneNameConversion.py +0 -66
- biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
- biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
- biopipen/scripts/scrna/ExprImpution.R +0 -7
- biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
- biopipen/scripts/scrna/Write10X.R +0 -11
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
- biopipen/scripts/tcr/TCRClustering.R +0 -280
- biopipen/utils/common_docstrs.py +0 -61
- biopipen/utils/gene.R +0 -49
- biopipen/utils/gsea.R +0 -193
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -114
- biopipen/utils/mutate_helpers.R +0 -433
- biopipen/utils/plot.R +0 -173
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -115
- biopipen-0.21.0.dist-info/METADATA +0 -22
- biopipen-0.21.0.dist-info/RECORD +0 -218
biopipen/scripts/bam/BamMerge.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
-
from biopipen.utils.misc import run_command
|
|
2
|
+
from biopipen.utils.misc import run_command, logger
|
|
3
3
|
|
|
4
|
-
bamfiles = {{in.bamfiles |
|
|
5
|
-
outfile = Path({{out.outfile |
|
|
4
|
+
bamfiles = {{in.bamfiles | default: [] | each: str}} # pyright: ignore # noqa
|
|
5
|
+
outfile = Path({{out.outfile | quote}}) # pyright: ignore
|
|
6
6
|
ncores = {{envs.ncores | int}} # pyright: ignore
|
|
7
7
|
tool = {{envs.tool | quote}} # pyright: ignore
|
|
8
8
|
samtools = {{envs.samtools | quote}} # pyright: ignore
|
|
@@ -18,7 +18,7 @@ if should_index and not should_sort:
|
|
|
18
18
|
|
|
19
19
|
def use_samtools():
|
|
20
20
|
"""Use samtools to merge bam files"""
|
|
21
|
-
|
|
21
|
+
logger.info("Using samtools ...")
|
|
22
22
|
ofile = (
|
|
23
23
|
outfile
|
|
24
24
|
if not should_sort
|
|
@@ -43,11 +43,11 @@ def use_samtools():
|
|
|
43
43
|
*merge_args,
|
|
44
44
|
*bamfiles,
|
|
45
45
|
]
|
|
46
|
-
|
|
46
|
+
logger.info("- Merging the bam files ...")
|
|
47
47
|
run_command(cmd)
|
|
48
48
|
|
|
49
49
|
if should_sort:
|
|
50
|
-
|
|
50
|
+
logger.info("- Sorting the merged bam file ...")
|
|
51
51
|
for key in ["-o", "-@", "--threads"]:
|
|
52
52
|
if key in sort_args:
|
|
53
53
|
raise ValueError(
|
|
@@ -67,16 +67,14 @@ def use_samtools():
|
|
|
67
67
|
run_command(cmd)
|
|
68
68
|
|
|
69
69
|
if should_index:
|
|
70
|
-
|
|
70
|
+
logger.info("- Indexing the output bam file ...")
|
|
71
71
|
cmd = [samtools, "index", "-@", ncores, outfile]
|
|
72
72
|
run_command(cmd)
|
|
73
73
|
|
|
74
|
-
print("Done")
|
|
75
|
-
|
|
76
74
|
|
|
77
75
|
def use_sambamba():
|
|
78
76
|
"""Use sambamba to merge bam files"""
|
|
79
|
-
|
|
77
|
+
logger.info("Using sambamba ...")
|
|
80
78
|
ofile = (
|
|
81
79
|
outfile
|
|
82
80
|
if not should_sort
|
|
@@ -90,11 +88,11 @@ def use_sambamba():
|
|
|
90
88
|
)
|
|
91
89
|
|
|
92
90
|
cmd = [sambamba, "merge", "-t", ncores, *merge_args, ofile, *bamfiles]
|
|
93
|
-
|
|
91
|
+
logger.info("- Merging the bam files ...")
|
|
94
92
|
run_command(cmd)
|
|
95
93
|
|
|
96
94
|
if should_sort:
|
|
97
|
-
|
|
95
|
+
logger.info("- Sorting the merged bam file ...")
|
|
98
96
|
for key in ["-t", "--nthreads", "-o", "--out"]:
|
|
99
97
|
if key in sort_args:
|
|
100
98
|
raise ValueError(
|
|
@@ -115,12 +113,10 @@ def use_sambamba():
|
|
|
115
113
|
run_command(cmd)
|
|
116
114
|
|
|
117
115
|
if should_index:
|
|
118
|
-
|
|
116
|
+
logger.info("- Indexing the output bam file ...")
|
|
119
117
|
cmd = [sambamba, "index", "-t", ncores, outfile]
|
|
120
118
|
run_command(cmd)
|
|
121
119
|
|
|
122
|
-
print("Done")
|
|
123
|
-
|
|
124
120
|
|
|
125
121
|
if __name__ == "__main__":
|
|
126
122
|
if tool == "samtools":
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from biopipen.utils.misc import run_command, logger
|
|
3
|
+
|
|
4
|
+
# using:
|
|
5
|
+
# samtools view --subsample 0.1 --subsample-seed 1234 --threads 4 -b -o out.bam in.bam
|
|
6
|
+
|
|
7
|
+
bamfile = {{ in.bamfile | quote }} # pyright: ignore # noqa
|
|
8
|
+
outfile = Path({{ out.outfile | quote }}) # pyright: ignore
|
|
9
|
+
ncores = {{ envs.ncores | int }} # pyright: ignore
|
|
10
|
+
samtools = {{ envs.samtools | repr }} # pyright: ignore
|
|
11
|
+
tool = {{ envs.tool | repr }} # pyright: ignore
|
|
12
|
+
fraction: float = {{ envs.fraction | repr }} # pyright: ignore
|
|
13
|
+
seed = {{ envs.seed | int }} # pyright: ignore
|
|
14
|
+
should_index = {{ envs.index | repr }} # pyright: ignore
|
|
15
|
+
should_sort = {{ envs.sort | repr }} # pyright: ignore
|
|
16
|
+
sort_args = {{ envs.sort_args | repr }} # pyright: ignore
|
|
17
|
+
|
|
18
|
+
if should_index and not should_sort:
|
|
19
|
+
raise ValueError("Indexing requires sorting")
|
|
20
|
+
|
|
21
|
+
if fraction is None:
|
|
22
|
+
raise ValueError("'envs.fraction' must be provided.")
|
|
23
|
+
|
|
24
|
+
if tool != "samtools":
|
|
25
|
+
raise ValueError(
|
|
26
|
+
f"Tool {tool} is not supported. "
|
|
27
|
+
"Currently only samtools is supported."
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
if fraction > 1:
|
|
31
|
+
# calculate the fraction based on the number of reads
|
|
32
|
+
logger.info("Converting fraction > 1 to a fraction of reads.")
|
|
33
|
+
cmd = [
|
|
34
|
+
samtools,
|
|
35
|
+
"view",
|
|
36
|
+
"--threads",
|
|
37
|
+
ncores,
|
|
38
|
+
"-c",
|
|
39
|
+
bamfile
|
|
40
|
+
]
|
|
41
|
+
nreads = run_command(cmd, stdout="return").strip() # type: ignore
|
|
42
|
+
fraction = fraction / float(int(nreads))
|
|
43
|
+
|
|
44
|
+
ofile = (
|
|
45
|
+
outfile
|
|
46
|
+
if not should_sort
|
|
47
|
+
else outfile.with_stem(f"{outfile.stem}.unsorted")
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
cmd = [
|
|
51
|
+
samtools,
|
|
52
|
+
"view",
|
|
53
|
+
"--subsample",
|
|
54
|
+
fraction,
|
|
55
|
+
"--subsample-seed",
|
|
56
|
+
seed,
|
|
57
|
+
"--threads",
|
|
58
|
+
ncores,
|
|
59
|
+
"-b",
|
|
60
|
+
"-o",
|
|
61
|
+
ofile,
|
|
62
|
+
bamfile
|
|
63
|
+
]
|
|
64
|
+
run_command(cmd, fg=True)
|
|
65
|
+
|
|
66
|
+
if should_sort:
|
|
67
|
+
logger.info("Sorting the output bam file.")
|
|
68
|
+
for key in ["-o", "-@", "--threads"]:
|
|
69
|
+
if key in sort_args:
|
|
70
|
+
raise ValueError(
|
|
71
|
+
f"envs.sort_args cannot contain {key}, "
|
|
72
|
+
"which is managed by the script"
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
cmd = [
|
|
76
|
+
samtools,
|
|
77
|
+
"sort",
|
|
78
|
+
"-@",
|
|
79
|
+
ncores,
|
|
80
|
+
*sort_args,
|
|
81
|
+
"-o",
|
|
82
|
+
outfile,
|
|
83
|
+
ofile
|
|
84
|
+
]
|
|
85
|
+
run_command(cmd, fg=True)
|
|
86
|
+
|
|
87
|
+
if should_index:
|
|
88
|
+
logger.info("Indexing the output bam file.")
|
|
89
|
+
cmd = [samtools, "index", "-@", ncores, outfile]
|
|
90
|
+
run_command(cmd, fg=True)
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
from hashlib import md5
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
4
|
+
|
|
5
|
+
infile: str = {{ in.bamfile | quote }} # pyright: ignore # noqa
|
|
6
|
+
outfile = Path({{ out.outfile | quote }}) # pyright: ignore
|
|
7
|
+
args: dict = {{ envs | dict | repr }} # pyright: ignore
|
|
8
|
+
ncores = args.pop("ncores")
|
|
9
|
+
tool = args.pop("tool")
|
|
10
|
+
samtools = args.pop("samtools")
|
|
11
|
+
sambamba = args.pop("sambamba")
|
|
12
|
+
tmpdir = args.pop("tmpdir")
|
|
13
|
+
byname = args.pop("byname")
|
|
14
|
+
should_index = args.pop("index")
|
|
15
|
+
sig = md5(infile.encode()).hexdigest()
|
|
16
|
+
tmpdir = Path(tmpdir).joinpath(
|
|
17
|
+
f"biopipen_BamSort_{{job.index}}_{sig}_{Path(infile).name}"
|
|
18
|
+
)
|
|
19
|
+
tmpdir.mkdir(parents=True, exist_ok=True)
|
|
20
|
+
tmpdir = str(tmpdir)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def use_samtools():
|
|
24
|
+
"""Use samtools to sort/index bam file.
|
|
25
|
+
|
|
26
|
+
Usage: samtools sort [options...] [in.bam]
|
|
27
|
+
Options:
|
|
28
|
+
-l INT Set compression level, from 0 (uncompressed) to 9 (best)
|
|
29
|
+
-u Output uncompressed data (equivalent to -l 0)
|
|
30
|
+
-m INT Set maximum memory per thread; suffix K/M/G recognized [768M]
|
|
31
|
+
-M Use minimiser for clustering unaligned/unplaced reads
|
|
32
|
+
-K INT Kmer size to use for minimiser [20]
|
|
33
|
+
-n Sort by read name (not compatible with samtools index command)
|
|
34
|
+
-t TAG Sort by value of TAG. Uses position as secondary index (or read name if -n is set)
|
|
35
|
+
-o FILE Write final output to FILE rather than standard output
|
|
36
|
+
-T PREFIX Write temporary files to PREFIX.nnnn.bam
|
|
37
|
+
--no-PG
|
|
38
|
+
Do not add a PG line
|
|
39
|
+
--template-coordinate
|
|
40
|
+
Sort by template-coordinate
|
|
41
|
+
--input-fmt-option OPT[=VAL]
|
|
42
|
+
Specify a single input file format option in the form
|
|
43
|
+
of OPTION or OPTION=VALUE
|
|
44
|
+
-O, --output-fmt FORMAT[,OPT[=VAL]]...
|
|
45
|
+
Specify output format (SAM, BAM, CRAM)
|
|
46
|
+
--output-fmt-option OPT[=VAL]
|
|
47
|
+
Specify a single output file format option in the form
|
|
48
|
+
of OPTION or OPTION=VALUE
|
|
49
|
+
--reference FILE
|
|
50
|
+
Reference sequence FASTA FILE [null]
|
|
51
|
+
-@, --threads INT
|
|
52
|
+
Number of additional threads to use [0]
|
|
53
|
+
--write-index
|
|
54
|
+
Automatically index the output files [off]
|
|
55
|
+
--verbosity INT
|
|
56
|
+
Set level of verbosity
|
|
57
|
+
""" # noqa
|
|
58
|
+
sargs = args.copy()
|
|
59
|
+
sargs["n"] = byname
|
|
60
|
+
sargs["T"] = f"{tmpdir}/tmp"
|
|
61
|
+
sargs["threads"] = ncores
|
|
62
|
+
|
|
63
|
+
if should_index:
|
|
64
|
+
sargs["write-index"] = True
|
|
65
|
+
# https://github.com/samtools/samtools/issues/1196
|
|
66
|
+
sargs["o"] = f"{outfile}##idx##{outfile}.bai"
|
|
67
|
+
else:
|
|
68
|
+
sargs["o"] = outfile
|
|
69
|
+
|
|
70
|
+
n_outfmt = sum(["O" in sargs, "output-fmt" in sargs])
|
|
71
|
+
if n_outfmt > 1:
|
|
72
|
+
raise ValueError(
|
|
73
|
+
"envs.args cannot contain both 'O' and 'output-fmt'"
|
|
74
|
+
)
|
|
75
|
+
if n_outfmt == 0:
|
|
76
|
+
sargs["O"] = "BAM"
|
|
77
|
+
|
|
78
|
+
cmd = [
|
|
79
|
+
samtools,
|
|
80
|
+
"sort",
|
|
81
|
+
*dict_to_cli_args(sargs),
|
|
82
|
+
infile,
|
|
83
|
+
]
|
|
84
|
+
run_command(cmd)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def use_sambamba():
|
|
88
|
+
"""Use sambamba to sort/index bam file.
|
|
89
|
+
|
|
90
|
+
sambamba 0.8.2
|
|
91
|
+
by Artem Tarasov and Pjotr Prins (C) 2012-2021
|
|
92
|
+
LDC 1.28.1 / DMD v2.098.1 / LLVM12.0.0 / bootstrap LDC - the LLVM D compiler (1.28.1)
|
|
93
|
+
|
|
94
|
+
Usage: sambamba-sort [options] <input.bam>
|
|
95
|
+
|
|
96
|
+
Options: -m, --memory-limit=LIMIT
|
|
97
|
+
approximate total memory limit for all threads (by default 2GB)
|
|
98
|
+
--tmpdir=TMPDIR
|
|
99
|
+
directory for storing intermediate files; default is system directory for temporary files
|
|
100
|
+
-o, --out=OUTPUTFILE
|
|
101
|
+
output file name; if not provided, the result is written to a file with .sorted.bam extension
|
|
102
|
+
-n, --sort-by-name
|
|
103
|
+
sort by read name instead of coordinate (lexicographical order)
|
|
104
|
+
--sort-picard
|
|
105
|
+
sort by query name like in picard
|
|
106
|
+
-N, --natural-sort
|
|
107
|
+
sort by read name instead of coordinate (so-called 'natural' sort as in samtools)
|
|
108
|
+
-M, --match-mates
|
|
109
|
+
pull mates of the same alignment together when sorting by read name
|
|
110
|
+
-l, --compression-level=COMPRESSION_LEVEL
|
|
111
|
+
level of compression for sorted BAM, from 0 to 9
|
|
112
|
+
-u, --uncompressed-chunks
|
|
113
|
+
write sorted chunks as uncompressed BAM (default is writing with compression level 1), that might be faster in some cases but uses more disk space
|
|
114
|
+
-p, --show-progress
|
|
115
|
+
show progressbar in STDERR
|
|
116
|
+
-t, --nthreads=NTHREADS
|
|
117
|
+
use specified number of threads
|
|
118
|
+
-F, --filter=FILTER
|
|
119
|
+
keep only reads that satisfy FILTER
|
|
120
|
+
""" # noqa
|
|
121
|
+
sargs = args.copy()
|
|
122
|
+
sargs["nthreads"] = ncores
|
|
123
|
+
sargs["n"] = byname
|
|
124
|
+
sargs["tmpdir"] = tmpdir
|
|
125
|
+
sargs["o"] = outfile
|
|
126
|
+
cmd = [
|
|
127
|
+
sambamba,
|
|
128
|
+
"sort",
|
|
129
|
+
*dict_to_cli_args(sargs, sep="="),
|
|
130
|
+
infile,
|
|
131
|
+
]
|
|
132
|
+
run_command(cmd)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
if __name__ == "__main__":
|
|
136
|
+
if tool == "samtools":
|
|
137
|
+
use_samtools()
|
|
138
|
+
elif tool == "sambamba":
|
|
139
|
+
use_sambamba()
|
|
140
|
+
else:
|
|
141
|
+
raise ValueError(f"Unknown tool: {tool}")
|
|
@@ -2,12 +2,12 @@ from pathlib import Path
|
|
|
2
2
|
from biopipen.utils.misc import run_command
|
|
3
3
|
from biopipen.utils.reference import bam_index
|
|
4
4
|
|
|
5
|
-
bamfile = {{in.bamfile | quote}} # pyright: ignore
|
|
6
|
-
outdir = {{out.outdir | quote}} # pyright: ignore
|
|
7
|
-
tool = {{envs.tool | quote}} # pyright: ignore
|
|
8
|
-
samtools = {{envs.samtools | quote}} # pyright: ignore
|
|
9
|
-
sambamba = {{envs.sambamba | quote}} # pyright: ignore
|
|
10
|
-
ncores = {{envs.ncores | repr}} # pyright: ignore
|
|
5
|
+
bamfile: str = {{in.bamfile | quote}} # pyright: ignore # noqa
|
|
6
|
+
outdir: str = {{out.outdir | quote}} # pyright: ignore
|
|
7
|
+
tool: str = {{envs.tool | quote}} # pyright: ignore
|
|
8
|
+
samtools: str = {{envs.samtools | quote}} # pyright: ignore
|
|
9
|
+
sambamba: str = {{envs.sambamba | quote}} # pyright: ignore
|
|
10
|
+
ncores: int = {{envs.ncores | repr}} # pyright: ignore
|
|
11
11
|
keep_other_sq = {{envs.keep_other_sq | repr}} # pyright: ignore
|
|
12
12
|
chroms_to_keep = {{envs.chroms | repr}} # pyright: ignore
|
|
13
13
|
should_index = {{envs.index | bool}} # pyright: ignore
|
|
@@ -17,13 +17,13 @@ def _remove_other_sq(infile, chrom, outfile):
|
|
|
17
17
|
exe = samtools if tool == "samtools" else sambamba
|
|
18
18
|
print("\nRemoving other chromosomes in @SQ in header")
|
|
19
19
|
header_cmd = [exe, "view", "-H", infile]
|
|
20
|
-
header_p = run_command(
|
|
20
|
+
header_p = run_command( # type: ignore
|
|
21
21
|
header_cmd,
|
|
22
22
|
stdout=True,
|
|
23
23
|
wait=False,
|
|
24
24
|
print_command=True,
|
|
25
25
|
)
|
|
26
|
-
header = header_p.stdout.read().decode().strip().splitlines()
|
|
26
|
+
header = header_p.stdout.read().decode().strip().splitlines() # type: ignore
|
|
27
27
|
new_header = []
|
|
28
28
|
for line in header:
|
|
29
29
|
if line.startswith("@SQ"):
|
|
@@ -63,7 +63,7 @@ def use_samtools():
|
|
|
63
63
|
"| grep '^@SQ' | cut -f 2 | cut -d ':' -f 2"
|
|
64
64
|
)
|
|
65
65
|
p = run_command(cmd, stdout=True, wait=False)
|
|
66
|
-
chroms = p.stdout.read().decode().strip().splitlines()
|
|
66
|
+
chroms = p.stdout.read().decode().strip().splitlines() # type: ignore
|
|
67
67
|
else:
|
|
68
68
|
print("\nUsing provided chromosomes")
|
|
69
69
|
chroms = chroms_to_keep
|
|
@@ -121,7 +121,7 @@ def use_sambamba():
|
|
|
121
121
|
"| grep '^@SQ' | cut -f 2 | cut -d ':' -f 2"
|
|
122
122
|
)
|
|
123
123
|
p = run_command(cmd, stdout=True, wait=False)
|
|
124
|
-
chroms = p.stdout.read().decode().splitlines()
|
|
124
|
+
chroms = p.stdout.read().decode().splitlines() # type: ignore
|
|
125
125
|
else:
|
|
126
126
|
print("\nUsing provided chromosomes")
|
|
127
127
|
chroms = chroms_to_keep
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from biopipen.utils.misc import run_command, logger
|
|
3
|
+
|
|
4
|
+
# using:
|
|
5
|
+
# samtools view --target-file regions.bed -b --threads 4 -o out.bam in.bam
|
|
6
|
+
|
|
7
|
+
bamfile = {{ in.bamfile | quote }} # pyright: ignore # noqa
|
|
8
|
+
bedfile = {{ in.bedfile | quote }} # pyright: ignore # noqa
|
|
9
|
+
outfile = Path({{ out.outfile | quote }}) # pyright: ignore
|
|
10
|
+
ncores = {{ envs.ncores | int }} # pyright: ignore
|
|
11
|
+
samtools = {{ envs.samtools | repr }} # pyright: ignore
|
|
12
|
+
tool = {{ envs.tool | repr }} # pyright: ignore
|
|
13
|
+
should_index = {{ envs.index | repr }} # pyright: ignore
|
|
14
|
+
|
|
15
|
+
if tool != "samtools":
|
|
16
|
+
raise ValueError(
|
|
17
|
+
f"Tool {tool} is not supported. "
|
|
18
|
+
"Currently only samtools is supported."
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
cmd = [
|
|
22
|
+
samtools,
|
|
23
|
+
"view",
|
|
24
|
+
"--target-file",
|
|
25
|
+
bedfile,
|
|
26
|
+
"-b",
|
|
27
|
+
"--threads",
|
|
28
|
+
ncores,
|
|
29
|
+
"-o",
|
|
30
|
+
outfile,
|
|
31
|
+
bamfile
|
|
32
|
+
]
|
|
33
|
+
run_command(cmd, fg=True)
|
|
34
|
+
|
|
35
|
+
if should_index:
|
|
36
|
+
logger.info("Indexing the output bam file.")
|
|
37
|
+
cmd = [samtools, "index", "-@", ncores, outfile]
|
|
38
|
+
run_command(cmd, fg=True)
|
biopipen/scripts/bam/CNAClinic.R
CHANGED
|
@@ -1,13 +1,40 @@
|
|
|
1
|
-
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
1
|
library(parallel)
|
|
3
2
|
library(dplyr)
|
|
3
|
+
library(biopipen.utils)
|
|
4
4
|
library(CNAclinic)
|
|
5
5
|
|
|
6
|
+
# https://github.com/sdchandra/CNAclinic/issues/4
|
|
7
|
+
.reorderByChrom.patched <- function(x){
|
|
8
|
+
chromosome <- as.character(x$chromosome)
|
|
9
|
+
chromosome[which(chromosome == "X")] <- "23"
|
|
10
|
+
chromosome[which(chromosome == "Y")] <- "24"
|
|
11
|
+
chromosome[which(chromosome == "MT")] <- "25"
|
|
12
|
+
|
|
13
|
+
x$chromosome <- as.numeric(chromosome)
|
|
14
|
+
# Error in xtfrm.data.frame(x) : cannot xtfrm data frames
|
|
15
|
+
# x <- x[order(x["chromosome"], x["start"]), ]
|
|
16
|
+
x <- x[order(x[, "chromosome"], x[, "start"]), ]
|
|
17
|
+
|
|
18
|
+
x$chromosome <- as.character(x$chromosome)
|
|
19
|
+
# Replace 23 by X:
|
|
20
|
+
x$chromosome[which(x$chromosome == "23")] <- "X"
|
|
21
|
+
|
|
22
|
+
# Replace 24 by Y
|
|
23
|
+
x$chromosome[which(x$chromosome == "24")] <- "Y"
|
|
24
|
+
|
|
25
|
+
# Replace 25 by MT
|
|
26
|
+
x$chromosome[which(x$chromosome == "25")] <- "MT"
|
|
27
|
+
|
|
28
|
+
return(x)
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
monkey_patch("CNAclinic", ".reorderByChrom", .reorderByChrom.patched)
|
|
32
|
+
|
|
6
33
|
metafile = {{in.metafile | r}}
|
|
7
34
|
outdir = {{out.outdir | r}}
|
|
8
35
|
ncores = {{envs.ncores | int}}
|
|
9
36
|
binsizer = {{envs.binsizer | r}}
|
|
10
|
-
binsize = {{envs.binsize |
|
|
37
|
+
binsize = {{envs.binsize | r}}
|
|
11
38
|
seed = {{envs.seed | int}}
|
|
12
39
|
genome = {{envs.genome | r}}
|
|
13
40
|
run_args = {{envs.run_args | r}}
|
|
@@ -29,7 +56,11 @@ if (("Group" %in% metacols) && !("Patient" %in% metacols)) {
|
|
|
29
56
|
}
|
|
30
57
|
|
|
31
58
|
if (!("Binsizer" %in% metacols) && is.null(binsizer) && is.null(binsize)) {
|
|
32
|
-
stop(
|
|
59
|
+
stop(
|
|
60
|
+
"The metadata file must have a column named 'Binsizer' or ",
|
|
61
|
+
"the `envs.binsizer` must be specified when no `envs.binsize` is provided. ",
|
|
62
|
+
"The Binsizer column should indicate which samples are to be used for binsize selection."
|
|
63
|
+
)
|
|
33
64
|
}
|
|
34
65
|
|
|
35
66
|
# add missing columns
|
|
@@ -108,7 +139,7 @@ do_one_sample = function(i) {
|
|
|
108
139
|
bamfile,
|
|
109
140
|
sample,
|
|
110
141
|
refSamples=refSamples,
|
|
111
|
-
binSize=binsize
|
|
142
|
+
binSize=binsize / 1000
|
|
112
143
|
)
|
|
113
144
|
|
|
114
145
|
run_args_i = run_args
|
|
@@ -118,7 +149,12 @@ do_one_sample = function(i) {
|
|
|
118
149
|
|
|
119
150
|
plot_args_i = plot_args
|
|
120
151
|
plot_args_i$object = CNAData
|
|
121
|
-
genomewide_plot
|
|
152
|
+
genomewide_plot <- tryCatch({
|
|
153
|
+
do_call(plotSampleData, plot_args_i)
|
|
154
|
+
}, error = function(e) {
|
|
155
|
+
message("Error in plotting genomewide data for sample ", sample, ": ", e$message)
|
|
156
|
+
return(ggplot2::ggplot() + ggplot2::labs(title = paste("Error in plotting genomewide data for sample", sample)))
|
|
157
|
+
})
|
|
122
158
|
|
|
123
159
|
odir = file.path(outdir, sample)
|
|
124
160
|
dir.create(odir, recursive = TRUE, showWarnings = FALSE)
|