biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
@@ -1,8 +1,8 @@
1
1
  from pathlib import Path
2
- from biopipen.utils.misc import run_command
2
+ from biopipen.utils.misc import run_command, logger
3
3
 
4
- bamfiles = {{in.bamfiles | repr}} # pyright: ignore
5
- outfile = Path({{out.outfile | repr}}) # pyright: ignore
4
+ bamfiles = {{in.bamfiles | default: [] | each: str}} # pyright: ignore # noqa
5
+ outfile = Path({{out.outfile | quote}}) # pyright: ignore
6
6
  ncores = {{envs.ncores | int}} # pyright: ignore
7
7
  tool = {{envs.tool | quote}} # pyright: ignore
8
8
  samtools = {{envs.samtools | quote}} # pyright: ignore
@@ -18,7 +18,7 @@ if should_index and not should_sort:
18
18
 
19
19
  def use_samtools():
20
20
  """Use samtools to merge bam files"""
21
- print("Using samtools")
21
+ logger.info("Using samtools ...")
22
22
  ofile = (
23
23
  outfile
24
24
  if not should_sort
@@ -43,11 +43,11 @@ def use_samtools():
43
43
  *merge_args,
44
44
  *bamfiles,
45
45
  ]
46
- print("- Merging")
46
+ logger.info("- Merging the bam files ...")
47
47
  run_command(cmd)
48
48
 
49
49
  if should_sort:
50
- print("- Sorting")
50
+ logger.info("- Sorting the merged bam file ...")
51
51
  for key in ["-o", "-@", "--threads"]:
52
52
  if key in sort_args:
53
53
  raise ValueError(
@@ -67,16 +67,14 @@ def use_samtools():
67
67
  run_command(cmd)
68
68
 
69
69
  if should_index:
70
- print("- Indexing")
70
+ logger.info("- Indexing the output bam file ...")
71
71
  cmd = [samtools, "index", "-@", ncores, outfile]
72
72
  run_command(cmd)
73
73
 
74
- print("Done")
75
-
76
74
 
77
75
  def use_sambamba():
78
76
  """Use sambamba to merge bam files"""
79
- print("Using sambamba")
77
+ logger.info("Using sambamba ...")
80
78
  ofile = (
81
79
  outfile
82
80
  if not should_sort
@@ -90,11 +88,11 @@ def use_sambamba():
90
88
  )
91
89
 
92
90
  cmd = [sambamba, "merge", "-t", ncores, *merge_args, ofile, *bamfiles]
93
- print("- Merging")
91
+ logger.info("- Merging the bam files ...")
94
92
  run_command(cmd)
95
93
 
96
94
  if should_sort:
97
- print("- Sorting")
95
+ logger.info("- Sorting the merged bam file ...")
98
96
  for key in ["-t", "--nthreads", "-o", "--out"]:
99
97
  if key in sort_args:
100
98
  raise ValueError(
@@ -115,12 +113,10 @@ def use_sambamba():
115
113
  run_command(cmd)
116
114
 
117
115
  if should_index:
118
- print("- Indexing")
116
+ logger.info("- Indexing the output bam file ...")
119
117
  cmd = [sambamba, "index", "-t", ncores, outfile]
120
118
  run_command(cmd)
121
119
 
122
- print("Done")
123
-
124
120
 
125
121
  if __name__ == "__main__":
126
122
  if tool == "samtools":
@@ -0,0 +1,90 @@
1
+ from pathlib import Path
2
+ from biopipen.utils.misc import run_command, logger
3
+
4
+ # using:
5
+ # samtools view --subsample 0.1 --subsample-seed 1234 --threads 4 -b -o out.bam in.bam
6
+
7
+ bamfile = {{ in.bamfile | quote }} # pyright: ignore # noqa
8
+ outfile = Path({{ out.outfile | quote }}) # pyright: ignore
9
+ ncores = {{ envs.ncores | int }} # pyright: ignore
10
+ samtools = {{ envs.samtools | repr }} # pyright: ignore
11
+ tool = {{ envs.tool | repr }} # pyright: ignore
12
+ fraction: float = {{ envs.fraction | repr }} # pyright: ignore
13
+ seed = {{ envs.seed | int }} # pyright: ignore
14
+ should_index = {{ envs.index | repr }} # pyright: ignore
15
+ should_sort = {{ envs.sort | repr }} # pyright: ignore
16
+ sort_args = {{ envs.sort_args | repr }} # pyright: ignore
17
+
18
+ if should_index and not should_sort:
19
+ raise ValueError("Indexing requires sorting")
20
+
21
+ if fraction is None:
22
+ raise ValueError("'envs.fraction' must be provided.")
23
+
24
+ if tool != "samtools":
25
+ raise ValueError(
26
+ f"Tool {tool} is not supported. "
27
+ "Currently only samtools is supported."
28
+ )
29
+
30
+ if fraction > 1:
31
+ # calculate the fraction based on the number of reads
32
+ logger.info("Converting fraction > 1 to a fraction of reads.")
33
+ cmd = [
34
+ samtools,
35
+ "view",
36
+ "--threads",
37
+ ncores,
38
+ "-c",
39
+ bamfile
40
+ ]
41
+ nreads = run_command(cmd, stdout="return").strip() # type: ignore
42
+ fraction = fraction / float(int(nreads))
43
+
44
+ ofile = (
45
+ outfile
46
+ if not should_sort
47
+ else outfile.with_stem(f"{outfile.stem}.unsorted")
48
+ )
49
+
50
+ cmd = [
51
+ samtools,
52
+ "view",
53
+ "--subsample",
54
+ fraction,
55
+ "--subsample-seed",
56
+ seed,
57
+ "--threads",
58
+ ncores,
59
+ "-b",
60
+ "-o",
61
+ ofile,
62
+ bamfile
63
+ ]
64
+ run_command(cmd, fg=True)
65
+
66
+ if should_sort:
67
+ logger.info("Sorting the output bam file.")
68
+ for key in ["-o", "-@", "--threads"]:
69
+ if key in sort_args:
70
+ raise ValueError(
71
+ f"envs.sort_args cannot contain {key}, "
72
+ "which is managed by the script"
73
+ )
74
+
75
+ cmd = [
76
+ samtools,
77
+ "sort",
78
+ "-@",
79
+ ncores,
80
+ *sort_args,
81
+ "-o",
82
+ outfile,
83
+ ofile
84
+ ]
85
+ run_command(cmd, fg=True)
86
+
87
+ if should_index:
88
+ logger.info("Indexing the output bam file.")
89
+ cmd = [samtools, "index", "-@", ncores, outfile]
90
+ run_command(cmd, fg=True)
@@ -0,0 +1,141 @@
1
+ from hashlib import md5
2
+ from pathlib import Path
3
+ from biopipen.utils.misc import run_command, dict_to_cli_args
4
+
5
+ infile: str = {{ in.bamfile | quote }} # pyright: ignore # noqa
6
+ outfile = Path({{ out.outfile | quote }}) # pyright: ignore
7
+ args: dict = {{ envs | dict | repr }} # pyright: ignore
8
+ ncores = args.pop("ncores")
9
+ tool = args.pop("tool")
10
+ samtools = args.pop("samtools")
11
+ sambamba = args.pop("sambamba")
12
+ tmpdir = args.pop("tmpdir")
13
+ byname = args.pop("byname")
14
+ should_index = args.pop("index")
15
+ sig = md5(infile.encode()).hexdigest()
16
+ tmpdir = Path(tmpdir).joinpath(
17
+ f"biopipen_BamSort_{{job.index}}_{sig}_{Path(infile).name}"
18
+ )
19
+ tmpdir.mkdir(parents=True, exist_ok=True)
20
+ tmpdir = str(tmpdir)
21
+
22
+
def use_samtools():
    """Sort (and optionally index) the input bam file with ``samtools sort``.

    The options remaining in ``args`` are forwarded to samtools through
    ``dict_to_cli_args``. The following options are set by this function:

    - ``-n``: taken from ``byname`` (sort by read name).
    - ``-T``: temporary file prefix inside the job's temporary directory.
    - ``--threads``: taken from ``ncores``.
    - ``-o``: the output file. When indexing is requested, ``--write-index``
      is enabled and the index path is appended with the ``##idx##`` syntax
      (see https://github.com/samtools/samtools/issues/1196).
    - ``-O``: defaults to ``BAM`` when neither ``O`` nor ``output-fmt`` is
      given.

    Other ``samtools sort`` options include ``-l``, ``-u``, ``-m``, ``-M``,
    ``-K``, ``-t``, ``--no-PG``, ``--template-coordinate``,
    ``--input-fmt-option``, ``--output-fmt-option``, ``--reference`` and
    ``--verbosity``.

    Raises:
        ValueError: If both ``O`` and ``output-fmt`` are provided.
    """
    sargs = {**args, "n": byname, "T": f"{tmpdir}/tmp", "threads": ncores}

    if should_index:
        sargs["write-index"] = True
        # https://github.com/samtools/samtools/issues/1196
        sargs["o"] = f"{outfile}##idx##{outfile}.bai"
    else:
        sargs["o"] = outfile

    has_short_fmt = "O" in sargs
    has_long_fmt = "output-fmt" in sargs
    if has_short_fmt and has_long_fmt:
        raise ValueError(
            "envs.args cannot contain both 'O' and 'output-fmt'"
        )
    if not (has_short_fmt or has_long_fmt):
        sargs["O"] = "BAM"

    run_command([samtools, "sort", *dict_to_cli_args(sargs), infile])
85
+
86
+
def use_sambamba():
    """Sort the input bam file with ``sambamba sort``.

    The options remaining in ``args`` are forwarded to sambamba through
    ``dict_to_cli_args`` using ``=`` as the separator. The following options
    are set by this function:

    - ``--nthreads``: taken from ``ncores``.
    - ``-n``: taken from ``byname`` (sort by read name).
    - ``--tmpdir``: the job's temporary directory.
    - ``-o``: the output file.

    Other ``sambamba sort`` options (from sambamba 0.8.2) include
    ``-m/--memory-limit``, ``--sort-picard``, ``-N/--natural-sort``,
    ``-M/--match-mates``, ``-l/--compression-level``,
    ``-u/--uncompressed-chunks``, ``-p/--show-progress`` and ``-F/--filter``.
    """
    sargs = {
        **args,
        "nthreads": ncores,
        "n": byname,
        "tmpdir": tmpdir,
        "o": outfile,
    }
    run_command(
        [sambamba, "sort", *dict_to_cli_args(sargs, sep="="), infile]
    )
133
+
134
+
if __name__ == "__main__":
    # Reject anything other than the two supported sorters up front.
    if tool not in ("samtools", "sambamba"):
        raise ValueError(f"Unknown tool: {tool}")

    runner = use_samtools if tool == "samtools" else use_sambamba
    runner()
@@ -2,12 +2,12 @@ from pathlib import Path
2
2
  from biopipen.utils.misc import run_command
3
3
  from biopipen.utils.reference import bam_index
4
4
 
5
- bamfile = {{in.bamfile | quote}} # pyright: ignore
6
- outdir = {{out.outdir | quote}} # pyright: ignore
7
- tool = {{envs.tool | quote}} # pyright: ignore
8
- samtools = {{envs.samtools | quote}} # pyright: ignore
9
- sambamba = {{envs.sambamba | quote}} # pyright: ignore
10
- ncores = {{envs.ncores | repr}} # pyright: ignore
5
+ bamfile: str = {{in.bamfile | quote}} # pyright: ignore # noqa
6
+ outdir: str = {{out.outdir | quote}} # pyright: ignore
7
+ tool: str = {{envs.tool | quote}} # pyright: ignore
8
+ samtools: str = {{envs.samtools | quote}} # pyright: ignore
9
+ sambamba: str = {{envs.sambamba | quote}} # pyright: ignore
10
+ ncores: int = {{envs.ncores | repr}} # pyright: ignore
11
11
  keep_other_sq = {{envs.keep_other_sq | repr}} # pyright: ignore
12
12
  chroms_to_keep = {{envs.chroms | repr}} # pyright: ignore
13
13
  should_index = {{envs.index | bool}} # pyright: ignore
@@ -17,13 +17,13 @@ def _remove_other_sq(infile, chrom, outfile):
17
17
  exe = samtools if tool == "samtools" else sambamba
18
18
  print("\nRemoving other chromosomes in @SQ in header")
19
19
  header_cmd = [exe, "view", "-H", infile]
20
- header_p = run_command(
20
+ header_p = run_command( # type: ignore
21
21
  header_cmd,
22
22
  stdout=True,
23
23
  wait=False,
24
24
  print_command=True,
25
25
  )
26
- header = header_p.stdout.read().decode().strip().splitlines()
26
+ header = header_p.stdout.read().decode().strip().splitlines() # type: ignore
27
27
  new_header = []
28
28
  for line in header:
29
29
  if line.startswith("@SQ"):
@@ -63,7 +63,7 @@ def use_samtools():
63
63
  "| grep '^@SQ' | cut -f 2 | cut -d ':' -f 2"
64
64
  )
65
65
  p = run_command(cmd, stdout=True, wait=False)
66
- chroms = p.stdout.read().decode().strip().splitlines()
66
+ chroms = p.stdout.read().decode().strip().splitlines() # type: ignore
67
67
  else:
68
68
  print("\nUsing provided chromosomes")
69
69
  chroms = chroms_to_keep
@@ -121,7 +121,7 @@ def use_sambamba():
121
121
  "| grep '^@SQ' | cut -f 2 | cut -d ':' -f 2"
122
122
  )
123
123
  p = run_command(cmd, stdout=True, wait=False)
124
- chroms = p.stdout.read().decode().splitlines()
124
+ chroms = p.stdout.read().decode().splitlines() # type: ignore
125
125
  else:
126
126
  print("\nUsing provided chromosomes")
127
127
  chroms = chroms_to_keep
@@ -0,0 +1,38 @@
1
+ from pathlib import Path
2
+ from biopipen.utils.misc import run_command, logger
3
+
4
+ # using:
5
+ # samtools view --subsample 0.1 --subsample-seed 1234 --threads 4 -b -o out.bam in.bam
6
+
7
+ bamfile = {{ in.bamfile | quote }} # pyright: ignore # noqa
8
+ bedfile = {{ in.bedfile | quote }} # pyright: ignore # noqa
9
+ outfile = Path({{ out.outfile | quote }}) # pyright: ignore
10
+ ncores = {{ envs.ncores | int }} # pyright: ignore
11
+ samtools = {{ envs.samtools | repr }} # pyright: ignore
12
+ tool = {{ envs.tool | repr }} # pyright: ignore
13
+ should_index = {{ envs.index | repr }} # pyright: ignore
14
+
15
+ if tool != "samtools":
16
+ raise ValueError(
17
+ f"Tool {tool} is not supported. "
18
+ "Currently only samtools is supported."
19
+ )
20
+
21
+ cmd = [
22
+ samtools,
23
+ "view",
24
+ "--target-file",
25
+ bedfile,
26
+ "-b",
27
+ "--threads",
28
+ ncores,
29
+ "-o",
30
+ outfile,
31
+ bamfile
32
+ ]
33
+ run_command(cmd, fg=True)
34
+
35
+ if should_index:
36
+ logger.info("Indexing the output bam file.")
37
+ cmd = [samtools, "index", "-@", ncores, outfile]
38
+ run_command(cmd, fg=True)
@@ -1,13 +1,40 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
1
  library(parallel)
3
2
  library(dplyr)
3
+ library(biopipen.utils)
4
4
  library(CNAclinic)
5
5
 
6
+ # https://github.com/sdchandra/CNAclinic/issues/4
.reorderByChrom.patched <- function(x){
    # Order the rows of `x` by chromosome and then by start position.
    # X, Y and MT are temporarily coded as 23, 24 and 25 so that the
    # chromosome column can be sorted numerically; the labels are restored
    # afterwards. `x` must provide `chromosome` and `start` columns.
    codes <- c(X = "23", Y = "24", MT = "25")

    chrom <- as.character(x$chromosome)
    for (label in names(codes)) {
        chrom[chrom %in% label] <- codes[[label]]
    }
    x$chromosome <- as.numeric(chrom)

    # Column extraction via x[, "col"] yields plain vectors for order();
    # the upstream x["col"] form fails with
    # "Error in xtfrm.data.frame(x) : cannot xtfrm data frames".
    x <- x[order(x[, "chromosome"], x[, "start"]), ]

    # Translate the numeric codes back to X, Y and MT.
    chrom <- as.character(x$chromosome)
    for (label in names(codes)) {
        chrom[chrom %in% codes[[label]]] <- label
    }
    x$chromosome <- chrom

    x
}
30
+
31
+ monkey_patch("CNAclinic", ".reorderByChrom", .reorderByChrom.patched)
32
+
6
33
  metafile = {{in.metafile | r}}
7
34
  outdir = {{out.outdir | r}}
8
35
  ncores = {{envs.ncores | int}}
9
36
  binsizer = {{envs.binsizer | r}}
10
- binsize = {{envs.binsize | int}}
37
+ binsize = {{envs.binsize | r}}
11
38
  seed = {{envs.seed | int}}
12
39
  genome = {{envs.genome | r}}
13
40
  run_args = {{envs.run_args | r}}
@@ -29,7 +56,11 @@ if (("Group" %in% metacols) && !("Patient" %in% metacols)) {
29
56
  }
30
57
 
31
58
  if (!("Binsizer" %in% metacols) && is.null(binsizer) && is.null(binsize)) {
32
- stop("The metadata file must have a column named 'Binsizer' or the `envs.binsizer` must be specified")
59
+ stop(
60
+ "The metadata file must have a column named 'Binsizer' or ",
61
+ "the `envs.binsizer` must be specified when no `envs.binsize` is provided. ",
62
+ "The Binsizer column should indicate which samples are to be used for binsize selection."
63
+ )
33
64
  }
34
65
 
35
66
  # add missing columns
@@ -108,7 +139,7 @@ do_one_sample = function(i) {
108
139
  bamfile,
109
140
  sample,
110
141
  refSamples=refSamples,
111
- binSize=binsize
142
+ binSize=binsize / 1000
112
143
  )
113
144
 
114
145
  run_args_i = run_args
@@ -118,7 +149,12 @@ do_one_sample = function(i) {
118
149
 
119
150
  plot_args_i = plot_args
120
151
  plot_args_i$object = CNAData
121
- genomewide_plot = do_call(plotSampleData, plot_args_i)
152
+ genomewide_plot <- tryCatch({
153
+ do_call(plotSampleData, plot_args_i)
154
+ }, error = function(e) {
155
+ message("Error in plotting genomewide data for sample ", sample, ": ", e$message)
156
+ return(ggplot2::ggplot() + ggplot2::labs(title = paste("Error in plotting genomewide data for sample", sample)))
157
+ })
122
158
 
123
159
  odir = file.path(outdir, sample)
124
160
  dir.create(odir, recursive = TRUE, showWarnings = FALSE)