biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
biopipen/ns/vcf.py CHANGED
@@ -335,6 +335,8 @@ class TruvariBench(Proc):
335
335
  """Run `truvari bench` to compare a VCF with CNV calls and
336
336
  base CNV standards
337
337
 
338
+ Requires truvari v4+
339
+
338
340
  See https://github.com/ACEnglish/truvari/wiki/bench
339
341
 
340
342
  Input:
@@ -358,7 +360,7 @@ class TruvariBench(Proc):
358
360
  "truvari": config.exe.truvari,
359
361
  "ref": config.ref.reffa,
360
362
  "refdist": 500,
361
- "pctsim": 0.7,
363
+ "pctseq": 0.7,
362
364
  "pctsize": 0.7,
363
365
  "pctovl": 0.0,
364
366
  "typeignore": False,
@@ -402,7 +404,7 @@ class TruvariBenchSummary(Proc):
402
404
  output = "outdir:dir:truvari_bench.summary"
403
405
  lang = config.lang.rscript
404
406
  envs = {
405
- "plots": ["call cnt", "base cnt", "precision", "recall", "f1"],
407
+ "plots": ["comp cnt", "base cnt", "precision", "recall", "f1"],
406
408
  "devpars": None,
407
409
  }
408
410
  script = "file://../scripts/vcf/TruvariBenchSummary.R"
@@ -414,6 +416,8 @@ class TruvariConsistency(Proc):
414
416
 
415
417
  See https://github.com/ACEnglish/truvari/wiki/consistency
416
418
 
419
+ Requires truvari v4+
420
+
417
421
  Input:
418
422
  vcfs: The vcf files with CNV calls
419
423
 
@@ -439,3 +443,233 @@ class TruvariConsistency(Proc):
439
443
  envs = {"truvari": config.exe.truvari, "heatmap": {}}
440
444
  script = "file://../scripts/vcf/TruvariConsistency.R"
441
445
  plugin_opts = {"report": "file://../reports/vcf/TruvariConsistency.svelte"}
446
+
447
+
448
+ class BcftoolsAnnotate(Proc):
449
+ """Add or remove annotations from VCF files
450
+
451
+ See also: <https://samtools.github.io/bcftools/bcftools.html#annotate>
452
+
453
+ Input:
454
+ infile: The input VCF file
455
+ annfile: The annotation file.
456
+ Currently only VCF files are supported.
457
+
458
+ Output:
459
+ outfile: The VCF file with annotations added or removed.
460
+
461
+ Envs:
462
+ bcftools: Path to bcftools
463
+ tabix: Path to tabix, used to index infile and annfile
464
+ annfile: The annotation file. If `in.annfile` is provided,
465
+ this is ignored
466
+ ncores (type=int): Number of cores (`--threads`) to use
467
+ columns (auto): Comma-separated or list of columns or tags to carry over from
468
+ the annotation file. Overrides `-c, --columns`
469
+ remove (auto): Remove the specified columns from the input file
470
+ header (list): Headers to be added
471
+ gz (flag): Whether to gzip the output file
472
+ index (flag): Whether to index the output file (tbi) (`envs.gz` forced to True)
473
+ <more>: Other arguments for `bcftools annotate`
474
+ See also <https://samtools.github.io/bcftools/bcftools.html#annotate>
475
+ Note that the underscore `_` will be replaced with dash `-` in the
476
+ argument name.
477
+ """
478
+ input = "infile:file, annfile:file"
479
+ output = (
480
+ "outfile:file:{{in.infile | stem: 'gz'}}.vcf"
481
+ "{{'.gz' if envs.index or envs.gz else ''}}"
482
+ )
483
+ lang = config.lang.python
484
+ envs = {
485
+ "bcftools": config.exe.bcftools,
486
+ "tabix": config.exe.tabix,
487
+ "annfile": None,
488
+ "columns": [],
489
+ "remove": [],
490
+ "header": [],
491
+ "gz": True,
492
+ "index": True,
493
+ "ncores": config.misc.ncores,
494
+ }
495
+ script = "file://../scripts/vcf/BcftoolsAnnotate.py"
496
+
497
+
498
+ class BcftoolsFilter(Proc):
499
+ """Apply fixed threshold filters to VCF files
500
+
501
+ Input:
502
+ infile: The input VCF file
503
+
504
+ Output:
505
+ outfile: The filtered VCF file. If the `in.infile` is gzipped, this is
506
+ gzipped as well.
507
+
508
+ Envs:
509
+ bcftools: Path to bcftools
510
+ tabix: Path to tabix, used to index infile/outfile
511
+ ncores (type=int): Number of cores (`--threads`) to use
512
+ keep: Whether we should keep the filtered variants or not.
513
+ If True, the filtered variants will be kept in the output file, but
514
+ with a new FILTER.
515
+ includes: and
516
+ excludes: include/exclude only sites for which EXPRESSION is true.
517
+ See: <https://samtools.github.io/bcftools/bcftools.html#expressions>
518
+ If provided, `envs.include/exclude` will be ignored.
519
+ If `str`/`list` used, The filter names will be `Filter_<type>_<index>`.
520
+ A dict is used where keys are filter names and values are expressions
521
+ gz (flag): Whether to gzip the output file
522
+ index (flag): Whether to index the output file (tbi) (`envs.gz` forced to True)
523
+ <more>: Other arguments for `bcftools filter`
524
+ See also <https://samtools.github.io/bcftools/bcftools.html#filter>
525
+ """
526
+ input = "infile:file"
527
+ output = (
528
+ "outfile:file:{{in.infile | stem: 'gz'}}.vcf"
529
+ "{{'.gz' if envs.index or envs.gz else ''}}"
530
+ )
531
+ lang = config.lang.python
532
+ envs = {
533
+ "bcftools": config.exe.bcftools,
534
+ "tabix": config.exe.tabix,
535
+ "ncores": config.misc.ncores,
536
+ "keep": True,
537
+ "includes": None,
538
+ "excludes": None,
539
+ "gz": True,
540
+ "index": True,
541
+ }
542
+ script = "file://../scripts/vcf/BcftoolsFilter.py"
543
+
544
+
545
+ class BcftoolsSort(Proc):
546
+ """Sort VCF files using `bcftools sort`.
547
+
548
+ `bcftools sort` is used to sort VCF files by chromosome and position based on the
549
+ order of contigs in the header.
550
+
551
+ Here we provide a chrsize file to first sort the contigs in the header and then
552
+ sort the VCF file using `bcftools sort`.
553
+
554
+ Input:
555
+ infile: The input VCF file
556
+
557
+ Output:
558
+ outfile: The sorted VCF file.
559
+
560
+ Envs:
561
+ bcftools: Path to bcftools
562
+ tabix: Path to tabix, used to index infile/outfile
563
+ ncores (type=int): Number of cores (`--threads`) to use
564
+ gz (flag): Whether to gzip the output file
565
+ index (flag): Whether to index the output file (tbi) (`envs.gz` forced to True)
566
+ chrsize: The chromosome size file, from which the chromosome order is used
567
+ to sort the contig in the header first.
568
+ If not provided, `bcftools sort` will be used directly.
569
+ notfound (choice): What if the contig in the VCF file is not found in the
570
+ `chrsize` file.
571
+ - error: Report error
572
+ - remove: Remove the contig from the header.
573
+ Note that if there are records with the removed contig, an error will
574
+ be raised by `bcftools sort`
575
+ - start: Move the contig to the start of the contigs from `chrsize`
576
+ - end: Move the contig to the end of the contigs from `chrsize`
577
+ <more>: Other arguments for `bcftools sort`. For example `max_mem`.
578
+ See also <https://samtools.github.io/bcftools/bcftools.html#sort>
579
+ """
580
+ input = "infile:file"
581
+ output = (
582
+ "outfile:file:{{in.infile | stem: 'gz'}}.vcf"
583
+ "{{'.gz' if envs.index or envs.gz else ''}}"
584
+ )
585
+ lang = config.lang.python
586
+ envs = {
587
+ "bcftools": config.exe.bcftools,
588
+ "tabix": config.exe.tabix,
589
+ "ncores": config.misc.ncores,
590
+ "chrsize": config.ref.chrsize,
591
+ "notfound": "remove",
592
+ "gz": True,
593
+ "index": True,
594
+ }
595
+ script = "file://../scripts/vcf/BcftoolsSort.py"
596
+
597
+
598
+ class BcftoolsMerge(Proc):
599
+ """Merge multiple VCF files using `bcftools merge`.
600
+
601
+ Input:
602
+ infiles: The input VCF files
603
+
604
+ Output:
605
+ outfile: The merged VCF file.
606
+
607
+ Envs:
608
+ bcftools: Path to bcftools
609
+ tabix: Path to tabix, used to index infile/outfile
610
+ ncores (type=int): Number of cores (`--threads`) to use
611
+ gz (flag): Whether to gzip the output file
612
+ index (flag): Whether to index the output file (tbi) (`envs.gz` forced to True)
613
+ <more>: Other arguments for `bcftools merge`.
614
+ See also <https://samtools.github.io/bcftools/bcftools.html#merge>
615
+ """
616
+ input = "infiles:files"
617
+ output = (
618
+ "outfile:file:{{in.infiles | first | stem | append: '_etc_merged'}}.vcf"
619
+ "{{'.gz' if envs.index or envs.gz else ''}}"
620
+ )
621
+ lang = config.lang.python
622
+ envs = {
623
+ "bcftools": config.exe.bcftools,
624
+ "tabix": config.exe.tabix,
625
+ "ncores": config.misc.ncores,
626
+ "gz": True,
627
+ "index": True,
628
+ }
629
+ script = "file://../scripts/vcf/BcftoolsMerge.py"
630
+
631
+
632
+ class BcftoolsView(Proc):
633
+ """View, subset and filter VCF files by position and filtering expression.
634
+
635
+ Also convert between VCF and BCF.
636
+
637
+ Input:
638
+ infile: The input VCF file
639
+ regions_file: The region file used to subset the input VCF file.
640
+ samples_file: The samples file used to subset the input VCF file.
641
+
642
+ Output:
643
+ outfile: The output VCF file.
644
+
645
+ Envs:
646
+ bcftools: Path to bcftools
647
+ tabix: Path to tabix, used to index infile/outfile
648
+ ncores (type=int): Number of cores (`--threads`) to use
649
+ regions_file: The region file used to subset the input VCF file.
650
+ If `in.regions_file` is provided, this is ignored.
651
+ samples_file: The samples file used to subset the input VCF file.
652
+ If `in.samples_file` is provided, this is ignored.
653
+ gz (flag): Whether to gzip the output file
654
+ index (flag): Whether to index the output file (tbi) (`envs.gz` forced to True)
655
+ <more>: Other arguments for `bcftools view`.
656
+ See also https://samtools.github.io/bcftools/bcftools.html#view
657
+ Note that the underscore `_` will be replaced with dash `-` in the
658
+ argument name.
659
+ """
660
+ input = "infile:file, regions_file:file, samples_file:file"
661
+ output = (
662
+ "outfile:file:{{in.infile | stem: 'gz'}}.vcf"
663
+ "{{'.gz' if envs.index or envs.gz else ''}}"
664
+ )
665
+ lang = config.lang.python
666
+ envs = {
667
+ "bcftools": config.exe.bcftools,
668
+ "tabix": config.exe.tabix,
669
+ "ncores": config.misc.ncores,
670
+ "regions_file": None,
671
+ "samples_file": None,
672
+ "gz": True,
673
+ "index": True,
674
+ }
675
+ script = "file://../scripts/vcf/BcftoolsView.py"
biopipen/ns/web.py CHANGED
@@ -13,8 +13,11 @@ class Download(Proc):
13
13
  outfile: The file downloaded
14
14
 
15
15
  Envs:
16
- tool: Which tool to use to download the data
17
- wget, aria2c or python's urllib
16
+ tool (choice): Which tool to use to download the data
17
+ - wget: Use wget
18
+ - aria2c: Use aria2c
19
+ - urllib: Use python's urllib
20
+ - aria: Alias for aria2c
18
21
  wget: Path to wget
19
22
  aria2c: Path to aria2c
20
23
  args: The arguments to pass to the tool
@@ -28,8 +31,17 @@ class Download(Proc):
28
31
  """
29
32
  input = "url"
30
33
  output = (
34
+ # Need to replace http:// and https:// to avoid cloudpathlib.AnyPath to get
35
+ # the basename for something like "https://example.com/data/?file=datafile.txt"
36
+ # as data, but "?file=datafile.txt"
31
37
  "outfile:file:"
32
- "{{in.url | basename | replace: '%2E', '.' | slugify: separator='.'}}"
38
+ """{{in.url
39
+ | replace: 'http://', ''
40
+ | replace: 'https://', ''
41
+ | basename
42
+ | url_decode
43
+ | slugify: separator='.', lowercase=False, regex_pattern='[^-a-zA-Z0-9_]+'
44
+ }}"""
33
45
  )
34
46
  lang = config.lang.python
35
47
  envs = {
@@ -43,7 +55,10 @@ class Download(Proc):
43
55
 
44
56
 
45
57
  class DownloadList(Proc):
46
- """Download data from URLs in a file
58
+ """Download data from URLs in a file.
59
+
60
+ This does not work by iterating over the URLs in the file. The whole file is
61
+ passed to `wget` or `aria2c` at once.
47
62
 
48
63
  Input:
49
64
  urlfile: The file containing the URLs to download data from
@@ -52,8 +67,11 @@ class DownloadList(Proc):
52
67
  outdir: The directory containing the downloaded files
53
68
 
54
69
  Envs:
55
- tool: Which tool to use to download the data
56
- wget, aria2c or python's urllib
70
+ tool (choice): Which tool to use to download the data
71
+ - wget: Use wget
72
+ - aria2c: Use aria2c
73
+ - urllib: Use python's urllib
74
+ - aria: Alias for aria2c
57
75
  wget: Path to wget
58
76
  aria2c: Path to aria2c
59
77
  args: The arguments to pass to the tool
@@ -76,3 +94,76 @@ class DownloadList(Proc):
76
94
  "ncores": config.misc.ncores,
77
95
  }
78
96
  script = "file://../scripts/web/DownloadList.py"
97
+
98
+
99
+ class GCloudStorageDownloadFile(Proc):
100
+ """Download file from Google Cloud Storage
101
+
102
+ Before using this, make sure you have the `gcloud` tool installed and
103
+ logged in with the appropriate credentials using `gcloud auth login`.
104
+
105
+ Also make sure you have [`google-crc32c`](https://pypi.org/project/google-crc32c/)
106
+ installed to verify the integrity of the downloaded files.
107
+
108
+ Input:
109
+ url: The URL to download data from.
110
+ It should be in the format gs://bucket/path/to/file
111
+
112
+ Output:
113
+ outfile: The file downloaded
114
+
115
+ Envs:
116
+ gcloud: Path to gcloud
117
+ args (ns): Other arguments to pass to the `gcloud storage cp` command
118
+ - do_not_decompress (flag): Do not decompress the file.
119
+ - <more>: More arguments to pass to the `gcloud storage cp` command
120
+ See `gcloud storage cp --help` for more information
121
+ """
122
+ input = "url:var"
123
+ output = "outfile:file:{{in.url | replace: 'gs://', '/' | basename}}"
124
+ lang = config.lang.python
125
+ envs = {
126
+ "gcloud": config.exe.gcloud,
127
+ "args": {"do_not_decompress": True},
128
+ }
129
+ script = "file://../scripts/web/GCloudStorageDownloadFile.py"
130
+
131
+
132
+ class GCloudStorageDownloadBucket(Proc):
133
+ """Download all files from a Google Cloud Storage bucket
134
+
135
+ Before using this, make sure you have the `gcloud` tool installed and
136
+ logged in with the appropriate credentials using `gcloud auth login`.
137
+
138
+ Note that this will not use the `--recursive` flag of `gcloud storage cp`.
139
+ The files will be listed and downloaded one by one so that they can be parallelized.
140
+
141
+ Also make sure you have [`google-crc32c`](https://pypi.org/project/google-crc32c/)
142
+ installed to verify the integrity of the downloaded files.
143
+
144
+ Input:
145
+ url: The URL to download data from.
146
+ It should be in the format gs://bucket
147
+
148
+ Output:
149
+ outdir: The directory containing the downloaded files
150
+
151
+ Envs:
152
+ gcloud: Path to gcloud
153
+ keep_structure (flag): Keep the directory structure of the bucket
154
+ ncores (type=int): The number of cores to use to download the files in parallel
155
+ args (ns): Other arguments to pass to the `gcloud storage cp` command
156
+ - do_not_decompress (flag): Do not decompress the file.
157
+ - <more>: More arguments to pass to the `gcloud storage cp` command
158
+ See `gcloud storage cp --help` for more information
159
+ """
160
+ input = "url:var"
161
+ output = "outdir:dir:{{in.url | replace: 'gs://', ''}}"
162
+ lang = config.lang.python
163
+ envs = {
164
+ "gcloud": config.exe.gcloud,
165
+ "keep_structure": True,
166
+ "ncores": config.misc.ncores,
167
+ "args": {"do_not_decompress": True},
168
+ }
169
+ script = "file://../scripts/web/GCloudStorageDownloadBucket.py"
@@ -4,19 +4,16 @@
4
4
  import { Tabs, Tab, TabContent } from "$ccs";
5
5
  </script>
6
6
 
7
- {% for case in envs.cases %}
8
- <h1>{{case}}</h1>
9
-
10
- {% for binsize in envs.cases[case].binsizes %}
11
- <h2>Binsize: {{binsize}}</h2>
7
+ {% for binsize in envs.binsizes %}
8
+ <h1>Binsize: {{binsize}}</h1>
12
9
 
13
10
  {% from_ os.path import join, basename %}
14
11
  {% assign manplots = [] %}
15
12
  {% assign circplots = [] %}
16
13
  {% assign samples = [] %}
17
14
  {% for job in jobs %}
18
- {% set manplot = job.out.outdir | joinpaths: case, "manhattan."+str(binsize)+".*.png" | glob %}
19
- {% set circplot = job.out.outdir | joinpaths: case, "circular."+str(binsize)+".*.png" | glob %}
15
+ {% set manplot = job.out.outdir | glob: "manhattan."+str(binsize)+".*.png" %}
16
+ {% set circplot = job.out.outdir | glob: "circular."+str(binsize)+".*.png" %}
20
17
  {% set _ = manplots.append(manplot[0]) %}
21
18
  {% if len(circplot) > 0 %}
22
19
  {% set _ = circplots.append(circplot[0]) %}
@@ -45,6 +42,4 @@
45
42
  </div>
46
43
  </Tabs>
47
44
 
48
- {% endfor %}
49
-
50
45
  {% endfor %}
@@ -0,0 +1,18 @@
1
+ {% from "utils/misc.liq" import report_jobs, table_of_images -%}
2
+ <script>
3
+ import { Iframe } from "$libs";
4
+ </script>
5
+
6
+ {%- macro report_job(job, h=1) -%}
7
+ <Iframe
8
+ src="{{job.out.outdir}}/outs/web_summary.html"
9
+ width="100%"
10
+ frameborder="0"
11
+ style="min-height: 60vh" />
12
+ {%- endmacro -%}
13
+
14
+ {%- macro head_job(job) -%}
15
+ <h1>{{job.out.outdir | basename | escape}}</h1>
16
+ {%- endmacro -%}
17
+
18
+ {{ report_jobs(jobs, head_job, report_job) }}
@@ -0,0 +1,16 @@
1
+ {% from "utils/misc.liq" import report_jobs -%}
2
+
3
+ <script>
4
+ import { Image, DataTable, Descr } from "$libs";
5
+ </script>
6
+
7
+ {%- macro report_job(job, h=1) -%}
8
+ {{ job | render_job: h=h }}
9
+ {%- endmacro -%}
10
+
11
+
12
+ {%- macro head_job(job) -%}
13
+ <h1>{{job.out.outdir | stem | escape}}</h1>
14
+ {%- endmacro -%}
15
+
16
+ {{ report_jobs(jobs, head_job, report_job) }}
@@ -0,0 +1,18 @@
1
+ {% from "utils/misc.liq" import report_jobs, table_of_images -%}
2
+ <script>
3
+ import { Iframe } from "$libs";
4
+ </script>
5
+
6
+ {%- macro report_job(job, h=1) -%}
7
+ <Iframe
8
+ src="{{job.out.outdir}}/outs/web_summary.html"
9
+ width="100%"
10
+ frameborder="0"
11
+ style="min-height: 60vh" />
12
+ {%- endmacro -%}
13
+
14
+ {%- macro head_job(job) -%}
15
+ <h1>{{job.out.outdir | basename | escape}}</h1>
16
+ {%- endmacro -%}
17
+
18
+ {{ report_jobs(jobs, head_job, report_job) }}
@@ -4,7 +4,7 @@
4
4
  </script>
5
5
 
6
6
  {%- macro report_job(job, h=1) -%}
7
- {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
7
+ {%- for pngfile in job.out.outdir | glob: '*.png' -%}
8
8
  <h{{h+1}}>{{pngfile | stem0 | title}}</h{{h+1}}>
9
9
  <Image src="{{pngfile}}" />
10
10
  {%- endfor -%}
@@ -4,7 +4,7 @@
4
4
  </script>
5
5
 
6
6
  {%- macro report_job(job, h=1) -%}
7
- {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
7
+ {%- for pngfile in job.out.outdir | glob: '*.png' -%}
8
8
  <h{{h+1}}>{{pngfile | stem0 | title}}</h{{h+1}}>
9
9
  <Image src="{{pngfile}}" />
10
10
  {%- endfor -%}
@@ -4,7 +4,7 @@
4
4
  </script>
5
5
 
6
6
  {%- macro report_job(job, h=1) -%}
7
- {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
7
+ {%- for pngfile in job.out.outdir | glob: '*.png' -%}
8
8
  <h{{h+1}}>{{pngfile | stem0 | title}}</h{{h+1}}>
9
9
  <Image src="{{pngfile}}" />
10
10
  {%- endfor -%}
@@ -0,0 +1,15 @@
1
+ {% from "utils/misc.liq" import report_jobs, table_of_images -%}
2
+ <script>
3
+ import { Image, DataTable, Descr } from "$libs";
4
+ import { Tabs, Tab, TabContent, UnorderedList, ListItem, InlineNotification } from "$ccs";
5
+ </script>
6
+
7
+ {%- macro report_job(job, h=1) -%}
8
+ {{ job | render_job: h=h }}
9
+ {%- endmacro -%}
10
+
11
+ {%- macro head_job(job) -%}
12
+ <h1>{{job.in | attr: "values" | call | first | stem0 | escape}}</h1>
13
+ {%- endmacro -%}
14
+
15
+ {{ report_jobs(jobs, head_job, report_job) }}
@@ -0,0 +1,16 @@
1
+ {% from "utils/misc.liq" import report_jobs -%}
2
+
3
+ <script>
4
+ import { Image, DataTable, Descr } from "$libs";
5
+ </script>
6
+
7
+ {%- macro report_job(job, h=1) -%}
8
+ {{ job | render_job: h=h }}
9
+ {%- endmacro -%}
10
+
11
+
12
+ {%- macro head_job(job) -%}
13
+ <h1>{{job.out.outdir | stem | escape}}</h1>
14
+ {%- endmacro -%}
15
+
16
+ {{ report_jobs(jobs, head_job, report_job) }}
@@ -1,50 +1,15 @@
1
1
  {% from "utils/misc.liq" import report_jobs, table_of_images -%}
2
2
  <script>
3
- import { Image } from "$libs";
3
+ import { Image, DataTable, Descr } from "$libs";
4
+ import { Tabs, Tab, TabContent } from "$ccs";
4
5
  </script>
5
6
 
6
7
  {%- macro report_job(job, h=1) -%}
7
- {%- set secdirs = job.out.outdir | glob: "*" -%}
8
- {%- if len(secdirs) == 1 -%}
9
- {%- set secname = secdirs | first | basename -%}
10
- {%- if secdirs[0] | joinpaths: "venn.png" | exists -%}
11
- {%- if secname == "DEFAULT" -%}
12
- <h{{h}}>Case overlapping</h{{h}}>
13
- {%- else -%}
14
- <h{{h}}>{{ secname | escape }} - Case overlapping</h{{h}}>
15
- {%- endif -%}
16
- {{ table_of_images(
17
- [joinpaths(secdirs[0], "venn.png"), joinpaths(secdirs[0], "upset.png")],
18
- ["Venn plot", "Upset plot"]) }}
19
- {%- endif -%}
20
- {%- for plotfile in secdirs[0] | glob: "case-*.png" -%}
21
- {%- if secname == "DEFAULT" -%}
22
- <h{{h}}>{{ plotfile | stem | escape }}</h{{h}}>
23
- {%- else -%}
24
- <h{{h}}>{{ secname | escape }} - {{ plotfile | stem | escape }}</h{{h}}>
25
- {%- endif -%}
26
- <Image src={{plotfile | quote}} />
27
- {%- endfor -%}
28
- {%- else -%}
29
- {%- for secdir in secdirs -%}
30
- {%- set sec = secdir | basename -%}
31
- <h{{h}}>{{sec | escape}}</h{{h}}>
32
- {%- if secdir | joinpaths: "venn.png" | exists -%}
33
- <h{{h+1}}>Case overlapping</h{{h+1}}>
34
- {{ table_of_images(
35
- [joinpaths(secdir, "venn.png"), joinpaths(secdir, "upset.png")],
36
- ["Venn plot", "Upset plot"]) }}
37
- {%- endif -%}
38
- {%- for plotfile in secdir | glob: "case-*.png" -%}
39
- <h{{h+1}}>{{ plotfile | stem }}</h{{h+1}}>
40
- <Image src={{plotfile | quote}} />
41
- {%- endfor -%}
42
- {%- endfor -%}
43
- {%- endif -%}
8
+ {{ job | render_job: h=h }}
44
9
  {%- endmacro -%}
45
10
 
46
11
  {%- macro head_job(job) -%}
47
- <h1>{{job.in.srtobj | stem0 | escape}}</h1>
12
+ <h1>{{job.in.srtobj | stem0 | escape}}</h1>
48
13
  {%- endmacro -%}
49
14
 
50
15
  {{ report_jobs(jobs, head_job, report_job) }}
@@ -4,7 +4,7 @@
4
4
  </script>
5
5
 
6
6
  {%- macro report_job(job, h=1) -%}
7
- {% set images = job.out.outdir | joinpaths: "*.png" | glob %}
7
+ {% set images = job.out.outdir | glob: "*.png" %}
8
8
  {{ table_of_images(images) }}
9
9
  {%- endmacro -%}
10
10