biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
@@ -0,0 +1,113 @@
1
+ from typing import Literal
2
+ from pathlib import Path, PosixPath # noqa: F401
3
+
4
+ from biopipen.utils.misc import run_command, logger
5
+ from biopipen.scripts.vcf.bcftools_utils import run_bcftools
6
+
7
+ infile: str = {{in.infile | quote}} # pyright: ignore # noqa: E999
8
+ outfile: str = {{out.outfile | quote}} # pyright: ignore
9
+ envs: dict = {{envs | dict | repr}} # pyright: ignore
10
+
11
+ outdir = Path(outfile).parent
12
+ bcftools = envs.pop("bcftools")
13
+ tabix = envs.pop("tabix")
14
+ ncores = envs.pop("ncores")
15
+ gz = envs.pop("gz")
16
+ index = envs.pop("index")
17
+ chrsize = envs.pop("chrsize")
18
+ notfound = envs.pop("notfound")
19
+
20
+ if chrsize:
21
class Contig:
    """One ``##contig`` line of a VCF header.

    Args:
        name: Value of the ``ID`` attribute.
        length: Value of the ``length`` attribute, kept as text.
        raw: The header line this contig was parsed from. When non-empty,
            ``str()`` returns it unchanged so that attributes other than
            ``ID`` and ``length`` (and lines without a length) survive a
            parse/format round trip.
    """

    def __init__(self, name: str, length: str, raw: str = ""):
        self.name = name
        self.length = length
        self.raw = raw

    def __str__(self) -> str:
        if self.raw:
            return self.raw
        return f"##contig=<ID={self.name},length={self.length}>"


def _contig_attr(line: str, key: str) -> str:
    """Return the value of attribute *key* in a ``##contig`` line, or ``""``."""
    # An attribute is either the first one (after "<") or a later one
    # (after ","), so both lead characters are tried.
    for lead in ("<", ","):
        marker = f"{lead}{key}="
        start = line.find(marker)
        if start == -1:
            continue
        value = line[start + len(marker):]
        # The value ends at the next attribute separator or the closing ">".
        return value.partition(",")[0].partition(">")[0]
    return ""


def parse_header(header_file: Path) -> tuple[list[str], dict[str, Contig]]:
    """Split a VCF header file into contig lines and all other lines.

    Args:
        header_file: Path to a text file holding the VCF header lines.

    Returns:
        A pair ``(hlines, ctgs)``: ``hlines`` are the stripped non-contig
        lines in file order; ``ctgs`` maps each contig ID to its ``Contig``,
        in file order. A line that starts with ``##contig`` but carries no
        ``ID`` attribute is kept in ``hlines`` instead of aborting the parse.
    """
    hlines = []
    ctgs = {}
    with open(header_file) as fh:
        for line in fh:
            line = line.strip()
            name = _contig_attr(line, "ID") if line.startswith("##contig") else ""
            if name:
                # Key by the bare ID only, regardless of attribute order, so
                # the name can be compared with the chromosome names.
                ctgs[name] = Contig(name, _contig_attr(line, "length"), raw=line)
            else:
                hlines.append(line)
    return hlines, ctgs
40
+
41
def match_contigs(
    ctgs: dict[str, "Contig"],
    chroms: list[str],
    notfound: Literal["error", "remove", "start", "end"],
) -> list[str]:
    """Order the header contigs by the given chromosome list.

    Args:
        ctgs: Contigs from the VCF header, keyed by contig name. The mapping
            is only read; it is not modified.
        chroms: Chromosome names in the desired order. If every name starts
            with "chr" while no contig name does, the prefix is dropped
            before matching.
        notfound: What to do with header contigs missing from ``chroms``:
            "error" raises, "start"/"end" put them before/after the matched
            contigs in header order, any other value drops them.

    Returns:
        The contig header lines (``str()`` of each contig) in the new order.

    Raises:
        ValueError: If ``notfound`` is "error" and some header contigs are
            not listed in ``chroms``.
    """
    if (
        ctgs
        and chroms
        and all(chrom.startswith("chr") for chrom in chroms)
        and not any(chrom.startswith("chr") for chrom in ctgs)
    ):
        logger.warning(
            "Removing 'chr' prefix from chromosomes in envs.chrsize file, "
            "because the input VCF file does not have 'chr' prefix."
        )
        chroms = [chrom[3:] for chrom in chroms]

    # dict.fromkeys keeps the first occurrence of each name in order, so a
    # chromosome listed twice contributes a single contig line.
    matched = dict.fromkeys(chrom for chrom in chroms if chrom in ctgs)
    new_ctgs = [str(ctgs[chrom]) for chrom in matched]

    # Work on a separate view of the leftovers instead of deleting entries
    # from the caller's dictionary.
    missing = {name: ctg for name, ctg in ctgs.items() if name not in matched}
    if missing:
        if notfound == "error":
            raise ValueError(
                "Chromosomes not found in envs.chrsize file: "
                f"{', '.join(missing)}"
            )
        leftovers = [str(ctg) for ctg in missing.values()]
        if notfound == "start":
            new_ctgs = leftovers + new_ctgs
        elif notfound == "end":
            new_ctgs = new_ctgs + leftovers

    return new_ctgs
76
+
77
+ chroms = []
78
+ with Path(chrsize).expanduser().open() as fh:
79
+ for line in fh:
80
+ chrom = line.strip().split()[0]
81
+ chroms.append(chrom)
82
+
83
+ header_file = outdir / "header.txt"
84
+ run_command(f'{bcftools} view -h {infile} > {header_file}', fg=True)
85
+ header_lines, contigs = parse_header(header_file)
86
+ new_contigs = match_contigs(contigs, chroms, notfound=notfound)
87
+ header_lines = [header_lines[0], *new_contigs, *header_lines[1:]]
88
+ reheader_file = outdir / "reheader.txt"
89
+ with open(reheader_file, "w") as fh:
90
+ fh.writelines([f"{line}\n" for line in header_lines])
91
+
92
+ reheader_vcf = outdir / f"{Path(infile).stem}_reheader.vcf"
93
+ run_command([
94
+ bcftools, "reheader",
95
+ "--header", reheader_file,
96
+ "-o", reheader_vcf,
97
+ infile
98
+ ], fg=True)
99
+
100
+ infile = str(reheader_vcf)
101
+
102
+ envs[""] = [bcftools, "sort"]
103
+ envs["_"] = infile
104
+ envs["o"] = outfile
105
+
106
+ if index and not gz:
107
+ logger.warning("Forcing envs.gz to True because envs.index is True.")
108
+ gz = True
109
+
110
+ if "O" not in envs and "output-type" not in envs and "output_type" not in envs:
111
+ envs["O"] = "z" if gz else "v"
112
+
113
+ run_bcftools(envs, bcftools=bcftools, index=index, tabix=tabix)
@@ -0,0 +1,73 @@
1
+ from contextlib import suppress
2
+ # In case there are paths passed to envs
3
+ from pathlib import PosixPath # noqa: F401
4
+
5
+ from biopipen.utils.misc import logger
6
+ from biopipen.utils.reference import tabix_index
7
+ from biopipen.scripts.vcf.bcftools_utils import run_bcftools
8
+
9
+ infile: str = {{in.infile | quote}} # pyright: ignore # noqa: E999
10
+ regions_file: str = {{in.regions_file | quote}} # pyright: ignore
11
+ samples_file: str = {{in.samples_file | quote}} # pyright: ignore
12
+ outfile: str = {{out.outfile | quote}} # pyright: ignore
13
+ envs: dict = {{envs | dict | repr}} # pyright: ignore
14
+
15
+ bcftools = envs.pop("bcftools")
16
+ tabix = envs.pop("tabix")
17
+ ncores = envs.pop("ncores")
18
+ gz = envs.pop("gz")
19
+ index = envs.pop("index")
20
+
21
def _pick_file_option(given, envs: dict, aliases: tuple, warning: str):
    """Resolve a file option that can come from an input or from ``envs``.

    Every alias is removed from ``envs`` so that only the resolved value,
    which the script sets again later, is passed on to bcftools. Popping all
    aliases matters when more than one is set: leaving the extra ones behind
    would repeat the option on the command line.

    Args:
        given: The value from the process input; it wins when truthy.
        envs: The options dictionary; alias keys are removed in place.
        aliases: The keys under which the option may appear in ``envs``,
            in order of precedence.
        warning: Message logged when ``given`` overrides an ``envs`` entry.

    Returns:
        ``given`` if it is truthy or no alias was present; otherwise the
        first truthy alias value, or the last alias value if none is truthy.
    """
    found = any(alias in envs for alias in aliases)
    chosen = None
    for alias in aliases:
        value = envs.pop(alias, None)
        if not chosen:
            chosen = value

    if given:
        if found:
            logger.warning(warning)
        return given
    return chosen if found else given


regions_file = _pick_file_option(
    regions_file,
    envs,
    ("regions_file", "regions-file", "R"),
    r"Ignoring envs\[regions_file/regions-file/R] "
    "because in.regions_file is provided.",
)

samples_file = _pick_file_option(
    samples_file,
    envs,
    ("samples_file", "samples-file", "S"),
    "Ignoring envs[samples_file/samples-file/S] "
    "because in.samples_file is provided.",
)
58
+
59
+ if index and not gz:
60
+ logger.warning("Forcing envs.gz to True because envs.index is True.")
61
+ gz = True
62
+
63
+ if "O" not in envs and "output-type" not in envs and "output_type" not in envs:
64
+ envs["O"] = "z" if gz else "v"
65
+
66
+ envs[""] = [bcftools, "view"]
67
+ envs["_"] = tabix_index(infile, "vcf", tabix=tabix)
68
+ envs["o"] = outfile
69
+ envs["threads"] = ncores
70
+ envs["regions_file"] = regions_file
71
+ envs["samples_file"] = samples_file
72
+
73
+ run_bcftools(envs, bcftools=bcftools, index=index, tabix=tabix)
@@ -1,13 +1,15 @@
1
+ # shellcheck disable=SC1083
1
2
  compvcf={{in.compvcf | quote}}
2
3
  basevcf={{in.basevcf | quote}}
3
4
  outdir={{out.outdir | quote}}
4
5
  truvari={{envs.truvari | quote}}
5
6
  ref={{envs.ref | quote}}
6
7
  refdist={{envs.refdist | quote}}
7
- pctsim={{envs.pctsim | quote}}
8
+ pctseq={{envs.pctseq | quote}}
8
9
  pctsize={{envs.pctsize | quote}}
9
10
  pctovl={{envs.pctovl | quote}}
10
11
  sizemax={{envs.sizemax | default: 50000 | quote}}
12
+ # shellcheck disable=SC1054
11
13
  {% if envs.typeignore %}
12
14
  typeignore="--typeignore"
13
15
  {% else %}
@@ -15,20 +17,25 @@ typeignore=""
15
17
  {% endif %}
16
18
  {% if envs.multimatch %}
17
19
  multimatch="--multimatch"
20
+ # shellcheck disable=SC1009
18
21
  {% else %}
19
22
  multimatch=""
23
+ # shellcheck disable=SC1073
20
24
  {% endif %}
21
25
 
22
26
  rm -rf $outdir
23
- $truvari bench \
24
- -c "$compvcf" \
25
- -b "$basevcf" \
26
- -f "$ref" \
27
+ cmd="$truvari bench \
28
+ -c '$compvcf' \
29
+ -b '$basevcf' \
30
+ -f '$ref' \
27
31
  --refdist $refdist \
28
- --pctsim $pctsim \
32
+ --pctseq $pctseq \
29
33
  --pctsize $pctsize \
30
34
  --pctovl $pctovl \
31
35
  --sizemax $sizemax \
32
36
  $typeignore \
33
37
  $multimatch \
34
- -o $outdir
38
+ -o $outdir"
39
+
40
+ echo "$cmd"
41
+ eval "$cmd"
@@ -1,11 +1,7 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
- source("{{biopipen_dir}}/utils/plot.R")
3
-
4
- library(ggprism)
5
1
  library(rjson)
2
+ library(rlang)
6
3
  library(dplyr)
7
-
8
- theme_set(theme_prism(axis_text_angle = 90))
4
+ library(plotthis)
9
5
 
10
6
  indirs = {{in.indirs | r}}
11
7
  outdir = {{out.outdir | r}}
@@ -17,7 +13,7 @@ read_summary = function() {
17
13
 
18
14
  summaries = NULL
19
15
  for (indir in indirs) {
20
- summary = fromJSON(file=file.path(indir, "summary.txt"))
16
+ summary = fromJSON(file=file.path(indir, "summary.json"))
21
17
  summary$gt_matrix = NULL
22
18
  summary$Sample = sub(".truvari_bench", "", basename(indir), fixed=T)
23
19
  summaries = bind_rows(summaries, summary)
@@ -39,14 +35,21 @@ get_devpars = function() {
39
35
 
40
36
  plot_summary = function(col) {
41
37
  outfile = file.path(outdir, paste0(col, ".png"))
42
- plotGG(
38
+ p <- plotthis::BarPlot(
43
39
  summaries,
44
- "col",
45
- list(mapping = aes_string(x = "Sample", y = bQuote(col), fill = "Sample")),
46
-
47
- devpars = get_devpars(),
48
- outfile = outfile
40
+ x = "Sample",
41
+ y = col,
42
+ x_text_angle = 90
43
+ )
44
+ devpars <- get_devpars()
45
+ png(
46
+ filename = outfile,
47
+ width = devpars$width,
48
+ height = devpars$height,
49
+ res = devpars$res
49
50
  )
51
+ print(p)
52
+ dev.off()
50
53
  }
51
54
 
52
55
  main = function() {
@@ -1,4 +1,4 @@
1
- source("{{biopipen_dir}}/utils/plot.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
2
2
  # to compile the expressions
3
3
  library(ComplexHeatmap)
4
4
 
@@ -1,7 +1,7 @@
1
1
  from cyvcf2 import VCF, Variant
2
2
 
3
- infile = {{in.infile | quote}} # pyright: ignore
4
- outfile = {{out.outfile | quote}} # pyright: ignore
3
+ infile: str = {{in.infile | quote}} # pyright: ignore # noqa: E999
4
+ outfile: str = {{out.outfile | quote}} # pyright: ignore
5
5
  # vcf, default 1
6
6
  inbase = {{envs.inbase | int}} # pyright: ignore
7
7
  # bed, default 0
@@ -2,22 +2,22 @@ from os import path
2
2
 
3
3
  from biopipen.utils.misc import run_command, dict_to_cli_args
4
4
 
5
- infile = {{in.infile | quote}} # pyright: ignore
6
- outfile = {{out.outfile | quote}} # pyright: ignore
7
- joboutdir = {{job.outdir | quote}} # pyright: ignore
5
+ infile: str = {{in.infile | quote}} # pyright: ignore # noqa
6
+ outfile: str = {{out.outfile | quote}} # pyright: ignore
7
+ joboutdir: str = {{job.outdir | quote}} # pyright: ignore
8
8
  vcfanno = {{envs.vcfanno | quote}} # pyright: ignore
9
- ncores = {{envs.ncores | repr}} # pyright: ignore
10
- args = {{envs.args | repr}} # pyright: ignore
9
+ ncores: int = {{envs.ncores | repr}} # pyright: ignore
10
+ args: dict = {{envs.args | dict}} # pyright: ignore
11
11
 
12
- {% set conf = envs.conffile or in.conffile %}
13
- {% if conf | isinstance: dict %}
12
+ {% set conf = envs.conffile or in.conffile %} # pyright: ignore # noqa
13
+ {% if conf | isinstance: dict %} # pyright: ignore # noqa
14
14
  conffile = path.join(joboutdir, "config.toml")
15
- conf = {{ conf | toml | quote }}
15
+ conf: str = {{ conf | toml | quote }} # pyright: ignore # noqa
16
16
  with open(conffile, "w") as f:
17
17
  f.write(conf)
18
- {% else %}
19
- conffile = {{conf | quote}}
20
- {% endif %}
18
+ {% else %} # pyright: ignore # noqa
19
+ conffile = {{conf | quote}} # pyright: ignore # noqa
20
+ {% endif %} # pyright: ignore # noqa
21
21
 
22
22
  args["p"] = ncores
23
23
  args["_"] = [conffile, infile]
@@ -1,25 +1,37 @@
1
+ # shellcheck disable=SC2148
2
+ # shellcheck disable=SC2036
3
+ # shellcheck disable=SC2030
4
+ # shellcheck disable=SC1083
5
+ # shellcheck disable=SC2288
1
6
  infile={{in.infile | quote}}
2
7
  outfile={{out.outfile | quote}}
3
8
  n={{envs.n}}
4
9
 
10
+ # shellcheck disable=SC2031
5
11
  if [[ $infile == *.gz ]]; then
6
- outfile=$(echo $outfile | sed -r "s/\.gz$//")
7
- nheader=$(zcat $infile | head -n 9999 | grep "^#" | wc -l | cut -d' ' -f1)
12
+ outfile=$(echo "$outfile" | sed -r "s/\.gz$//")
13
+ # shellcheck disable=SC2126
14
+ nheader=$(zcat "$infile" | head -n 9999 | grep "^#" | wc -l | cut -d' ' -f1)
8
15
  if [[ ! $n -gt 1 ]]; then
9
- nrows=$(zcat $infile | wc -l | cut -d' ' -f1)
16
+ nrows=$(zcat "$infile" | wc -l | cut -d' ' -f1)
17
+ # shellcheck disable=SC2004
10
18
  nvars=$(($nrows - $nheader))
11
19
  n=$(echo "$nvars * $n" | bc)
12
20
  fi
13
- zcat $infile | head -n $nheader > $outfile
14
- zcat $infile | tail -n +$(($nheader + 1)) | shuf -n $n | LC_ALL=C sort -k1,1V -k2,2n >> $outfile
15
- bgzip $outfile
21
+ zcat "$infile" | head -n "$nheader" > "$outfile"
22
+ # shellcheck disable=SC2004
23
+ zcat "$infile" | tail -n +$(($nheader + 1)) | shuf -n "$n" | LC_ALL=C sort -k1,1V -k2,2n >> "$outfile"
24
+ bgzip "$outfile"
16
25
  else
17
- nheader=$(head -n 9999 $infile | grep "^#" | wc -l | cut -d' ' -f1)
26
+ # shellcheck disable=SC2126
27
+ nheader=$(head -n 9999 "$infile" | grep "^#" | wc -l | cut -d' ' -f1)
18
28
  if [[ ! $n -gt 1 ]]; then
19
- nrows=$(wc -l $infile | cut -d' ' -f1)
29
+ nrows=$(wc -l "$infile" | cut -d' ' -f1)
30
+ # shellcheck disable=SC2004
20
31
  nvars=$(($nrows - $nheader))
21
32
  n=$(echo "$nvars * $n" | bc)
22
33
  fi
23
- head -n $nheader $infile > $outfile
24
- tail -n +$(($nheader + 1)) $infile | shuf -n $n | LC_ALL=C sort -k1,1V -k2,2n >> $outfile
34
+ head -n "$nheader" "$infile" > "$outfile"
35
+ # shellcheck disable=SC2004
36
+ tail -n +$(($nheader + 1)) "$infile" | shuf -n "$n" | LC_ALL=C sort -k1,1V -k2,2n >> "$outfile"
25
37
  fi
@@ -1,13 +1,13 @@
1
1
  from cyvcf2 import VCF, Writer, Variant
2
2
 
3
- infile = {{in.invcf | repr}} # pyright: ignore
4
- outfile = {{out.outfile | repr}} # pyright: ignore
3
+ infile: str = {{in.invcf | quote}} # pyright: ignore # noqa: E999
4
+ outfile: str = {{out.outfile | quote}} # pyright: ignore
5
5
 
6
- {{envs.helper}}
6
+ {{envs.helper}} # pyright: ignore # noqa: E999
7
7
 
8
8
  keep = {{envs.keep | repr}} # pyright: ignore
9
- filters = {{envs.filters | repr}} # pyright: ignore
10
- filter_descs = {{envs.filter_descs | repr}} # pyright: ignore
9
+ filters: dict = {{envs.filters | repr}} # pyright: ignore
10
+ filter_descs: dict = {{envs.filter_descs | repr}} # pyright: ignore
11
11
 
12
12
  # builtin filters
13
13
  BUILTIN_FILTERS = {}
@@ -7,17 +7,17 @@ from biopipen.scripts.vcf.VcfFix_utils import ( # noqa: F401
7
7
  HeaderContig,
8
8
  HeaderGeneral,
9
9
  Fields,
10
- Info,
11
- Format,
12
- Alt,
13
- Filter,
14
- Sample,
15
- Samples,
10
+ # Info,
11
+ # Format,
12
+ # Alt,
13
+ # Filter,
14
+ # Sample,
15
+ # Samples,
16
16
  Variant,
17
17
  )
18
18
  from biopipen.scripts.vcf.VcfFix_utils import fix_vcffile
19
19
 
20
- infile = {{in.infile | quote}} # pyright: ignore
20
+ infile = {{in.infile | quote}} # pyright: ignore # noqa: E999
21
21
  instem = {{in.infile | stem | quote}} # pyright: ignore
22
22
  outfile = {{out.outfile | quote}} # pyright: ignore
23
23
 
@@ -1,6 +1,15 @@
1
1
  import re
2
2
  import gzip
3
- from biopipen.utils.vcf import * # noqa: F401, F403
3
+ from biopipen.utils.vcf import (
4
+ HeaderInfo,
5
+ HeaderFormat,
6
+ HeaderFilter,
7
+ HeaderContig,
8
+ HeaderGeneral,
9
+ Fields,
10
+ Variant,
11
+ HeaderItem,
12
+ )
4
13
 
5
14
 
6
15
  def line_to_obj(line: str):
@@ -41,7 +50,7 @@ def handle_obj(obj, fixes: dict):
41
50
 
42
51
  regex = fix.get("regex")
43
52
  if regex:
44
- if not re.search(regex, obj.raw):
53
+ if not re.search(regex, obj.raw): # type: ignore
45
54
  continue
46
55
 
47
56
  return fix["fix"](obj.raw if kind is None else obj)
@@ -63,11 +72,11 @@ def fix_vcffile(vcffile, outfile, fixes):
63
72
  else:
64
73
  modify_fixes.append(fix)
65
74
 
66
- inopen = gzip.open if vcffile.endswith(".gz") else open
75
+ inopen = gzip.open if str(vcffile).endswith(".gz") else open
67
76
  with inopen(vcffile, "rt") as fin, open(outfile, "w") as fout:
68
77
  for line in fin:
69
78
  obj = line_to_obj(line)
70
- out = handle_obj(obj, modify_fixes)
79
+ out = handle_obj(obj, modify_fixes) # type: ignore
71
80
  if obj.kind == "fields":
72
81
  for fix in header_append_fixes:
73
82
  fout.write(str(fix["fix"](None)).rstrip("\n") + "\n")
@@ -4,10 +4,10 @@ from os import path
4
4
  from biopipen.utils.reference import tabix_index
5
5
  from biopipen.utils.misc import run_command
6
6
 
7
- infile = {{in.infile | repr}} # pyright: ignore
8
- outfile = Path({{out.outfile | repr}}) # pyright: ignore
7
+ infile: str = {{in.infile | quote}} # pyright: ignore # noqa
8
+ outfile = Path({{out.outfile | quote}}) # pyright: ignore
9
9
  outidx = {{out.outidx | repr}} # pyright: ignore
10
- tabix = {{envs.tabix | repr}} # pyright: ignore
10
+ tabix: str = {{envs.tabix | repr}} # pyright: ignore
11
11
  ncores = {{envs.ncores | repr}} # pyright: ignore
12
12
 
13
13
  outfile_with_index = tabix_index(infile, "vcf", outfile.parent, tabix)
@@ -1,8 +1,8 @@
1
1
  from biopipen.utils.misc import run_command, dict_to_cli_args
2
2
 
3
- infile1 = {{in.infile1 | repr}} # pyright: ignore
4
- infile2 = {{in.infile2 | repr}} # pyright: ignore
5
- outfile = {{out.outfile | repr}} # pyright: ignore
3
+ infile1: str = {{in.infile1 | quote}} # pyright: ignore # noqa
4
+ infile2 = {{in.infile2 | quote}} # pyright: ignore
5
+ outfile = {{out.outfile | quote}} # pyright: ignore
6
6
  bcftools = {{envs.bcftools | repr}} # pyright: ignore
7
7
  gz = {{envs.gz | repr}} # pyright: ignore
8
8
  index = {{envs.index | repr}} # pyright: ignore
@@ -1,3 +1,5 @@
1
+ # shellcheck disable=SC2148
2
+ # shellcheck disable=SC1083
1
3
  invcf={{ in.invcf | quote }}
2
4
  outvcf={{ out.outvcf | quote }}
3
5
  rejfile={{ job.outdir | joinpaths: "rejected.vcf" | quote }}
@@ -6,12 +8,15 @@ chain={{ envs.chain | quote }}
6
8
  reffa={{ envs.reffa | quote }}
7
9
  args={{ envs.args | dict_to_cli_args: join=True }}
8
10
 
11
+ # shellcheck disable=SC2154
9
12
  refdict="${reffa%.fa}.dict"
10
13
  if [[ ! -e "$refdict" ]]; then
11
14
  echo "Sequence dictionary does not exist: $refdict" 1>&2
12
15
  exit 1
13
16
  fi
14
17
 
18
+ # shellcheck disable=SC2154
19
+ # shellcheck disable=SC2086
15
20
  $gatk LiftoverVcf \
16
21
  $args \
17
22
  --INPUT "$invcf" \
@@ -3,12 +3,12 @@ import shlex
3
3
  import concurrent.futures
4
4
  from subprocess import Popen, check_output
5
5
 
6
- infile = {{in.infile | repr}} # pyright: ignore
7
- outdir = {{out.outdir | repr}} # pyright: ignore
8
- bcftools = {{envs.bcftools | repr}} # pyright: ignore
6
+ infile: str = {{in.infile | quote}} # pyright: ignore # noqa
7
+ outdir: str = {{out.outdir | quote}} # pyright: ignore
8
+ bcftools: str = {{envs.bcftools | repr}} # pyright: ignore
9
9
  gz = {{envs.gz | repr}} # pyright: ignore
10
10
  index = {{envs.index | repr}} # pyright: ignore
11
- ncores = {{envs.ncores | int}} # pyright: ignore
11
+ ncores: int = {{envs.ncores | int}} # pyright: ignore
12
12
  private = {{envs.private | repr}} # pyright: ignore
13
13
 
14
14
  if index:
@@ -0,0 +1,52 @@
1
+ """Utilities for bcftools"""
2
+
3
+ from biopipen.utils.misc import run_command, dict_to_cli_args
4
+ from biopipen.utils.reference import tabix_index
5
+
6
+
7
def bcftools_version(bcftools: str) -> tuple[int, ...]:
    """Get the version of bcftools

    The first line of the `bcftools version` output (e.g. `bcftools 1.20`)
    is parsed. Only the leading dotted-numeric part is converted, so a
    version string carrying a non-numeric suffix (anything after the digits
    and dots) no longer makes `int()` fail.

    Args:
        bcftools (str): Path to bcftools

    Returns:
        tuple[int, ...]: The version of bcftools, e.g. `(1, 20)`

    Raises:
        ValueError: If no version number can be found in the output
    """
    # Local import: this module only imports biopipen helpers at the top.
    import re

    output = run_command([bcftools, "version"], stdout="return")
    lines = output.splitlines()  # type: ignore
    # bcftools 1.20 -> " 1.20"
    first_line = lines[0].replace("bcftools", "") if lines else ""
    # Take the first run of dot-separated integers and ignore any suffix.
    matched = re.search(r"\d+(?:\.\d+)*", first_line)
    if matched is None:
        raise ValueError(
            f"Cannot determine bcftools version from output: {first_line!r}"
        )
    return tuple(int(part) for part in matched.group(0).split("."))
24
+
25
+
26
def run_bcftools(
    args: dict,
    bcftools: str,  # TODO: get from the first argument of args
    index: bool,
    tabix: str,
) -> None:
    """Run bcftools with the given arguments

    The caller's `args` is never modified: the index flag, when needed, is
    added to a copy, so the same dict can be reused for other calls.

    Args:
        args: Arguments to pass to bcftools. They are converted to a command
            line with `dict_to_cli_args(..., dashify=True)`. When `index` is
            true and bcftools is older than 1.20, `args["o"]` is the file
            passed to `tabix_index`.
        bcftools (str): Path to bcftools (only used to query the version)
        index (bool): Whether to index the output
        tabix (str): Path to tabix
    """
    cli_args = args.copy()

    # bcftools 1.20+ is asked to write the index itself; older versions are
    # indexed with tabix after the run.
    native_index = False
    if index:
        native_index = bcftools_version(bcftools) >= (1, 20)
        if native_index:
            # '--write-index tbi' not working
            # it has to be '--write-index=tbi'
            cli_args["write_index=tbi"] = True

    run_command(dict_to_cli_args(cli_args, dashify=True), fg=True)

    if index and not native_index:
        tabix_index(args["o"], "vcf", tmpdir=False, tabix=tabix)
@@ -2,13 +2,13 @@ from pathlib import Path
2
2
 
3
3
  from biopipen.utils.misc import run_command, dict_to_cli_args
4
4
 
5
- url = {{in.url | repr}} # pyright: ignore
6
- outfile = Path({{out.outfile | repr}}) # pyright: ignore
5
+ url = {{in.url | quote}} # pyright: ignore # noqa
6
+ outfile = Path({{out.outfile | quote}}) # pyright: ignore
7
7
  tool = {{envs.tool | repr}} # pyright: ignore
8
8
  wget = {{envs.wget | repr}} # pyright: ignore
9
9
  aria2c = {{envs.aria2c | repr}} # pyright: ignore
10
10
  ncores = {{envs.ncores | repr}} # pyright: ignore
11
- args = {{envs.args | dict}} # pyright: ignore
11
+ args: dict = {{envs.args | dict}} # pyright: ignore
12
12
 
13
13
  if tool == "wget":
14
14
  args["_"] = url
@@ -28,4 +28,8 @@ elif tool == "aria2c":
28
28
 
29
29
  else: # use python
30
30
  import urllib
31
- urllib.urlretrieve(url, outfile)
31
+
32
+ try:
33
+ urllib.urlretrieve(url, outfile) # type: ignore
34
+ except AttributeError:
35
+ urllib.request.urlretrieve(url, outfile) # type: ignore
@@ -2,13 +2,13 @@ from pathlib import Path
2
2
 
3
3
  from biopipen.utils.misc import run_command, dict_to_cli_args
4
4
 
5
- urlfile = {{in.urlfile | repr}} # pyright: ignore
6
- outdir = Path({{out.outdir | repr}}) # pyright: ignore
5
+ urlfile: str = {{in.urlfile | quote}} # pyright: ignore # noqa
6
+ outdir = Path({{out.outdir | quote}}) # pyright: ignore
7
7
  tool = {{envs.tool | repr}} # pyright: ignore
8
8
  wget = {{envs.wget | repr}} # pyright: ignore
9
9
  aria2c = {{envs.aria2c | repr}} # pyright: ignore
10
10
  ncores = {{envs.ncores | repr}} # pyright: ignore
11
- args = {{envs.args | repr}} # pyright: ignore
11
+ args: dict = {{envs.args | repr}} # pyright: ignore
12
12
 
13
13
  if tool == "wget":
14
14
  args["i"] = urlfile
@@ -26,10 +26,10 @@ elif tool == "aria2c":
26
26
  run_command(dict_to_cli_args(args, dashify=True), fg=True)
27
27
 
28
28
  else: # use python
29
- import urllib
29
+ from urllib.request import urlretrieve
30
30
  from urllib.parse import urlparse
31
31
  with open(urlfile, "r") as furl:
32
32
  for i, url in enumerate(furl.readlines()):
33
33
  parsed = urlparse(url)
34
34
  path = Path(parsed.path)
35
- urllib.urlretrieve(url, f"{path.stem}-{i}{path.suffix}")
35
+ urlretrieve(url, f"{path.stem}-{i}{path.suffix}")