biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
biopipen/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.21.0"
1
+ __version__ = "0.34.26"
biopipen/core/config.toml CHANGED
@@ -1,9 +1,17 @@
1
1
  # Executables or binaries
2
2
  [exe]
3
+ # BeEM: https://github.com/kad-ecoli/BeEM
4
+ beem = "BeEM"
3
5
  # bedtools to handle bed files
4
6
  bedtools = "bedtools"
5
7
  # bcftools to handle bcf/vcf files
6
8
  bcftools = "bcftools"
9
+ # calculate_rmsd: https://github.com/charnley/rmsd
10
+ calculate_rmsd = "calculate_rmsd"
11
+ # cellranger
12
+ cellranger = "cellranger"
13
+ # cellsnp-lite
14
+ cellsnp_lite = "cellsnp-lite"
7
15
  # Control-FREEC to call cnvs
8
16
  freec = "freec"
9
17
  # liftover coordinates across genomes
@@ -11,6 +19,8 @@ liftover = "liftOver"
11
19
  # gatk, installed via conda
12
20
  gatk = "gatk"
13
21
  gatk4 = "gatk"
22
+ # google cloud sdk
23
+ gcloud = "gcloud"
14
24
  # vdjtools, installed via conda
15
25
  vdjtools = "vdjtools"
16
26
  # cnvkit.py
@@ -21,10 +31,20 @@ cnvpytor = "cnvpytor"
21
31
  cnvnator2vcf = "cnvnator2VCF.pl"
22
32
  # convert
23
33
  convert = "convert"
34
+ # fimo from meme
35
+ fimo = "fimo"
36
+ # MAXIT: https://sw-tools.rcsb.org/apps/MAXIT/
37
+ maxit = "maxit"
38
+ # MQuad: https://github.com/single-cell-genetics/MQuad
39
+ mquad = "mquad"
24
40
  # wget
25
41
  wget = "wget"
26
42
  # aria2c
27
43
  aria2c = "aria2c"
44
+ # plink
45
+ plink = "plink"
46
+ # plink2
47
+ plink2 = "plink2"
28
48
  # tabix
29
49
  tabix = "tabix"
30
50
  # sambamba
@@ -59,6 +79,10 @@ liftover_chain = ""
59
79
  # tmpdir = ""
60
80
 
61
81
  [ref]
82
+ # The reference for cellranger gex
83
+ ref_cellranger_gex = ""
84
+ # The reference for cellranger vdj
85
+ ref_cellranger_vdj = ""
62
86
  # The reference genome
63
87
  reffa = ""
64
88
  # The directory with reference for each chromosome
@@ -78,6 +102,10 @@ genome = ""
78
102
  # Database file for scType
79
103
  # https://github.com/IanevskiAleksandr/sc-type/
80
104
  sctype_db = ""
105
+ # TF Motif database
106
+ tf_motifdb = ""
107
+ # TF motif pairs
108
+ tf_motifs = ""
81
109
 
82
110
  [misc]
83
111
  # Number of cores used for each job
biopipen/core/filters.py CHANGED
@@ -1,12 +1,17 @@
1
1
  """Additional filters for pipen"""
2
2
  from __future__ import annotations
3
3
 
4
+ import re
4
5
  import shlex
5
6
  from pathlib import Path
6
7
  from typing import Any, List, Mapping
7
8
 
8
- from argx import Namespace
9
+ from argx import Namespace # pyright: ignore[reportPrivateImportUsage]
9
10
  from liquid.filters.manager import FilterManager
11
+ from yunpath import CloudPath
12
+ from pipen_report.filters import register_component, _tag
13
+
14
+ # from .defaults import BIOPIPEN_DIR
10
15
 
11
16
  filtermanager = FilterManager()
12
17
 
@@ -14,6 +19,7 @@ filtermanager = FilterManager()
14
19
  @filtermanager.register
15
20
  def dict_to_cli_args(
16
21
  dic: Mapping[str, Any],
22
+ exclude: List[str] | None = None,
17
23
  prefix: str | None = None,
18
24
  sep: str | None = " ",
19
25
  dup_key: bool = True,
@@ -26,6 +32,7 @@ def dict_to_cli_args(
26
32
 
27
33
  Args:
28
34
  dic: The dict to convert
35
+ exclude: The keys to exclude before conversion (e.g. dashify)
29
36
  prefix: The prefix of the keys after conversion
30
37
  Defaults to `None`, mean `-` for short keys and `--` for long keys
31
38
  sep: The separator between key and value
@@ -36,6 +43,13 @@ def dict_to_cli_args(
36
43
  If `sep` is `None` or `=`, this must be True, otherwise an error
37
44
  will be raised
38
45
  join: Whether to join the arguments into a single string
46
+ start_key: The key to start the arguments
47
+ This is useful when you want to put some arguments at the beginning
48
+ of the command line
49
+ end_key: The key to end the arguments
50
+ This is useful when you want to put some arguments at the end
51
+ of the command line
52
+ dashify: Whether to replace `_` with `-` in the keys
39
53
 
40
54
  Returns:
41
55
  The converted string or list of strings
@@ -43,6 +57,9 @@ def dict_to_cli_args(
43
57
  if sep in [None, "="] and not dup_key:
44
58
  raise ValueError("`dup_key` must be True when sep is `None` or `=`")
45
59
 
60
+ if exclude:
61
+ dic = {k: v for k, v in dic.items() if k not in exclude}
62
+
46
63
  starts = []
47
64
  ends = []
48
65
  out = []
@@ -105,7 +122,7 @@ def dict_to_cli_args(
105
122
  def r(
106
123
  obj: Any,
107
124
  ignoreintkey: bool = True,
108
- todot: str = None,
125
+ todot: str | None = None,
109
126
  sortkeys: bool = False,
110
127
  skip: int = 0,
111
128
  _i: int = 0,
@@ -156,12 +173,14 @@ def r(
156
173
  return "TRUE"
157
174
  if obj.upper() == "FALSE":
158
175
  return "FALSE"
159
- if obj.upper() == "NA" or obj.upper() == "NULL":
176
+ if obj.upper() == "NA" or obj.upper() == "NULL" or obj == "None":
160
177
  return obj.upper()
178
+ if re.match(r"^\d+:\d+$", obj):
179
+ return obj
161
180
  if obj.startswith("r:") or obj.startswith("R:"):
162
181
  return str(obj)[2:]
163
182
  return repr(str(obj))
164
- if isinstance(obj, Path):
183
+ if isinstance(obj, (Path, CloudPath)):
165
184
  return repr(str(obj))
166
185
  if isinstance(obj, (list, tuple, set)):
167
186
  if any(isinstance(i, dict) for i in obj):
@@ -206,3 +225,59 @@ def r(
206
225
  return r(vars(obj), ignoreintkey, todot, sortkeys, skip, _i)
207
226
 
208
227
  return repr(obj)
228
+
229
+
230
+ @filtermanager.register
231
+ def source_r(path: str | Path, chdir: bool = False) -> str:
232
+ """Source an R script.
233
+
234
+ In addition to generating `source(path)`, we also include the mtime for the script
235
+ to trigger the job not cached when the script is updated.
236
+
237
+ If your process is used in a cloud environment, it is recommended to
238
+ use the `read` filter to load the script content instead of sourcing it using
239
+ the `source` function in R to void the path issue (path could be different
240
+ in different environments).
241
+
242
+ Args:
243
+ path: The path to the R script
244
+
245
+ Returns:
246
+ The R code to source the script
247
+ """
248
+ path = Path(path)
249
+ mtime = int(path.stat().st_mtime)
250
+ return (
251
+ f"# Last modified: {mtime}\n"
252
+ # f"biopipen_dir = {r(BIOPIPEN_DIR)}\n"
253
+ f"source('{path}', chdir = {r(chdir)})"
254
+ )
255
+
256
+
257
+ @register_component("pdf")
258
+ def _render_pdf(
259
+ cont: Mapping[str, Any],
260
+ job: Mapping[str, Any],
261
+ level: int,
262
+ ) -> str:
263
+ """Render pdf report"""
264
+ # cont["src"] is required
265
+ height = cont.get("height", "600")
266
+ return _tag(
267
+ "embed",
268
+ src=str(cont["src"]),
269
+ type="application/pdf",
270
+ width="100%",
271
+ height=height,
272
+ )
273
+
274
+
275
+ @register_component("gsea")
276
+ def _render_gsea(
277
+ cont: Mapping[str, Any],
278
+ job: Mapping[str, Any],
279
+ level: int,
280
+ ) -> str:
281
+ """Render gsea report"""
282
+ # cont["dir"] is required
283
+ raise NotImplementedError()
biopipen/core/proc.py CHANGED
@@ -1,7 +1,9 @@
1
1
  """Provides a base class for the processes to subclass"""
2
- from diot import Diot
2
+ from __future__ import annotations
3
+
4
+ from diot import Diot # type: ignore
3
5
  from liquid.defaults import SEARCH_PATHS
4
- from pipen import Proc as PipenProc
6
+ from pipen import Proc as PipenProc # type: ignore
5
7
  from pipen_filters.filters import FILTERS
6
8
 
7
9
  from .filters import filtermanager
@@ -23,5 +25,12 @@ class Proc(PipenProc):
23
25
  template_opts = {
24
26
  "globals": {**FILTERS, "biopipen_dir": str(BIOPIPEN_DIR)},
25
27
  "filters": {**FILTERS, **filtermanager.filters},
26
- "search_paths": SEARCH_PATHS + [str(REPORT_DIR)],
28
+ "search_paths": SEARCH_PATHS + [str(REPORT_DIR)], # type: ignore
29
+ }
30
+
31
+ plugin_opts = {
32
+ "poplog_pattern": (
33
+ r"^(?P<level>INFO|WARN|WARNING|CRITICAL|ERROR|DEBUG?)\s*"
34
+ r"\[\d+-\d+-\d+ \d+:\d+:\d+\] (?P<message>.*)$"
35
+ )
27
36
  }
biopipen/core/testing.py CHANGED
@@ -1,12 +1,16 @@
1
1
  """Provide utilities for testing."""
2
2
  import tempfile
3
+ from functools import wraps
3
4
  from pathlib import Path
4
5
 
5
6
  from pipen import Pipen
6
7
 
7
8
  TESTING_INDEX_INIT = 1
8
- TESTING_PARENT_DIR = tempfile.gettempdir()
9
- TESTING_DIR = f"{TESTING_PARENT_DIR}/biopipen-tests-%(index)s"
9
+ TESTING_PARENT_DIR = Path(__file__).parent.parent.parent.joinpath("tests", "running")
10
+ TESTING_PARENT_DIR.mkdir(parents=True, exist_ok=True)
11
+ TESTING_DIR = str(TESTING_PARENT_DIR.joinpath("biopipen-tests-%(index)s"))
12
+ RSCRIPT_DIR = TESTING_PARENT_DIR.joinpath("biopipen-tests-rscripts")
13
+ RSCRIPT_DIR.mkdir(exist_ok=True)
10
14
 
11
15
 
12
16
  def _find_testing_index(new):
@@ -37,14 +41,82 @@ def _get_test_dirs(testfile, new):
37
41
  return name, workdir, outdir
38
42
 
39
43
 
40
- def get_pipeline(testfile, loglevel="debug", **kwargs):
44
+ def get_pipeline(testfile, loglevel="debug", enable_report=False, **kwargs):
41
45
  """Get a pipeline for a test file"""
42
46
  name, workdir, outdir = _get_test_dirs(testfile, False)
47
+ report_plugin_prefix = "+" if enable_report else "-"
48
+ plugins = kwargs.pop("plugins", [])
49
+ if any("report" in p for p in plugins if isinstance(p, str)):
50
+ raise ValueError(
51
+ "Do not pass `report` plugin to `get_pipeline(plugins=[...])`, "
52
+ "use `enable_report` instead."
53
+ )
54
+ plugins.append(f"{report_plugin_prefix}report")
43
55
  kws = {
44
56
  "name": name,
45
57
  "workdir": workdir,
46
58
  "outdir": outdir,
47
59
  "loglevel": loglevel,
60
+ "plugins": plugins,
48
61
  }
49
62
  kws.update(kwargs)
50
63
  return Pipen(**kws)
64
+
65
+
66
+ def _run_rcode(rcode: str) -> str:
67
+ """Run R code and return the output"""
68
+ import hashlib
69
+ import textwrap
70
+ import subprocess as sp
71
+
72
+ # Use sha256 of rcode to name the file
73
+ rcode_hash = hashlib.sha256(rcode.encode()).hexdigest()
74
+ script_file = RSCRIPT_DIR.joinpath(f"rcode-{rcode_hash}.R")
75
+ script_file.write_text(rcode)
76
+ p = sp.Popen(["Rscript", str(script_file)], stdout=sp.PIPE, stderr=sp.PIPE)
77
+ out, err = p.communicate()
78
+ if p.returncode != 0:
79
+ out = (
80
+ f"R codefile:\n {script_file}\n"
81
+ f"Error:\n{textwrap.indent(err.decode(), ' ')}"
82
+ )
83
+ return out
84
+
85
+ return out.decode().strip()
86
+
87
+
88
+ def r_test(mem: callable) -> callable:
89
+ """A decorator to test R code"""
90
+ @wraps(mem)
91
+ def decorator(self, *args, **kwargs):
92
+ rcode = mem(self, *args, **kwargs)
93
+ source = getattr(self, "SOURCE_FILE", None)
94
+ expect = (
95
+ "expect <- function(expr, ...) {\n"
96
+ " if (!expr) {\n"
97
+ " msg <- lapply(\n"
98
+ " list(...),\n"
99
+ " function(x) { ifelse(is.null(x), 'NULL', x) }\n"
100
+ " )\n"
101
+ " stop(paste0(unlist(msg), collapse = ' '))\n"
102
+ " }\n"
103
+ "}\n"
104
+ )
105
+ rcode = f"{expect}\n\n{rcode}\n\ncat('PASSED')\n"
106
+ if source is not None:
107
+ if not isinstance(source, (list, tuple)):
108
+ source = [source]
109
+
110
+ libs = "\n".join([f"suppressWarnings(source('{s}'))" for s in source])
111
+ rcode = f'{libs}\n\n{rcode}'
112
+
113
+ out = _run_rcode(rcode)
114
+ self.assertEqual(
115
+ out,
116
+ "PASSED",
117
+ "\n-----------------------------\n"
118
+ f"{out}"
119
+ "\n-----------------------------\n"
120
+ )
121
+
122
+ return decorator
biopipen/ns/bam.py CHANGED
@@ -4,6 +4,9 @@ from ..core.proc import Proc
4
4
  from ..core.config import config
5
5
 
6
6
 
7
+ # +-------------------------------------------------------------------+
8
+ # | CNV callers |
9
+ # +-------------------------------------------------------------------+
7
10
  class CNVpytor(Proc):
8
11
  """Detect CNV using CNVpytor
9
12
 
@@ -17,7 +20,6 @@ class CNVpytor(Proc):
17
20
 
18
21
  Envs:
19
22
  cnvpytor: Path to cnvpytor
20
- cnvnator2vcf: Path to CNVnator2VCF.pl to convert the result to VCF file
21
23
  samtools: Path to samtools, used to index bam file in case it's not
22
24
  ncores: Number of cores to use (`-j` for cnvpytor)
23
25
  refdir: The directory containing the fasta file for each chromosome
@@ -27,21 +29,19 @@ class CNVpytor(Proc):
27
29
  binsizes: The binsizes
28
30
  snp: How to read snp data
29
31
  filters: The filters to filter the result
30
- See - https://github.com/abyzovlab/CNVpytor/blob/master
31
- /GettingStarted.md#predicting-cnv-regions
32
+ See - https://github.com/abyzovlab/CNVpytor/blob/master/GettingStarted.md#predicting-cnv-regions
32
33
  mask_snps: Whether mask 1000 Genome snps
33
34
  baf_nomask: Do not use P mask in BAF histograms
34
35
 
35
36
  Requires:
36
37
  cnvpytor:
37
38
  - check: {{proc.envs.cnvpytor}} --version
38
- """
39
+ """ # noqa: E501
39
40
  input = "bamfile:file, snpfile:file"
40
41
  output = "outdir:dir:{{in.bamfile | stem}}.cnvpytor"
41
42
  lang = config.lang.python
42
43
  envs = {
43
44
  "cnvpytor": config.exe.cnvpytor,
44
- "cnvnator2vcf": config.exe.cnvnator2vcf,
45
45
  "samtools": config.exe.samtools,
46
46
  "ncores": config.misc.ncores,
47
47
  "refdir": config.ref.refdir,
@@ -152,7 +152,7 @@ class CNAClinic(Proc):
152
152
  A list of sample names
153
153
  A float number (0 < x <= 1), the fraction of samples to use
154
154
  A integer number (x > 1), the number of samples to use
155
- binsize: Directly use this binsize for CNAClinic, in kbp.
155
+ binsize: Directly use this binsize for CNAClinic, in bp.
156
156
  genome: The genome assembly
157
157
  run_args: The arguments for CNAClinic::runSegmentation
158
158
  plot_args: The arguments for CNAClinic::plotSampleData
@@ -183,6 +183,9 @@ class CNAClinic(Proc):
183
183
  }
184
184
 
185
185
 
186
+ # +-------------------------------------------------------------------+
187
+ # | Bam processing tools |
188
+ # +-------------------------------------------------------------------+
186
189
  class BamSplitChroms(Proc):
187
190
  """Split bam file by chromosomes
188
191
 
@@ -262,3 +265,142 @@ class BamMerge(Proc):
262
265
  "sort_args": [],
263
266
  }
264
267
  script = "file://../scripts/bam/BamMerge.py"
268
+
269
+
270
+ class BamSampling(Proc):
271
+ """Keeping only a fraction of read pairs from a bam file
272
+
273
+ Input:
274
+ bamfile: The bam file
275
+
276
+ Output:
277
+ outfile: The output bam file
278
+
279
+ Envs:
280
+ ncores: Number of cores to use
281
+ samtools: Path to samtools executable
282
+ tool: The tool to use, currently only "samtools" is supported
283
+ fraction (type=float): The fraction of reads to keep.
284
+ If `0 < fraction <= 1`, it's the fraction of reads to keep.
285
+ If `fraction > 1`, it's the number of reads to keep.
286
+ Note that when fraction > 1, you may not get the exact number
287
+ of reads specified but a close number.
288
+ seed: The seed for random number generator
289
+ index: Whether to index the output bam file
290
+ sort: Whether to sort the output bam file
291
+ sort_args: The arguments for sorting bam file using `samtools sort`.
292
+ These keys are not allowed: `-o`, `-@`,
293
+ and `--threads`, as they are managed by the script.
294
+ """
295
+ input = "bamfile:file"
296
+ output = "outfile:file:{{in.bamfile | stem}}.sampled{{envs.fraction}}.bam"
297
+ lang = config.lang.python
298
+ envs = {
299
+ "ncores": config.misc.ncores,
300
+ "samtools": config.exe.samtools,
301
+ "tool": "samtools",
302
+ "fraction": None,
303
+ "seed": 8525,
304
+ "index": True,
305
+ "sort": True,
306
+ "sort_args": [],
307
+ }
308
+ script = "file://../scripts/bam/BamSampling.py"
309
+
310
+
311
+ class BamSubsetByBed(Proc):
312
+ """Subset bam file by the regions in a bed file
313
+
314
+ Input:
315
+ bamfile: The bam file
316
+ bedfile: The bed file
317
+
318
+ Output:
319
+ outfile: The output bam file
320
+
321
+ Envs:
322
+ ncores: Number of cores to use
323
+ samtools: Path to samtools executable
324
+ tool: The tool to use, currently only "samtools" is supported
325
+ index: Whether to index the output bam file
326
+ """
327
+ input = "bamfile:file, bedfile:file"
328
+ output = "outfile:file:{{in.bamfile | stem}}-subset.bam"
329
+ lang = config.lang.python
330
+ envs = {
331
+ "ncores": config.misc.ncores,
332
+ "samtools": config.exe.samtools,
333
+ "tool": "samtools",
334
+ "index": True,
335
+ }
336
+ script = "file://../scripts/bam/BamSubsetByBed.py"
337
+
338
+
339
+ class BamSort(Proc):
340
+ """Sort bam file
341
+
342
+ Input:
343
+ bamfile: The bam file
344
+
345
+ Output:
346
+ outfile: The output bam file
347
+
348
+ Envs:
349
+ tool (choice): The tool to use.
350
+ - samtools: Use `samtools`
351
+ - sambamba: Use `sambamba`
352
+ ncores (type=int): Number of cores to use
353
+ samtools: Path to samtools executable
354
+ sambamba: Path to sambamba executable
355
+ tmpdir: The temporary directory to use
356
+ byname (flag): Whether to sort by read name
357
+ index (flag): Whether to index the output bam file
358
+ The index file will be created in the same directory as the output
359
+ bam file
360
+ <more>: Other arguments passed to the sorting tool
361
+ See `samtools sort` or `sambamba sort`
362
+ """
363
+ input = "bamfile:file"
364
+ output = "outfile:file:{{in.bamfile | stem}}.sorted.bam"
365
+ lang = config.lang.python
366
+ envs = {
367
+ "tool": "samtools",
368
+ "ncores": config.misc.ncores,
369
+ "samtools": config.exe.samtools,
370
+ "sambamba": config.exe.sambamba,
371
+ "tmpdir": config.path.tmpdir,
372
+ "byname": False,
373
+ "index": True,
374
+ }
375
+ script = "file://../scripts/bam/BamSort.py"
376
+
377
+
378
+ class SamtoolsView(Proc):
379
+ """View bam file using samtools, mostly used for filtering
380
+
381
+ This is a wrapper for `samtools view` command.
382
+ It will create a new bam file with the same name as the input bam file.
383
+
384
+ Input:
385
+ bamfile: The bam file
386
+
387
+ Output:
388
+ outfile: The output bam file
389
+
390
+ Envs:
391
+ ncores: Number of cores to use
392
+ samtools: Path to samtools executable
393
+ index: Whether to index the output bam file
394
+ Requires the input bam file to be sorted.
395
+ <more>: Other arguments passed to the view tool
396
+ See `samtools view` or `sambamba view`.
397
+ """
398
+ input = "bamfile:file"
399
+ output = "outfile:file:{{in.bamfile | stem}}.bam"
400
+ lang = config.lang.python
401
+ envs = {
402
+ "ncores": config.misc.ncores,
403
+ "samtools": config.exe.samtools,
404
+ "index": True,
405
+ }
406
+ script = "file://../scripts/bam/SamtoolsView.py"
biopipen/ns/bed.py CHANGED
@@ -163,3 +163,78 @@ class BedtoolsMerge(Proc):
163
163
  "bedtools": config.exe.bedtools,
164
164
  }
165
165
  script = "file://../scripts/bed/BedtoolsMerge.py"
166
+
167
+
168
+ class BedtoolsIntersect(Proc):
169
+ """Find the intersection of two BED files, using `bedtools intersect`
170
+
171
+ See <https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html>
172
+
173
+ Input:
174
+ afile: The first BED file
175
+ bfile: The second BED file
176
+
177
+ Output:
178
+ outfile: The output BED file
179
+
180
+ Envs:
181
+ bedtools: The path to bedtools
182
+ sort: Sort `afile` and `bfile` before intersecting.
183
+ By default, `-sorted` is used, assuming the input files are sorted.
184
+ If an error occurs, try setting `sort` to `True`.
185
+ chrsize: Alias for `g` in `bedtools intersect`.
186
+ postcmd: The command to be executed for the output file after intersecting.
187
+ You can use `$infile`, `$outfile`, and `$outdir` to refer to the input,
188
+ output, and output directory, respectively.
189
+ <more>: Other options to be passed to `bedtools intersect`
190
+ """ # noqa: E501
191
+ input = "afile:file", "bfile:file"
192
+ output = "outfile:file:{{in.afile | stem0}}_{{in.bfile | stem0}}-intersect.bt"
193
+ lang = config.lang.python
194
+ envs = {
195
+ "bedtools": config.exe.bedtools,
196
+ "sort": False,
197
+ "chrsize": config.ref.chrsize,
198
+ "postcmd": None,
199
+ }
200
+ script = "file://../scripts/bed/BedtoolsIntersect.py"
201
+
202
+
203
+ class BedtoolsMakeWindows(Proc):
204
+ """Make windows from a BED file or genome size file, using `bedtools makewindows`.
205
+
206
+ Input:
207
+ infile: The input BED file or a genome size file
208
+ Type will be detected by the number of columns in the file.
209
+ If it has 3+ columns, it is treated as a BED file, otherwise
210
+ a genome size file.
211
+
212
+ Output:
213
+ outfile: The output BED file
214
+
215
+ Envs:
216
+ bedtools: The path to bedtools
217
+ window (type=int): The size of the windows
218
+ step (type=int): The step size of the windows
219
+ nwin (type=int): The number of windows to be generated
220
+ Mutually exclusive with `window` and `step`.
221
+ Either `nwin`, or `window` and `step`, should be provided.
222
+ reverse (flag): Reverse numbering of windows in the output
223
+ name (choice): How to name the generated windows/regions
224
+ - none: Do not add any name
225
+ - src: Use the source interval's name
226
+ - winnum: Use the window number
227
+ - srcwinnum: Use the source interval's name and window number
228
+ """ # noqa: E501
229
+ input = "infile:file"
230
+ output = "outfile:file:{{in.infile | stem}}_windows.bed"
231
+ lang = config.lang.python
232
+ envs = {
233
+ "bedtools": config.exe.bedtools,
234
+ "window": None,
235
+ "step": None,
236
+ "nwin": None,
237
+ "reverse": False,
238
+ "name": "none",
239
+ }
240
+ script = "file://../scripts/bed/BedtoolsMakeWindows.py"