miga-base 1.2.15.2 → 1.2.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli/action/download/gtdb.rb +4 -1
  3. data/lib/miga/cli/action/gtdb_get.rb +4 -0
  4. data/lib/miga/daemon.rb +4 -1
  5. data/lib/miga/lair.rb +6 -4
  6. data/lib/miga/remote_dataset/download.rb +3 -2
  7. data/lib/miga/remote_dataset.rb +25 -7
  8. data/lib/miga/taxonomy.rb +6 -0
  9. data/lib/miga/version.rb +2 -2
  10. metadata +6 -302
  11. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  12. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  13. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  14. data/utils/FastAAI/FastAAI +0 -3659
  15. data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
  16. data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
  17. data/utils/FastAAI/README.md +0 -84
  18. data/utils/enveomics/Docs/recplot2.md +0 -244
  19. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  20. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  21. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  22. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  23. data/utils/enveomics/LICENSE.txt +0 -73
  24. data/utils/enveomics/Makefile +0 -52
  25. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  26. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
  27. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  28. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
  29. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
  30. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  31. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
  32. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  33. data/utils/enveomics/Manifest/Tasks/other.json +0 -906
  34. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  35. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
  36. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  37. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  38. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  39. data/utils/enveomics/Manifest/categories.json +0 -165
  40. data/utils/enveomics/Manifest/examples.json +0 -162
  41. data/utils/enveomics/Manifest/tasks.json +0 -4
  42. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  49. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  50. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  51. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  52. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  53. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  54. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  55. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  56. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  57. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  58. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  59. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  60. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  61. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  62. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  63. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  64. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  68. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  69. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  70. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  71. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  72. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  73. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  74. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  75. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  76. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  77. data/utils/enveomics/README.md +0 -42
  78. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  79. data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
  80. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  81. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  82. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  83. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  84. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  85. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  86. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  87. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  88. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  89. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  90. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  91. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  92. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  93. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  94. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  95. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  96. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  97. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  98. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  99. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  100. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  101. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
  102. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  103. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  104. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  105. data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
  106. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  107. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  108. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  109. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  110. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
  111. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  112. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  113. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  114. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  115. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  116. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  117. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  118. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  119. data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
  120. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  121. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  122. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  123. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  124. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  125. data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
  126. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  127. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  128. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  129. data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
  130. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  131. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  132. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
  133. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  134. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  135. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  136. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  137. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  138. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  139. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  140. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  141. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  142. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  143. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  144. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  145. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  146. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  147. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  148. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  149. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  150. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  151. data/utils/enveomics/Scripts/SRA.download.bash +0 -55
  152. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  153. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  154. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  155. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  156. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  157. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  158. data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
  159. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  160. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  161. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  162. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  163. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  164. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  165. data/utils/enveomics/Scripts/aai.rb +0 -421
  166. data/utils/enveomics/Scripts/ani.rb +0 -362
  167. data/utils/enveomics/Scripts/anir.rb +0 -137
  168. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  169. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  170. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  171. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  172. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  173. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  174. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
  175. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
  176. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  177. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
  178. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
  179. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  180. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
  181. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  182. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
  183. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  184. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  185. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
  186. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
  187. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
  188. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
  189. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  190. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  191. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  192. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  193. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  194. data/utils/enveomics/Scripts/ogs.rb +0 -104
  195. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  196. data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
  197. data/utils/enveomics/Scripts/rbm.rb +0 -108
  198. data/utils/enveomics/Scripts/sam.filter.rb +0 -148
  199. data/utils/enveomics/Tests/Makefile +0 -10
  200. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  201. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  202. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  203. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  204. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  205. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  206. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  207. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  208. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  209. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  210. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  211. data/utils/enveomics/Tests/alkB.nwk +0 -1
  212. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  213. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  214. data/utils/enveomics/Tests/hiv1.faa +0 -59
  215. data/utils/enveomics/Tests/hiv1.fna +0 -134
  216. data/utils/enveomics/Tests/hiv2.faa +0 -70
  217. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  218. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  219. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  220. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  221. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  222. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  223. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  224. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  225. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  226. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  227. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  228. data/utils/enveomics/build_enveomics_r.bash +0 -45
  229. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  230. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  231. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  232. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  233. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  234. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  235. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  236. data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
  237. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  238. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  239. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  240. data/utils/enveomics/enveomics.R/R/utils.R +0 -80
  241. data/utils/enveomics/enveomics.R/README.md +0 -81
  242. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  243. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  244. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
  245. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
  246. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
  247. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  248. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  249. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  250. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  251. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  252. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  253. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
  254. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
  255. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
  256. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  257. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  258. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
  259. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
  260. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
  261. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
  262. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
  263. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
  264. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
  265. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  266. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  267. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
  268. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  269. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  270. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  271. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  272. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
  273. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  274. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  275. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
  276. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  277. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  278. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  279. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  280. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
  281. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  282. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
  283. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
  284. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
  285. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  286. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  287. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
  288. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
  289. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
  290. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  291. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  292. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  293. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  294. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
  295. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
  296. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
  297. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
  298. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  299. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  300. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  301. data/utils/enveomics/globals.mk +0 -8
  302. data/utils/enveomics/manifest.json +0 -9
  303. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  304. data/utils/multitrim/README.md +0 -67
  305. data/utils/multitrim/multitrim.py +0 -1555
  306. data/utils/multitrim/multitrim.yml +0 -13
@@ -1,37 +0,0 @@
1
-
2
- # @author: Luis M. Rodriguez-R
3
- # @update: Nov-29-2012
4
-
5
- kSelector <- function(file, lib){
6
- red <- rgb(0.6, 0, 0);
7
- d <- read.table(file, sep=" ", h=T, fill=T);
8
- d <- d[!is.na(d$N50) & !is.na(d$used), ];
9
- d$reads <- max(d$reads, na.rm=T)
10
- d <- d[order(d$K), ];
11
- rownames(d) <- 1:nrow(d);
12
- par(mar=c(5,4,4,5)+.1, cex=0.8);
13
- barplot(d$reads/1e6, names=d$K, col='white', ylab='Number of reads (in millions)', xlab='K',
14
- main=paste('Reads used and N50 by K-mers in the assembly of', lib));
15
- barplot(d$used/1e6, col='grey', add=T);
16
- par(new=T);
17
- plot(1:length(d$K)-0.5, d$N50, col=red, t='b', lty=2, pch=20, cex=1, lwd=1.5,
18
- xlim=c(0, length(d$K)), xaxt='n', yaxt='n', xlab='', ylab='');
19
- axis(4, col.axis=red);
20
- mtext('N50 (bp)', side=4, line=3, col=red);
21
- # Suggest best k-mers
22
- if(nrow(d) >= 3){
23
- x = data.frame(K=d$K, l=(d$N50 - mean(d$N50))/sd(d$N50), u=(d$used - mean(d$used))/sd(d$used));
24
- rownames(x) <- rownames(d)
25
- d <- cbind(d, sel=FALSE);
26
- k_s = c();
27
- for(l_star in c(2, 1/2, 1)){
28
- k_s_i = x$K[which.max(l_star*x$l + x$u)];
29
- k_s <- c(k_s, k_s_i);
30
- x <- x[x$K!=k_s_i, ];
31
- d$sel[d$K==k_s_i] <- TRUE;
32
- }
33
- abline(v=as.numeric(rownames(d)[d$sel])-0.5, col='darkgreen', lty=6);
34
- }
35
- return(d);
36
- }
37
-
@@ -1,68 +0,0 @@
1
- #!/bin/bash
2
- #PBS -l nodes=1:ppn=1
3
- #PBS -k oe
4
-
5
- # Some defaults for the parameters
6
- BIN454=${BIN454:-"$HOME/454/bin"};
7
-
8
- # Check mandatory variables
9
- if [[ "$LIB" == "" ]]; then
10
- echo "Error: LIB is mandatory" >&2
11
- exit 1;
12
- fi
13
- if [[ "$PDIR" == "" ]]; then
14
- echo "Error: PDIR is mandatory" >&2
15
- exit 1;
16
- fi
17
- if [[ "$KVELVET$KSOAP" == "" ]]; then
18
- echo "Error: KVELVET and/or KSOAP are mandatory" >&2
19
- exit 1;
20
- fi
21
-
22
- # Prepare input
23
- KVELVET=$(echo $KVELVET | sed -e 's/:/ /g')
24
- KSOAP=$(echo $KSOAP | sed -e 's/:/ /g')
25
- NP=$(cat "$PBS_NODEFILE" | wc -l)
26
- CWD=$(pwd)
27
- DIR="$CWD/$LIB.newbler"
28
- LOG="$DIR.log"
29
- module load perl/5.14.4
30
- export PATH=$PATH:$BIN454
31
-
32
- # Create project
33
- echo new > $DIR.proc
34
- nohup newAssembly $DIR > $LOG
35
-
36
- # Prepare Velvet
37
- if [[ "$KVELVET" != "" ]] ; then
38
- echo pre-velvet > $DIR.proc
39
- rm $LIB.velvet.tmp1 &>/dev/null
40
- for K in $KVELVET ; do
41
- perl "$PDIR/FastA.filterN.pl" "$LIB.velvet_$K/contigs.fa" >> $LIB.velvet.tmp1
42
- done
43
- perl "$PDIR/newbler_preparator.pl" $LIB.velvet.tmp1 $LIB.velvet.tmp2
44
- cd $DIR
45
- nohup addRun ../$LIB.velvet.tmp2 >> $LOG
46
- cd $CWD
47
- fi ;
48
-
49
- # Prepare SOAP
50
- if [[ "$KSOAP" != "" ]] ; then
51
- echo pre-soap > $DIR.proc
52
- rm $LIB.soap.tmp1 &>/dev/null
53
- for K in $KSOAP ; do
54
- cat "$LIB.soap_$K/O.contig" >> $LIB.soap.tmp1
55
- done
56
- perl "$PDIR/newbler_preparator.pl" $LIB.soap.tmp1 $LIB.soap.tmp2
57
- cd $DIR
58
- nohup addRun ../$LIB.soap.tmp2 >> $LOG
59
- cd $CWD
60
- fi ;
61
-
62
- # Run
63
- cd $DIR
64
- echo newbler > $DIR.proc
65
- nohup runProject -cpu $NP >> $LOG
66
- cd $CWD
67
- echo done > $DIR.proc
68
-
@@ -1,49 +0,0 @@
1
- #!/usr/bin/perl
2
-
3
-
4
- my ($in, $out) = @ARGV;
5
- ($in and $out) or die "
6
- Usage: $0 input.fa output.fa
7
- ";
8
-
9
- open IN, "<", $in or die "Cannot read file: $in: $!\n";
10
- open OUT,">", $out or die "Cannot create file: $out: $!\n";
11
-
12
- %reads=();
13
- @reads=();
14
- while(<IN>){
15
- chomp;
16
- if(/^\>/){
17
- $tag=$_;
18
- $reads{$tag}='';
19
- push(@reads,$tag);
20
- }else{
21
- $reads{$tag}.=$_;
22
- }
23
- }
24
- close(IN);
25
-
26
- for(0..$#reads){
27
- $tag=$reads[$_];
28
- $read=$reads{$tag};
29
- $l=length $read;
30
- if($l<100){
31
- next;
32
- }else{
33
- if($l<1500){
34
- print OUT "$tag\n$read\n";
35
- }else{
36
- $r=int($l/1500)+1;
37
- $start=0;
38
- $i=1;
39
- while($start<$l-100){
40
- $tag_new=$tag.':r'.$i;
41
- $i++;
42
- $read_new=substr($read,$start,1500);
43
- $start+=200;
44
- print OUT "$tag_new\n$read_new\n";
45
- }
46
- }
47
- }
48
- }
49
- close(OUT);
@@ -1,80 +0,0 @@
1
- #!/bin/bash
2
- #PBS -k oe
3
-
4
- # Some defaults for the parameters
5
- INSLEN=${INSLEN:-300};
6
- USECOUPLED=${USECOUPLED:-yes}
7
- USESINGLE=${USESINGLE:-no}
8
- CLEANUP=${CLEANUP:-yes}
9
-
10
- # Check mandatory variables
11
- if [[ "$LIB" == "" ]]; then
12
- echo "Error: LIB is mandatory" >&2
13
- exit 1;
14
- fi
15
- if [[ "$PDIR" == "" ]]; then
16
- echo "Error: PDIR is mandatory" >&2
17
- exit 1;
18
- fi
19
- if [[ "$DATA" == "" ]]; then
20
- echo "Error: DATA is mandatory" >&2
21
- exit 1;
22
- fi
23
-
24
- # Prepare input
25
- module load perl/5.14.4
26
- KMER=$PBS_ARRAYID
27
- DIR="$LIB.soap_$KMER"
28
- if [[ "$USECOUPLED" == "yes" ]]; then
29
- MAXRDLEN=${MAXRDLEN:-$(perl $PDIR/FastA.length.pl "$DATA/$LIB.CoupledReads.fa" | head -n 200000 | awk '{if($2>MAX) MAX=$2} END{print MAX}')}
30
- elif [[ "$USESINGLE" == "yes" ]]; then
31
- MAXRDLEN=${MAXRDLEN:-$(perl $PDIR/FastA.length.pl "$DATA/$LIB.SingleReads.fa" | head -n 200000 | awk '{if($2>MAX) MAX=$2} END{print MAX}')}
32
- else
33
- echo "$0: Error: Nothing to do, neither USECOUPLED nor USESINGLE set to yes." >&2
34
- exit 2
35
- fi
36
- NP=$(cat "$PBS_NODEFILE" | wc -l)
37
-
38
- # Config
39
- module load SOAP/denovo2/r240
40
- echo config > $DIR.proc
41
- echo "max_rd_len=$MAXRDLEN
42
- [LIB]
43
- reverse_seq=0
44
- asm_flag=3
45
- rank=1" > $DIR.config
46
- if [[ "$USECOUPLED" == "yes" ]]; then
47
- echo "avg_ins=$INSLEN
48
- p=$DATA/$LIB.CoupledReads.fa" >> $DIR.config
49
- fi
50
- if [[ "$USESINGLE" == "yes" ]]; then
51
- echo "f=$DATA/$LIB.SingleReads.fa" >> $DIR.config
52
- fi
53
-
54
-
55
- # Run
56
- echo pre > $DIR.proc
57
- if [[ -d $DIR ]] ; then rm -R $DIR ; fi
58
- mkdir $DIR
59
- echo soap > $DIR.proc
60
- SOAPdenovo-63mer all -s $DIR.config -p $NP -K $KMER -o $DIR/O &> $DIR.log
61
- if [[ -d $DIR ]] ; then
62
- if [[ -s $DIR/O.contig ]] ; then
63
- if [[ "$CLEANUP" != "no" ]] ; then
64
- echo cleanup > $DIR.proc
65
- rm $DIR/*edge
66
- rm $DIR/*vertex
67
- rm $DIR/*Arc*
68
- rm $DIR/*Graph*
69
- rm $DIR/*readInGap*
70
- fi
71
- echo done > $DIR.proc
72
- else
73
- echo "$0: Error: File $DIR/O.contig doesn't exist, something went wrong" >&2
74
- exit 1
75
- fi
76
- else
77
- echo "$0: Error: Directory $DIR doesn't exist, something went wrong" >&2
78
- exit 1
79
- fi
80
-
@@ -1,57 +0,0 @@
1
- #!/bin/bash
2
- #PBS -q iw-shared-6
3
- #PBS -l nodes=1:ppn=1
4
- #PBS -l mem=1gb
5
- #PBS -l walltime=3:00:00
6
- #PBS -k oe
7
-
8
- # Check mandatory variables
9
- if [[ "$LIB" == "" ]]; then
10
- echo "Error: LIB is mandatory" >&2
11
- exit 1;
12
- fi
13
- if [[ "$PDIR" == "" ]]; then
14
- echo "Error: PDIR is mandatory" >&2
15
- exit 1;
16
- fi
17
-
18
- # Run
19
- module load perl/5.14.4
20
- echo "K N50 used reads " > $LIB.velvet.n50
21
- echo "K N50 used reads " > $LIB.soap.n50
22
- for ID in $(seq 10 31); do
23
- let KMER=$ID*2+1
24
- DIRV="$LIB.velvet_$KMER"
25
- DIRS="$LIB.soap_$KMER"
26
- echo $KMER > $LIB.velvet.n50.$KMER
27
- echo $KMER > $LIB.soap.n50.$KMER
28
- # N50 (>=500)
29
- perl "$PDIR/FastA.N50.pl" "$DIRV/contigs.fa" 500 | grep '^N50' | sed -e 's/.*: //' >> $LIB.velvet.n50.$KMER
30
- perl "$PDIR/FastA.N50.pl" "$DIRS/O.contig" 500 | grep '^N50' | sed -e 's/.*: //' >> $LIB.soap.n50.$KMER
31
- # Used and Total reads
32
- tail -n 1 $DIRV/Log | sed -e 's/.* using \\([0-9]*\\)\\/\\([0-9]*\\) reads.*/\\1\\n\\2/' >> $LIB.velvet.n50.$KMER
33
- if [ -e "$DIRS/O.readOnContig" ] ; then
34
- cat "$DIRS/O.readOnContig" | grep -vc '^read' >> $LIB.soap.n50.$KMER
35
- elif [ -e "$DIRS/O.readOnContig.gz" ] ; then
36
- zcat "$DIRS/O.readOnContig.gz" | grep -vc '^read' >> $LIB.soap.n50.$KMER
37
- else
38
- echo 0 >> $LIB.soap.n50.$KMER
39
- fi
40
- head -n 1 $DIRS/O.peGrads | awk '{print $3}' >> $LIB.soap.n50.$KMER
41
- # Join
42
- (cat $LIB.velvet.n50.$KMER | tr "\\n" " "; echo) >> $LIB.velvet.n50
43
- rm $LIB.velvet.n50.$KMER
44
- (cat $LIB.soap.n50.$KMER | tr "\\n" " "; echo) >> $LIB.soap.n50
45
- rm $LIB.soap.n50.$KMER
46
- done
47
-
48
- # Create plot
49
- module load R/3.1.2
50
- echo "
51
- source('$PDIR/kSelector.R');
52
- pdf('$LIB.n50.pdf', 13, 7);
53
- kSelector('$LIB.velvet.n50', '$LIB (Velvet)');
54
- kSelector('$LIB.soap.n50', '$LIB (SOAP)');
55
- dev.off();
56
- " | R --vanilla -q
57
-
@@ -1,63 +0,0 @@
1
- #!/bin/bash
2
- #PBS -l nodes=1:ppn=1
3
- #PBS -k oe
4
-
5
- # Some defaults for the parameters
6
- FORMAT=${FORMAT:-fasta};
7
- INSLEN=${INSLEN:-300};
8
- USECOUPLED=${USECOUPLED:-yes};
9
- USESINGLE=${USESINGLE:-no};
10
- CLEANUP=${CLEANUP:-yes}
11
-
12
- # Check mandatory variables
13
- if [[ "$LIB" == "" ]]; then
14
- echo "Error: LIB is mandatory" >&2
15
- exit 1;
16
- fi
17
- if [[ "$PDIR" == "" ]]; then
18
- echo "Error: PDIR is mandatory" >&2
19
- exit 1;
20
- fi
21
- if [[ "$DATA" == "" ]]; then
22
- echo "Error: DATA is mandatory" >&2
23
- exit 1;
24
- fi
25
-
26
- # Prepare input
27
- KMER=$PBS_ARRAYID
28
- CWD=$(pwd)
29
- DIR="$CWD/$LIB.velvet_$KMER"
30
-
31
- # Run
32
- module load velvet/1.2.10
33
- echo velveth > $DIR.proc
34
- CMD="velveth_101_omp $DIR $KMER -$FORMAT"
35
- if [[ "$USECOUPLED" == "yes" ]]; then
36
- CMD="$CMD -shortPaired $DATA/$LIB.CoupledReads.fa"
37
- fi
38
- if [[ "$USESINGLE" == "yes" ]]; then
39
- CMD="$CMD -short $DATA/$LIB.SingleReads.fa"
40
- fi
41
- if [[ "$VELVETH_EXTRA" != "" ]]; then
42
- CMD="$CMD $VELVETH_EXTRA"
43
- fi
44
- $CMD &> $DIR.hlog
45
- echo velvetg > $DIR.proc
46
- velvetg_101_omp "$DIR" -exp_cov auto -cov_cutoff auto -ins_length "$INSLEN" $VELVETG_EXTRA &> $DIR.glog
47
- if [[ -d $DIR ]] ; then
48
- if [[ -s $DIR/contigs.fa ]] ; then
49
- if [[ "$CLEANUP" != "no" ]] ; then
50
- echo cleanup > $DIR.proc
51
- rm $DIR/Sequences
52
- rm $DIR/Roadmaps
53
- rm $DIR/*Graph*
54
- fi
55
- echo done > $DIR.proc
56
- else
57
- echo "$0: Error: File $DIR/contigs.fa doesn't exist, something went wrong" >&2
58
- exit 1
59
- fi
60
- else
61
- echo "$0: Error: Directory $DIR doesn't exist, something went wrong" >&2
62
- exit 1
63
- fi
@@ -1,38 +0,0 @@
1
- # blast.pbs pipeline
2
- # Step 01 : Initialize input files
3
-
4
- # 00. Read configuration
5
- cd $SCRATCH ;
6
- TASK="dry" ;
7
- source "$PDIR/RUNME.bash" ;
8
- echo "$PBS_JOBID" > "$SCRATCH/success/01.00" ;
9
-
10
- if [[ ! -e "$SCRATCH/success/01.01" ]] ; then
11
- # 01. BEGIN
12
- REGISTER_JOB "01" "01" "Custom BEGIN function" \
13
- && BEGIN \
14
- || exit 1 ;
15
- touch "$SCRATCH/success/01.01" ;
16
- fi
17
-
18
- if [[ ! -e "$SCRATCH/success/01.02" ]] ; then
19
- # 02. Split
20
- [[ -d "$SCRATCH/tmp/split" ]] && rm -R "$SCRATCH/tmp/split" ;
21
- REGISTER_JOB "01" "02" "Splitting query files" \
22
- && mkdir "$SCRATCH/tmp/split" \
23
- && perl "$PDIR/FastA.split.pl" "$INPUT" "$SCRATCH/tmp/split/$PROJ" "$MAX_JOBS" \
24
- || exit 1 ;
25
- touch "$SCRATCH/success/01.02" ;
26
- fi ;
27
-
28
- if [[ ! -e "$SCRATCH/success/01.03" ]] ; then
29
- # 03. Finalize
30
- REGISTER_JOB "01" "03" "Finalizing input preparation" \
31
- && mv "$SCRATCH/tmp/split" "$SCRATCH/tmp/in" \
32
- || exit 1 ;
33
- touch "$SCRATCH/success/01.03" ;
34
- fi ;
35
-
36
- [[ -d "$SCRATCH/tmp/out" ]] || ( mkdir "$SCRATCH/tmp/out" || exit 1 ) ;
37
- JOB_DONE "01" ;
38
-
@@ -1,73 +0,0 @@
1
- # blast.pbs pipeline
2
- # Step 02 : Run BLAST
3
-
4
- # Read configuration
5
- cd $SCRATCH ;
6
- TASK="dry" ;
7
- source "$PDIR/RUNME.bash" ;
8
-
9
- # 00. Initial vars
10
- ID_N=$PBS_ARRAYID
11
- [[ "$ID_N" == "" ]] && exit 1 ;
12
- [[ -e "$SCRATCH/success/02.$ID_N" ]] && exit 0 ;
13
- IN="$SCRATCH/tmp/in/$PROJ.$ID_N.fa" ;
14
- OUT="$SCRATCH/tmp/out/$PROJ.blast.$ID_N" ;
15
- FINAL_OUT="$SCRATCH/results/$PROJ.$ID_N.blast" ;
16
- if [[ -e "$SCRATCH/success/02.$ID_N.00" ]] ; then
17
- pre_job=$(cat "$SCRATCH/success/02.$ID_N.00") ;
18
- state=$(qstat -f "$pre_job" 2>/dev/null | grep job_state | sed -e 's/.*= //')
19
- if [[ "$state" == "R" ]] ; then
20
- echo "Warning: This task is already being executed by $pre_job. Aborting." >&2 ;
21
- exit 0 ;
22
- elif [[ "$state" == "" ]] ; then
23
- echo "Warning: This task was initialized by $pre_job, but it's currently not running. Superseding." >&2 ;
24
- fi ;
25
- fi
26
- echo "$PBS_JOBID" > "$SCRATCH/success/02.$ID_N.00" ;
27
-
28
- # 01. Before BLAST
29
- if [[ ! -e "$SCRATCH/success/02.$ID_N.01" ]] ; then
30
- BEFORE_BLAST "$IN" "$OUT" || exit 1 ;
31
- touch "$SCRATCH/success/02.$ID_N.01" ;
32
- fi ;
33
-
34
- # 02. Run BLAST
35
- if [[ ! -e "$SCRATCH/success/02.$ID_N.02" ]] ; then
36
- # Recover previous runs, if any
37
- if [[ -s "$OUT" ]] ; then
38
- perl "$PDIR/BlastTab.recover_job.pl" "$IN" "$OUT" \
39
- || exit 1 ;
40
- fi ;
41
- # Run BLAST
42
- RUN_BLAST "$IN" "$OUT" \
43
- && mv "$OUT" "$OUT-z" \
44
- || exit 1 ;
45
- touch "$SCRATCH/success/02.$ID_N.02" ;
46
- fi ;
47
-
48
- # 03. Collect BLAST parts
49
- if [[ ! -e "$SCRATCH/success/02.$ID_N.03" ]] ; then
50
- if [[ -e "$OUT" ]] ; then
51
- echo "Warning: The file $OUT pre-exists, but the BLAST collection was incomplete." >&2 ;
52
- echo " I'm assuming that it corresponds to the first part of the result, but you should check manually." >&2 ;
53
- echo " The last lines are:" >&2 ;
54
- tail -n 3 "$OUT" >&2 ;
55
- else
56
- touch "$OUT" || exit 1 ;
57
- fi ;
58
- for i in $(ls $OUT-*) ; do
59
- cat "$i" >> "$OUT" ;
60
- rm "$i" || exit 1 ;
61
- done ;
62
- mv "$OUT" "$FINAL_OUT"
63
- touch "$SCRATCH/success/02.$ID_N.03" ;
64
- fi ;
65
-
66
- # 04. After BLAST
67
- if [[ ! -e "$SCRATCH/success/02.$ID_N.04" ]] ; then
68
- AFTER_BLAST "$IN" "$FINAL_OUT" || exit 1 ;
69
- touch "$SCRATCH/success/02.$ID_N.04" ;
70
- fi ;
71
-
72
- touch "$SCRATCH/success/02.$ID_N" ;
73
-
@@ -1,21 +0,0 @@
1
- # blast.pbs pipeline
2
- # Step 03 : Finalize
3
-
4
- # Read configuration
5
- cd $SCRATCH ;
6
- TASK="dry" ;
7
- source "$PDIR/RUNME.bash" ;
8
- PREFIX="$SCRATCH/results/$PROJ" ;
9
- OUT="$SCRATCH/$PROJ.blast" ;
10
- echo "$PBS_JOBID" > "$SCRATCH/success/02.00" ;
11
-
12
- # 01. END
13
- if [[ ! -e "$SCRATCH/success/03.01" ]] ; then
14
- REGISTER_JOB "03" "01" "Custom END function" \
15
- && END "$PREFIX" "$OUT" \
16
- || exit 1 ;
17
- touch "$SCRATCH/success/03.01" ;
18
- fi ;
19
-
20
- JOB_DONE "03" ;
21
-
@@ -1,72 +0,0 @@
1
- #!/usr/bin/perl
2
-
3
- use warnings;
4
- use strict;
5
- use File::Copy;
6
-
7
- my($fasta, $blast) = @ARGV;
8
-
9
- ($fasta and $blast) or die "
10
- .USAGE:
11
- $0 query.fa blast.txt
12
-
13
- query.fa Query sequences in FastA format.
14
- blast.txt Incomplete BLAST output in tabular format.
15
-
16
- ";
17
-
18
- print "Fixing $blast:\n";
19
- my $blast_res;
20
- for(my $i=0; 1; $i++){
21
- $blast_res = "$blast-$i";
22
- last unless -e $blast_res;
23
- }
24
- open BLAST, "<", $blast or die "Cannot read the file: $blast: $!\n";
25
- open TMP, ">", "$blast-tmp" or die "Cannot create the file: $blast-tmp: $!\n";
26
- my $last="";
27
- my $last_id="";
28
- my $before = "";
29
- while(my $ln=<BLAST>){
30
- chomp $ln;
31
- last unless $ln =~ m/(.+?)\t/;
32
- my $id = $1;
33
- if($id eq $last_id){
34
- $last.= $ln."\n";
35
- }else{
36
- print TMP $last if $last;
37
- $before = $last_id;
38
- $last = $ln."\n";
39
- $last_id = $id;
40
- }
41
- }
42
- close BLAST;
43
- close TMP;
44
-
45
- move "$blast-tmp", $blast_res or die "Cannot move file $blast-tmp into $blast_res: $!\n";
46
- unlink $blast or die "Cannot delete file: $blast: $!\n";
47
-
48
- unless($before eq ""){
49
- print "[$before] ";
50
- $before = ">$before";
51
-
52
- open FASTA, "<", $fasta or die "Cannot read file: $fasta: $!\n";
53
- open TMP, ">", "$fasta-tmp" or die "Cannot create file: $fasta-tmp: $!\n";
54
- my $print = 0;
55
- my $at = 0;
56
- my $i = 0;
57
- while(my $ln=<FASTA>){
58
- $i++;
59
- $print = 1 if $at and $ln =~ /^>/;
60
- print TMP $ln if $print;
61
- $ln =~ s/\s+.*//;
62
- chomp $ln;
63
- $at = $i if $ln eq $before;
64
- }
65
- close TMP;
66
- close FASTA;
67
- printf 'recovered at %.2f%% (%d/%d).'."\n", 100*$at/$i, $at, $i if $i;
68
-
69
- move $fasta, "$fasta.old" or die "Cannot move file $fasta into $fasta.old: $!\n";
70
- move "$fasta-tmp", $fasta or die "Cannot move file $fasta-tmp into $fasta: $!\n";
71
- }
72
-
@@ -1,98 +0,0 @@
1
- #!/bin/bash
2
-
3
- ##################### VARIABLES
4
- # Queue and resources.
5
- QUEUE="iw-shared-6" ;
6
- MAX_JOBS=500 ; # Maximum number of concurrent jobs. Never exceed 1990.
7
- PPN=2 ;
8
- RAM="9gb" ;
9
-
10
- # Paths
11
- SCRATCH_DIR="$HOME/scratch/pipelines/blast" ; # Where the outputs and temporals will be created
12
- INPUT="$HOME/data/my-large-file.fasta" ; # Input query file
13
- DB="$HOME/data/db/nr" ; # Input database
14
- PROGRAM="blastp" ;
15
-
16
- # Pipeline
17
- MAX_TRIALS=5 ; # Maximum number of automated attempts to re-start a job
18
-
19
- ##################### FUNCTIONS
20
- ## All the functions below can be edited to suit your particular job.
21
- ## No function can be empty, but you can use a "dummy" function (like true).
22
- ## All functions have access to any of the variables defined above.
23
- ##
24
- ## The functions are executed in the following order (from left to right):
25
- ##
26
- ## / -----> BEFORE_BLAST --> RUN_BLAST --> AFTER_BLAST ---\
27
- ## / ··· ··· ··· \
28
- ## BEGIN --#--------> BEFORE_BLAST --> RUN_BLAST --> AFTER_BLAST -----#---> END
29
- ## \ ··· ··· ··· /
30
- ## \ -----> BEFORE_BLAST --> RUN_BLAST --> AFTER_BLAST ---/
31
- ##
32
-
33
- # Function to execute ONLY ONCE at the begining
34
- function BEGIN {
35
- ### Format the database (assuming proteins, check commands):
36
- # module load ncbi_blast/2.2.25 || exit 1 ;
37
- # makeblastdb -in $HOME/data/some-database.faa -title $DB -dbtype prot || exit 1 ;
38
- # module unload ncbi_blast/2.2.25 || exit 1 ;
39
- ### Don't do anything:
40
- true ;
41
- }
42
-
43
- # Function to execute BEFORE running the BLAST, for each sub-task.
44
- function BEFORE_BLAST {
45
- local IN=$1 # Query file
46
- local OUT=$2 # Blast file (to be created)
47
- ### Don't do anything:
48
- true ;
49
- }
50
-
51
- # Function that executes BLAST, for each sub-task
52
- function RUN_BLAST {
53
- local IN=$1 # Query file
54
- local OUT=$2 # Blast file (to be created)
55
- ### Run BLAST+ with 13th and 14th columns (query length and subject length):
56
- module load ncbi_blast/2.2.28_binary || exit 1 ;
57
- $PROGRAM -query $IN -db $DB -out $OUT -num_threads $PPN \
58
- -outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen slen" \
59
- || exit 1 ;
60
- module unload ncbi_blast/2.2.28_binary || exit 1 ;
61
- ### Run BLAT (nucleotides)
62
- # module load blat/rhel6 || exit 1 ;
63
- # blat $DB $IN -out=blast8 $OUT || exit 1 ;
64
- # module unload blat/rhel6 || exit 1 ;
65
- ### Run BLAT (proteins)
66
- # module load blat/rhel6 || exit 1 ;
67
- # blat $DB $IN -out=blast8 -prot $OUT || exit 1 ;
68
- # module unload blat/rhel6 || exit 1 ;
69
- }
70
-
71
- # Function to execute AFTER running the BLAST, for each sub-task
72
- function AFTER_BLAST {
73
- local IN=$1 # Query files
74
- local OUT=$2 # Blast files
75
- ### Filter by best-match:
76
- # sort $OUT | perl $PDIR/../../Scripts/BlastTab.best_hit_sorted.pl > $OUT.bm
77
- ### Filter by Bit-score 60:
78
- # awk '$12>=60' $OUT > $OUT.bs60
79
- ### Filter by corrected identity 95 (only if it has the additional 13th column):
80
- # awk '$3*$4/$13 >= 95' $OUT > $OUT.ci95
81
- ### Don't do anything:
82
- true ;
83
- }
84
-
85
- # Function to execute ONLY ONCE at the end, to concatenate the results
86
- function END {
87
- local PREFIX=$1 # Prefix of all Blast files
88
- local OUT=$2 # Single Blast output (to be created).
89
- ### Simply concatenate files:
90
- # cat $PREFIX.*.blast > $OUT
91
- ### Concatenate only the filtered files (if filtering in AFTER_BLAST):
92
- # cat $PREFIX.*.blast.bs60 > $OUT
93
- ### Sort the BLAST by query (might require considerable RAM):
94
- # sort -k 1 $PREFIX.*.blast > $OUT
95
- ### Don't do anyhthing:
96
- true ;
97
- }
98
-
@@ -1 +0,0 @@
1
- ../../Scripts/FastA.split.pl