miga-base 1.2.15.2 → 1.2.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli/action/download/gtdb.rb +4 -1
  3. data/lib/miga/cli/action/gtdb_get.rb +4 -0
  4. data/lib/miga/daemon.rb +4 -1
  5. data/lib/miga/lair.rb +6 -4
  6. data/lib/miga/remote_dataset/download.rb +3 -2
  7. data/lib/miga/remote_dataset.rb +25 -7
  8. data/lib/miga/taxonomy.rb +6 -0
  9. data/lib/miga/version.rb +2 -2
  10. metadata +6 -302
  11. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  12. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  13. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  14. data/utils/FastAAI/FastAAI +0 -3659
  15. data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
  16. data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
  17. data/utils/FastAAI/README.md +0 -84
  18. data/utils/enveomics/Docs/recplot2.md +0 -244
  19. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  20. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  21. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  22. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  23. data/utils/enveomics/LICENSE.txt +0 -73
  24. data/utils/enveomics/Makefile +0 -52
  25. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  26. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
  27. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  28. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
  29. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
  30. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  31. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
  32. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  33. data/utils/enveomics/Manifest/Tasks/other.json +0 -906
  34. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  35. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
  36. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  37. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  38. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  39. data/utils/enveomics/Manifest/categories.json +0 -165
  40. data/utils/enveomics/Manifest/examples.json +0 -162
  41. data/utils/enveomics/Manifest/tasks.json +0 -4
  42. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  49. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  50. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  51. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  52. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  53. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  54. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  55. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  56. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  57. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  58. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  59. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  60. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  61. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  62. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  63. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  64. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  68. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  69. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  70. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  71. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  72. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  73. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  74. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  75. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  76. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  77. data/utils/enveomics/README.md +0 -42
  78. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  79. data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
  80. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  81. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  82. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  83. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  84. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  85. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  86. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  87. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  88. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  89. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  90. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  91. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  92. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  93. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  94. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  95. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  96. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  97. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  98. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  99. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  100. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  101. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
  102. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  103. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  104. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  105. data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
  106. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  107. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  108. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  109. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  110. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
  111. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  112. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  113. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  114. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  115. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  116. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  117. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  118. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  119. data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
  120. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  121. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  122. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  123. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  124. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  125. data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
  126. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  127. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  128. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  129. data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
  130. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  131. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  132. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
  133. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  134. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  135. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  136. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  137. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  138. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  139. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  140. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  141. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  142. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  143. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  144. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  145. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  146. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  147. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  148. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  149. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  150. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  151. data/utils/enveomics/Scripts/SRA.download.bash +0 -55
  152. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  153. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  154. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  155. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  156. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  157. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  158. data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
  159. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  160. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  161. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  162. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  163. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  164. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  165. data/utils/enveomics/Scripts/aai.rb +0 -421
  166. data/utils/enveomics/Scripts/ani.rb +0 -362
  167. data/utils/enveomics/Scripts/anir.rb +0 -137
  168. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  169. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  170. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  171. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  172. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  173. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  174. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
  175. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
  176. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  177. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
  178. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
  179. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  180. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
  181. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  182. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
  183. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  184. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  185. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
  186. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
  187. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
  188. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
  189. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  190. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  191. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  192. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  193. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  194. data/utils/enveomics/Scripts/ogs.rb +0 -104
  195. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  196. data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
  197. data/utils/enveomics/Scripts/rbm.rb +0 -108
  198. data/utils/enveomics/Scripts/sam.filter.rb +0 -148
  199. data/utils/enveomics/Tests/Makefile +0 -10
  200. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  201. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  202. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  203. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  204. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  205. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  206. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  207. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  208. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  209. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  210. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  211. data/utils/enveomics/Tests/alkB.nwk +0 -1
  212. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  213. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  214. data/utils/enveomics/Tests/hiv1.faa +0 -59
  215. data/utils/enveomics/Tests/hiv1.fna +0 -134
  216. data/utils/enveomics/Tests/hiv2.faa +0 -70
  217. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  218. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  219. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  220. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  221. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  222. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  223. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  224. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  225. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  226. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  227. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  228. data/utils/enveomics/build_enveomics_r.bash +0 -45
  229. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  230. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  231. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  232. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  233. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  234. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  235. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  236. data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
  237. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  238. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  239. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  240. data/utils/enveomics/enveomics.R/R/utils.R +0 -80
  241. data/utils/enveomics/enveomics.R/README.md +0 -81
  242. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  243. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  244. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
  245. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
  246. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
  247. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  248. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  249. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  250. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  251. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  252. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  253. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
  254. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
  255. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
  256. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  257. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  258. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
  259. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
  260. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
  261. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
  262. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
  263. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
  264. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
  265. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  266. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  267. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
  268. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  269. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  270. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  271. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  272. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
  273. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  274. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  275. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
  276. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  277. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  278. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  279. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  280. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
  281. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  282. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
  283. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
  284. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
  285. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  286. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  287. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
  288. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
  289. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
  290. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  291. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  292. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  293. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  294. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
  295. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
  296. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
  297. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
  298. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  299. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  300. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  301. data/utils/enveomics/globals.mk +0 -8
  302. data/utils/enveomics/manifest.json +0 -9
  303. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  304. data/utils/multitrim/README.md +0 -67
  305. data/utils/multitrim/multitrim.py +0 -1555
  306. data/utils/multitrim/multitrim.yml +0 -13
@@ -1,162 +0,0 @@
1
- {
2
- "_": "Input files and directories are included in the 'Tests' folder.",
3
- "examples": [
4
- {
5
- "_": "== Examples of genome comparisons ==",
6
- "task": "ogs.stats.rb",
7
- "description": ["Statistics on the groups of orthology in the Primate",
8
- "Lentivirus Group, including HIV-1, HIV-2, and SIV."],
9
- "values": ["primate_lentivirus.ogs",null,null,null,null,null]
10
- },
11
- {
12
- "task": "ani.rb",
13
- "description": ["Average Nucleotide Identity (ANI) between two strains",
14
- "of Mycoplasma genitalium (M2288 and M2321)."],
15
- "values": ["Mgen_M2288.fna","Mgen_M2321.fna",null,null,null,null,null,
16
- null,null,null,null,null,null,null,null,null,null,null,null,null,null,
17
- null,null,null]
18
- },
19
- {
20
- "task": "aai.rb",
21
- "description": ["Average Amino acid Identity (AAI) between Mycoplasma",
22
- "genitalium (Bacteria) and Nanoarchaeum equitans (Archaea)."],
23
- "values": ["Mgen_M2288.faa","Nequ_Kin4M.faa",null,null,null,null,null,
24
- null,null,null,null,null,null,null,null,null,null,null,null,null,null,
25
- null,null,null]
26
- },
27
- {
28
- "task": "rbm.rb",
29
- "description": [
30
- "Reciprocal Best Matches between the proteomes of the",
31
- "two major HIV types (HIV-1 and HIV-2)."
32
- ],
33
- "values": [
34
- "hiv1.faa", "hiv2.faa", "hiv1-hiv2.rbm", null, null, null, null, null,
35
- null, null,null, null
36
- ]
37
- },
38
- {
39
- "task": "ogs.mcl.rb",
40
- "description": ["Groups of orthology in the Primate Lentivirus Group,",
41
- "including HIV-1, HIV-2, and SIV."],
42
- "values": ["primate_lentivirus.ogs","primate_lentivirus.rbm",null,null,
43
- null,null,null,null,null,null,null,null]
44
- },
45
- {
46
- "task": "Table.df2dist.R",
47
- "description": ["Transforms a list of AAI values between Xanthomonas",
48
- "oryzae genomes into a distance matrix."],
49
- "values": ["Xanthomonas_oryzae.aai.tsv",null,null,null,null,100.0,
50
- "Xanthomonas_oryzae.aai-mat.tsv"]
51
- },
52
- {
53
- "_": "== Recruitment plots",
54
- "task": "BlastTab.catsbj.pl",
55
- "description": ["Prepares recruitment plot files for a comparison",
56
- "between a virome containing HIV and the HIV-1 genome."],
57
- "values": [null,null,null,null,"hiv1.fna","hiv_mix-hiv1.blast.tsv"]
58
- },
59
- {
60
- "task": "BlastTab.recplot2.R",
61
- "description": ["Generates recruitment plots for a comparison",
62
- "between a virome containing HIV and the HIV-1 genome."],
63
- "values": ["hiv_mix-hiv1.blast.tsv",50,100,null,null,null,null,null,null,
64
- null,null,null,"hiv_mix-hiv1.Rdata","hiv_mix-hiv1.pdf",null,null]
65
- },
66
- {
67
- "_": "== Examples of functional annotations ==",
68
- "task": "HMM.essential.rb",
69
- "description": ["Typical single-copy bacterial genes present in",
70
- "Mycoplasma genitalium."],
71
- "values": ["Mgen_M2288.faa",null,null,null,null,null,null,true,null,null,
72
- null,null,null,null,null,null,null,null,null]
73
- },
74
- {
75
- "task": "HMM.essential.rb",
76
- "description": ["Typical single-copy archaeal genes present in",
77
- "Nanoarchaeum equitans."],
78
- "values": ["Mgen_M2288.faa",null,null,null,null,null,null,null,true,null,
79
- null,null,null,null,null,null,null,null,null]
80
- },
81
- {
82
- "task": "Newick.autoprune.R",
83
- "description": ["Prune an AlkB tree with 110 tips to get only distant",
84
- "representatives (41)."],
85
- "values": ["alkB.nwk",0.9,null,null,null,null,null,"alkB-pruned.nwk"]
86
- },
87
- {
88
- "_": "== Examples of BLAST statistics and manipulation",
89
- "task": "BlastTab.topHits_sorted.rb",
90
- "description": [
91
- "Extract the best match of metagenome-derived proteins",
92
- "(from the 'A metagenome') against a Gene Ontology collection."
93
- ],
94
- "values": [
95
- "sort","a_mg.cds-go.blast.tsv", null, null, null, null,
96
- "a_mg.cds-go.blast-bm.tsv", 1, null, null
97
- ]
98
- },
99
- {
100
- "task": "BlastTab.sumPerHit.pl",
101
- "description": ["Count the number of reads per gene in a mapping of a",
102
- "metagenome to metagenome-derived genes (from the 'A metagenome')."],
103
- "values": [null,null,null,null,null,null,null,"a_mg.reads-cds.blast.tsv",
104
- null,"a_mg.reads-cds.counts.tsv"]
105
- },
106
- {
107
- "task": "BlastTab.sumPerHit.pl",
108
- "description": ["Estimate the total abundance of Gene Ontology",
109
- "annotations in the A metagenome, using metagenome-derived proteins,",
110
- "and normalizing by the read counts of each protein."],
111
- "values": ["a_mg.reads-cds.counts.tsv",null,null,null,null,true,null,
112
- "a_mg.cds-go.blast.tsv",null,"a_mg.go.read-counts.tsv"]
113
- },
114
- {
115
- "_": "== Examples of diversity ==",
116
- "task": "Table.barplot.R",
117
- "description": ["Barplot with the distribution of bacterial phyla in",
118
- "four different sites, with taxa sorted by variance."],
119
- "values": ["phyla_counts.tsv","250,100,75,200",null,null,null,null,null,
120
- null,true,"var",2,null,null,"phyla_counts.pdf",10,null]
121
- },
122
- {
123
- "task": "Chao1.pl",
124
- "description": ["Phylum-richness estimated by the Chao1 index with 95%",
125
- "confidence, using the distributions of bacterial phyla in four",
126
- "different sites."],
127
- "values": ["phyla_counts.tsv",null,1,null,null,true,null,
128
- "phyla_chao1.tsv"]
129
- },
130
- {
131
- "task": "AlphaDiversity.pl",
132
- "description": ["Phylum-diversity estimated by the indices of Shannon",
133
- "(H'), Inverse Simpson (1/Lambda), and true diversity of order 1 (1D),",
134
- "using the distributions of bacterial phyla in four different sites."],
135
- "values": ["phyla_counts.tsv",null,1,null,null,true,null,true,1,null,
136
- "phyla_diversity.tsv"]
137
- },
138
- {
139
- "_": "== Other miscellaneous examples ==",
140
- "task": "CharTable.classify.rb",
141
- "description": ["Classification of anthrax genomes based on can-SNPs, as",
142
- "described in Van Ert 2007 (PLoS ONE 2(5):e461)."],
143
- "values": ["anthrax-cansnp-data.tsv","anthrax-cansnp-key.tsv",
144
- "anthrax-cansnp-classif.tsv","anthrax-cansnp-classif.nwk",null]
145
- },
146
- {
147
- "task": "TRIBS.test.R",
148
- "description": ["Test overclustering of Xanthomonas oryzae genomes",
149
- "encoding for PilA using Transformed-space Resampling In Biased Sets",
150
- "(TRIBS)."],
151
- "values": ["Xanthomonas_oryzae.aai-mat.tsv","Xanthomonas_oryzae-PilA.txt",
152
- 5000,null,null,null,null,0,"Xanthomonas_oryzae-PilA.tribs.Rdata",100]
153
- },
154
- {
155
- "task": "TRIBS.plot-test.R",
156
- "description": ["Show the TRIBS-normalized distances between Xanthomonas",
157
- "oryzae genomes (grey) and X. oryzae encoding for PilA (red)."],
158
- "values": ["Xanthomonas_oryzae-PilA.tribs.Rdata",null,null,null,null,null,
159
- null,null,"Xanthomonas_oryzae-PilA.tribs.pdf",null,null]
160
- }
161
- ]
162
- }
@@ -1,4 +0,0 @@
1
- {
2
- "_": "This file loads all the .json files inside 'Manifest/Tasks'.",
3
- "_include": "Tasks/*.json"
4
- }
@@ -1,69 +0,0 @@
1
- #!/bin/bash
2
-
3
- ##################### VARIABLES
4
- # Queue: Preferred queue. Delete (or comment) this line to allow
5
- # automatic detection:
6
- #QUEUE="biocluster-6"
7
- # If you set the QUEUE variable, you MUST set the WTIME variable
8
- # as well, containing the walltime to be asked for. The WTIME
9
- # variable is ignored otherwise.
10
- WTIME="120:00:00"
11
-
12
- # Scratch: This is where the output will be created.
13
- SCRATCH="$HOME/scratch/pipelines/assembly"
14
-
15
- # Data folder: This is the folder that contains the input files.
16
- DATA="$HOME/data/trim"
17
-
18
- # Location of Newbler's binaries
19
- BIN454="$HOME/454/bin"
20
-
21
- # Name(s) of the library(ies) to use, separated by spaces:
22
- # This is determined by the name of your input files. For example,
23
- # if your input files are: LLSEP.CoupledReads.fa and LWP.CoupledReads.fa,
24
- # use:
25
- # LIBRARIES="LLSEP LWP"
26
- # It's strongly encouraged to use only one per CONFIG file.
27
- LIBRARIES="A";
28
-
29
- # Use .CoupledReads.fa and/or .SingleReads.fa (yes or no):
30
- USECOUPLED=yes
31
- USESINGLE=no
32
-
33
- # Insert length (in bp): This is the average length of the entire insert,
34
- # not just the gap length.
35
- INSLEN=300
36
-
37
- # Number of CPUs to use (for SOAP and Newbler):
38
- PPN=16
39
-
40
- # RAM multiplier: Multiply the estimated required RAM by this number:
41
- RAMMULT=1
42
-
43
- # Maximum number of simultaneous jobs: Uncomment and increase these values if
44
- # you have increased resources (e.g., a dedicated queue); uncomment and decrease
45
- # if the resources are scarce (e.g., a very busy queue or other simultaneous jobs).
46
- #VELVETSIM=22
47
- #SOAPSIM=8
48
-
49
- # Extra parameters for Velvet: Any additional parameters to be passed to
50
- # velvetg or velveth. If you have MP data, consider adding the option
51
- # -shortMatePaired yes to VELVETG_EXTRA. If you have Nextera, consider
52
- # adding the option above, plus the option -ins_length_sd <integer>, to
53
- # indicate the standard deviation of the insert size. By default, the
54
- # SD is assumed to be 10% of the average, but Nextera produces much
55
- # wider distribution of sizes (i.e., larger SD). Typically you shouldn't
56
- # need to add anything in VELVETH_EXTRA.
57
- VELVETH_EXTRA=""
58
- VELVETG_EXTRA=""
59
-
60
- # Clean non-essential files (yes or no):
61
- CLEANUP=yes
62
-
63
- # Best k-mers: Space-delimited list of kmers selected from Velvet and SOAP.
64
- # This is to be modified at the beginning of step 4, and it's ignored in all
65
- # the other steps.
66
- K_VELVET="21 23 35"
67
- K_SOAP="21 23 35"
68
-
69
-
@@ -1 +0,0 @@
1
- ../../Scripts/FastA.N50.pl
@@ -1 +0,0 @@
1
- ../../Scripts/FastA.filterN.pl
@@ -1 +0,0 @@
1
- ../../Scripts/FastA.length.pl
@@ -1,189 +0,0 @@
1
- @author: Luis Miguel Rodriguez-R <lmrodriguezr at gmail dot com>
2
-
3
- @update: Mar-17-2013
4
-
5
- @license: artistic 2.0
6
-
7
- @status: semi
8
-
9
- @pbs: yes
10
-
11
- # IMPORTANT
12
-
13
- This pipeline was developed for the [PACE cluster](http://pace.gatech.edu/). You
14
- are free to use it in other platforms with adequate adjustments. It is largely
15
- based on Luo _et al._ 2012, ISME J.
16
-
17
- # PURPOSE
18
-
19
- This pipeline assembles coupled and/or single reads from one or more libraries.
20
- It assumes that the reads have been quality-checked and trimmed.
21
-
22
- # HELP
23
-
24
- 1. Files preparation:
25
-
26
- 1.1. Copy this folder to the cluster.
27
-
28
- 1.2. Copy the sequences to the cluster. Only trimmed/filtered reads are used.
29
- All the files are expected to be in the same folder, and the filenames must
30
- end in `.CoupledReads.fa` or `.SingleReads.fa`.
31
-
32
- 1.3. Copy the file `CONFIG.mock.bash` to `CONFIG.<name>.bash`, where `<name>` is a
33
- short name for your run (avoid characters other than alphanumeric).
34
-
35
- 1.4. Change the variables in `CONFIG.<name>.bash`. Notice that this pipeline
36
- supports running several libraries at the same time, but it's strongly
37
- recommended to run only one per config file, because the insert length
38
- (in step 2) and the selected k-mers (in step 3) are fixed for all the
39
- included libraries. Also, there is a technical consideration: The first
40
- step will execute parallel jobs for each odd number between 21 and 63, and
41
- SOAP will use 16 CPUs by default, which means 357 CPUs will be requested
42
- per library in step 2. It's a bad idea to run many libraries at the same
43
- time.
44
-
45
- 1.5. If you have Mate-paired datasets (for example, prepared with Nextera), first
46
- reverse-complement all the reads. See also the `VELVETG_EXTRA` variable in
47
- the `CONFIG.<name>.bash` file.
48
-
49
- 2. Velvet and SOAP assembly:
50
-
51
- 2.1. Execute `./RUNME-2.bash <name>` in the head node (see [troubleshooting](#troubleshooting) #1).
52
-
53
- 2.2. Monitor the tasks named velvet_* and soap_*.
54
-
55
- 2.3. Once completed, make sure the files .proc contain only the
56
- word "done". To do this, you may execute:
57
- ```
58
- grep -v '^done$' *.proc
59
- ```
60
-
61
- If successful, the output of the above command should be empty. See
62
- [Troubleshooting](#troubleshooting) #2 and #3 below if one or more of your jobs failed.
63
-
64
- 3. K-mers selection:
65
-
66
- 3.1. If you completed step 2, execute `./RUNME-3.bash <name>` in the head
67
- node.
68
-
69
- 3.2. Once completed, download and open the files `*.n50.pdf`.
70
-
71
- 3.3. Select the three "best" k-mers for Velvet and for SOAP (they don't
72
- have to be the same). There is no well-tested method to select the
73
- "best", and this is why this protocol is not automated, but semi-
74
- automated. A generally good rule-of-thumb is: pick one that optimizes
75
- the amount of sequences used (these are the grey bars in the plot;
76
- usually this is the smallest k-mer), pick one that optimizes the N50
77
- (this is the dashed red line; usually this is a large k-mer), and pick
78
- one that optimizes both (something in the middle). You can select
79
- more or less than three k-mers, this is just a suggestion.
80
-
81
- 4. Newbler assembly:
82
-
83
- 4.1. Edit the file `CONFIG.<name>.bash`: set the variables `K_VELVET` and
84
- `K_SOAP` to contain the lists of "best" selected k-mers for Velvet and
85
- SOAP, respectively.
86
-
87
- 4.2. Execute `./RUNME-4.bash <name>` in the head node.
88
-
89
- 4.3. Monitor the task newbler_*. Once finished, your assembly is ready.
90
- Once completed, make sure the file .newbler.proc contains only the
91
- word "done". To do this, you may execute:
92
- ```
93
- grep -v '^done$' *.proc
94
- ```
95
- If successful, the output should be empty.
96
-
97
- 4.4. The final assembly should be located in the `SCRATCH` path, in a folder
98
- named `<lib>.newbler/assembly/`. The file `454AllContigs.fna` contains
99
- all the assembled contigs, `454LargeContigs.fna` contains the contigs
100
- with 500bp or more in length, and `454NewblerMetrics.txt` contains some
101
- relevant statistics.
102
-
103
-
104
- # Comments
105
-
106
- * Some scripts contained in this package are actually symlinks to files in the
107
- _Scripts_ folder. Check the existence of these files when copied to
108
- the cluster.
109
-
110
- # Troubleshooting
111
-
112
- 1. Do I really have to change directory (`cd`) to the pipeline's folder every time
113
- I want to execute something?
114
-
115
- No. Not really. For simplicity, this file tells you to execute, for example,
116
- `./RUNME-2.bash`. However, you don't really have to be there, you can execute it
117
- from any location. For example, if you saved this pipeline in your home
118
- directory, you can just execute `~/assembly.pbs/RUNME-2.bash` instead from any
119
- location in the head node.
120
-
121
- 2. I executed step 2, and Velvet worked but SOAP failed (or vice versa). Can I
122
- submit only one of them?
123
-
124
- Yes. To execute only Velvet, run:
125
- ```
126
- ./RUNME-2.bash <name> velvet
127
- ```
128
-
129
- To execute only SOAP, run:
130
- ```
131
- ./RUNME-2.bash <name> soap
132
- ```
133
-
134
- 3. I ran step 2, and most of the jobs finished, but few of them failed. Can I
135
- submit only few K-mers?
136
-
137
- Yes. To execute one kmer (say, the k-mer 33 of SOAP), run:
138
- ```
139
- ./RUNME-2.bash <name> soap 33
140
- ```
141
-
142
- You can also execute more than one kmer, using a comma-separated list. For
143
- example, to re-submit the k-mers 37, 39, and 41 of Velvet, run:
144
- ```
145
- ./RUNME-2.bash <name> velvet 37,39,41
146
- ```
147
-
148
- 4. What are the numbers on the job names of step 2?
149
-
150
- The K-mer. Each k-mer has its own job, but they are "arrayed", to simplify
151
- administration: notice that all the jobs of Velvet and all the jobs of SOAP
152
- share the same job ID.
153
-
154
- 5. Some jobs are being killed, why?
155
-
156
- 5.1. First, check the log file created by the pipeline. The name is typically
157
- the output prefix and the .log extension. For velvet, there are two log files,
158
- the `.glog` and the `.hlog`. You may find the problem there.
159
-
160
- 5.2. Now, check the error file in your HOME directory. The name depends on the
161
- job, the library and the task. For example: `~/soap_Mg_2-37.e1999838` is the
162
- error file for step 2, task soap, library Mg_2, k-mer 37. The appending
163
- number after the 'e' is the job ID. If this file contains errors probably
164
- related to the pipeline, please let me know.
165
-
166
- 5.3. If you still have no clues, check the output file in your `HOME` directory. The
167
- name is just like the name of the error file (see #5.2 above), but with 'o'
168
- instead of 'e'. Compare the lines 'Resources' (what we asked the scheduler for)
169
- and 'Rsrc Used' (what the job actually used). A typical problem is that your
170
- job may need more RAM than we asked for (the value of 'mem' in both lines). If
171
- the RAM used is larger than the RAM requested, the scheduler probably killed
172
- your job. To solve this, just go to your config file, and set the variable
173
- RAMMULT to a number larger than 1. For example, if you want to ask for double the
174
- RAM, set `RAMMULT=2`. You can also include simple arithmetic operations, like
175
- `RAMMULT=3/2`. If you want to add a fixed amount of RAM, in GiB, use addition.
176
- For example, to add 10G, set `RAMMULT=1+10`.
177
-
178
- 5.4. Still no idea? Try running the job again, sometimes the jobs fail with no
179
- apparent reason, but they succeed when re-submitted. If your job keeps failing,
180
- please gather as much information (the log, error and output files should be
181
- enough) and let me take a look.
182
-
183
- 6. In the step 2, some k-mers keep failing, and I just want to give up on them, can I?
184
-
185
- Yes. Step 3 will analyze only completed jobs, so you can just ignore these faulty
186
- k-mers. Very small k-mers, for example, sometimes need too much memory, and very
187
- large k-mers in Velvet sometimes need too much time. If you don't think you're
188
- missing too much, just ignore them.
189
-
@@ -1,112 +0,0 @@
1
- #!/bin/bash
2
-
3
- ##################### HELP
4
- HELP="
5
- Usage:
6
- $0 name[ prog[ k-mers]]
7
-
8
- name The name of the run. CONFIG.name.bash must exist.
9
- prog Program to execute. One of 'soap' or 'velvet'. By
10
- default, it executes both.
11
- k-mers Comma-separated list of k-mers to run. By default,
12
- it executes all the odd numbers between 21 and 63
13
- (inclusive).
14
-
15
- See $PDIR/README.txt for more information.
16
- "
17
- ##################### RUN
18
- # Find the directory of the pipeline
19
- PDIR=$(dirname $(readlink -f $0));
20
- # Load variables
21
- source "$PDIR/RUNME.bash"
22
- if [[ "$SCRATCH" == "" ]] ; then
23
- echo "$0: Error loading $PDIR/RUNME.bash, variable SCRATCH undefined" >&2
24
- exit 1
25
- fi
26
-
27
- # Check request
28
- RUNVELVET=yes
29
- RUNSOAP=yes
30
- if [[ "$2" == "velvet" ]] ; then
31
- RUNSOAP=no
32
- elif [[ "$2" == "soap" ]] ; then
33
- RUNVELVET=no
34
- fi
35
- if [[ "$3" == "" ]] ; then
36
- KMERARRAY="21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63"
37
- else
38
- KMERARRAY=$3
39
- fi
40
- if [[ "$VELVETSIM" == "" ]] ; then
41
- VELVETSIM=22
42
- fi
43
- if [[ "$SOAPSIM" == "" ]] ; then
44
- let SOAPSIM=130/$PPN
45
- fi
46
-
47
- # Run it
48
- RAMMULT=${RAMMULT:-1}
49
- echo "Jobs being launched in $SCRATCH"
50
- for LIB in $LIBRARIES; do
51
- # Prepare info
52
- echo "Running $LIB";
53
- if [[ "$USECOUPLED" == "yes" ]] ; then
54
- INPUT="$DATA/$LIB.CoupledReads.fa"
55
- elif [[ "$USESINGLE" == "yes" ]] ; then
56
- INPUT="$DATA/$LIB.SingleReads.fa"
57
- else
58
- echo "$0: Error: No task selected, neither USECOUPLED nor USESINGLE set to yes." >&2
59
- exit 1;
60
- fi
61
- VARS="LIB=$LIB,PDIR=$PDIR,DATA=$DATA,USECOUPLED=$USECOUPLED,USESINGLE=$USESINGLE"
62
- [[ -n $INSLEN ]] && VARS="$VARS,INSLEN=$INSLEN"
63
- [[ -n $VELVETG_EXTRA ]] && VARS="$VARS,VELVETG_EXTRA=$VELVETG_EXTRA"
64
- [[ -n $VELVETH_EXTRA ]] && VARS="$VARS,VELVETH_EXTRA=$VELVETH_EXTRA"
65
- [[ -n $CLEANUP ]] && VARS="$VARS,CLEANUP=$CLEANUP"
66
- let SIZE=$(ls -lH "$INPUT" | awk '{print $5}')/1024/1024/1024;
67
- let RAMS=40+$SIZE*10*$RAMMULT;
68
- let RAMV=50+$SIZE*15*$RAMMULT;
69
- # Launch Velvet
70
- if [[ "$RUNVELVET" == "yes" ]] ; then
71
- NAME="velvet_${LIB}"
72
- if [[ "$QUEUE" != "" ]]; then
73
- qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
74
- -l mem=${RAMV}gb -l "walltime=$WTIME" -q "$QUEUE" \
75
- -t "$KMERARRAY%$VELVETSIM"
76
- elif [[ $RAMV -gt 150 ]]; then
77
- qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
78
- -l mem=${RAMV}gb -l walltime=360:00:00 -q biohimem-6 \
79
- -t "$KMERARRAY%$VELVETSIM"
80
- elif [[ $SIZE -lt 6 ]]; then
81
- qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
82
- -l mem=${RAMV}gb -l walltime=12:00:00 -q iw-shared-6 \
83
- -t "$KMERARRAY%$VELVETSIM"
84
- elif [[ $SIZE -lt 20 ]]; then
85
- qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
86
- -l mem=${RAMV}gb -l walltime=120:00:00 -q bioforce-6 \
87
- -t "$KMERARRAY%$VELVETSIM"
88
- else
89
- qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
90
- -l mem=${RAMV}gb -l walltime=360:00:00 -q biocluster-6 \
91
- -t "$KMERARRAY%$VELVETSIM"
92
- fi
93
- fi
94
- # Launch SOAP
95
- if [[ "$RUNSOAP" == "yes" ]] ; then
96
- NAME="soap_${LIB}"
97
- if [[ "$QUEUE" != "" ]]; then
98
- qsub "$PDIR/soap.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
99
- -l mem=${RAMS}gb -l walltime=$WTIME -q $QUEUE -l nodes=1:ppn=$PPN \
100
- -t "$KMERARRAY%$SOAPSIM"
101
- elif [[ $RAMS -gt 150 ]]; then
102
- qsub "$PDIR/soap.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
103
- -l mem=${RAMS}gb -l walltime=48:00:00 -q biohimem-6 \
104
- -l nodes=1:ppn=$PPN -t "$KMERARRAY%$SOAPSIM"
105
- else
106
- qsub "$PDIR/soap.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
107
- -l mem=${RAMS}gb -l walltime=12:00:00 -q iw-shared-6 \
108
- -l nodes=1:ppn=$PPN -t "$KMERARRAY%$SOAPSIM"
109
- fi
110
- fi
111
- done
112
-
@@ -1,23 +0,0 @@
1
- #!/bin/bash
2
-
3
- ##################### RUN
4
- # Find the directory of the pipeline
5
- PDIR=$(dirname $(readlink -f $0));
6
- # Load variables
7
- source "$PDIR/RUNME.bash"
8
- if [[ "$SCRATCH" == "" ]] ; then
9
- echo "$0: Error loading $PDIR/RUNME.bash, variable SCRATCH undefined" >&2
10
- exit 1
11
- fi
12
-
13
- # Run it
14
- echo "Jobs being launched in $SCRATCH"
15
- for LIB in $LIBRARIES; do
16
- # Prepare info
17
- echo "Running $LIB";
18
- VARS="LIB=$LIB,PDIR=$PDIR"
19
- # Launch Stats
20
- NAME="N50_${LIB}"
21
- qsub "$PDIR/stats.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME"
22
- done
23
-
@@ -1,44 +0,0 @@
1
- #!/bin/bash
2
-
3
- ##################### RUN
4
- # Find the directory of the pipeline
5
- PDIR=$(dirname $(readlink -f $0));
6
- # Load variables
7
- source "$PDIR/RUNME.bash"
8
- if [[ "$SCRATCH" == "" ]] ; then
9
- echo "$0: Error loading $PDIR/RUNME.bash, variable SCRATCH undefined" >&2
10
- exit 1
11
- fi
12
-
13
- # Run it
14
- echo "Jobs being launched in $SCRATCH"
15
- RAMMULT=${RAMMULT:-1}
16
- for LIB in $LIBRARIES; do
17
- # Prepare info
18
- echo "Running $LIB";
19
- K_VELVET=$(echo $K_VELVET | sed -e 's/ /:/g')
20
- K_SOAP=$(echo $K_SOAP | sed -e 's/ /:/g')
21
- if [[ "$USECOUPLED" == "yes" ]] ; then
22
- INPUT="$DATA/$LIB.CoupledReads.fa"
23
- elif [[ "$USESINGLE" == "yes" ]] ; then
24
- INPUT="$DATA/$LIB.SingleReads.fa"
25
- else
26
- echo "$0: Error: No task selected, neither USECOUPLED nor USESINGLE set to yes." >&2
27
- exit 1;
28
- fi
29
- let SIZE=30*$(ls -l "$INPUT" | awk '{print $5}')/1024/1024/1024;
30
- let RAM=\(3+$SIZE\)*$RAMMULT;
31
- VARS="LIB=$LIB,PDIR=$PDIR,BIN454=$BIN454,KVELVET=$K_VELVET,KSOAP=$K_SOAP"
32
- # Launch Newbler
33
- NAME="Newbler_${LIB}"
34
- if [[ "$QUEUE" != "" ]] ; then
35
- qsub "$PDIR/newbler.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" -l nodes=1:ppn=$PPN -l mem=${RAM}g -l "walltime=$WTIME" -q "$QUEUE"
36
- elif [[ $RAM -gt 150 ]] ; then
37
- qsub "$PDIR/newbler.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" -l nodes=1:ppn=$PPN -l mem=${RAM}g -l walltime=360:00:00 -q biohimem-6
38
- elif [[ $SIZE -lt 4 ]] ; then
39
- qsub "$PDIR/newbler.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" -l nodes=1:ppn=$PPN -l mem=${RAM}g -l walltime=12:00:00 -q iw-shared-6
40
- else
41
- qsub "$PDIR/newbler.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" -l nodes=1:ppn=$PPN -l mem=${RAM}g -l walltime=120:00:00 -q biocluster-6
42
- fi
43
- done
44
-
@@ -1,50 +0,0 @@
1
- #!/bin/bash
2
-
3
- ##################### RUN
4
- # Check if it was sourced from RUNME-*.bash
5
- if [[ "$PDIR" == "" ]] ; then
6
- echo "$0: Error: This file is not stand-alone. Execute one of RUNME-*.bash as described in the README.txt file" >&2
7
- exit 1
8
- fi
9
-
10
- # Find the directory of the pipeline
11
- CWD=$(pwd)
12
- PDIR=$(dirname $(readlink -f $0));
13
-
14
- # Run it
15
- # Actually, this script doesn't run anything. It's meant to keep the
16
- # variables centralized.
17
-
18
- # Load config
19
- NAMES=$(ls $PDIR/CONFIG.*.bash | sed -e 's/.*CONFIG\./ * /' | sed -e 's/\.bash//');
20
- if [[ "$1" == "" ]] ; then
21
- if [[ "$HELP" == "" ]] ; then
22
- echo "
23
- Usage:
24
- $0 name
25
-
26
- name The name of the run. CONFIG.name.bash must exist.
27
-
28
- See $PDIR/README.txt for more information.
29
-
30
- Available names are:
31
- $NAMES
32
- " >&2
33
- else
34
- echo "$HELP
35
- Available names are:
36
- $NAMES
37
- " >&2
38
- fi
39
- exit 1
40
- fi
41
- if [[ ! -e "$PDIR/CONFIG.$1.bash" ]] ; then
42
- echo "$0: Error: Impossible to find $PDIR/CONFIG.$1.bash, available names are:
43
- $NAMES" >&2
44
- exit 1
45
- fi
46
- source "$PDIR/CONFIG.$1.bash"
47
-
48
- # Create the scratch directory
49
- if [[ ! -d $SCRATCH ]] ; then mkdir -p $SCRATCH ; fi;
50
-