miga-base 0.7.26.0 → 0.7.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. checksums.yaml +4 -4
  2. data/lib/miga/version.rb +1 -1
  3. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  4. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  5. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  6. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  7. data/utils/FastAAI/README.md +84 -0
  8. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  9. data/utils/enveomics/Docs/recplot2.md +244 -0
  10. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  11. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  12. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  13. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  14. data/utils/enveomics/LICENSE.txt +73 -0
  15. data/utils/enveomics/Makefile +52 -0
  16. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  17. data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
  18. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  19. data/utils/enveomics/Manifest/Tasks/fasta.json +766 -0
  20. data/utils/enveomics/Manifest/Tasks/fastq.json +243 -0
  21. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  22. data/utils/enveomics/Manifest/Tasks/mapping.json +67 -0
  23. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  24. data/utils/enveomics/Manifest/Tasks/other.json +829 -0
  25. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  26. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +501 -0
  27. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  28. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  29. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  30. data/utils/enveomics/Manifest/categories.json +156 -0
  31. data/utils/enveomics/Manifest/examples.json +154 -0
  32. data/utils/enveomics/Manifest/tasks.json +4 -0
  33. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  34. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  35. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  36. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  37. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  38. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  39. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  40. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  41. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  42. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  43. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  44. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  45. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  46. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  47. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  48. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  49. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  50. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  51. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  52. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  53. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  54. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  55. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  56. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  57. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  58. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  59. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  60. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  61. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  62. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  63. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  64. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  65. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  66. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  67. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  68. data/utils/enveomics/README.md +42 -0
  69. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  70. data/utils/enveomics/Scripts/Aln.cat.rb +163 -0
  71. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  72. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  73. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  74. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  75. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  76. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  77. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  78. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  79. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  80. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  81. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  82. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  83. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  84. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  85. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  86. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  87. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  88. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  89. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  90. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  91. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  92. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  93. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  94. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  95. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  96. data/utils/enveomics/Scripts/FastA.N50.pl +56 -0
  97. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  98. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  99. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  100. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  101. data/utils/enveomics/Scripts/FastA.fragment.rb +92 -0
  102. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  103. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  104. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  105. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  106. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  107. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  108. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  109. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  110. data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
  111. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  112. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  113. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  114. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  115. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  116. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  117. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  118. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  119. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  120. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  121. data/utils/enveomics/Scripts/FastQ.tag.rb +63 -0
  122. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  123. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  124. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  125. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  126. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  127. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  128. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  129. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  130. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  131. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  132. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  133. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  134. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  135. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  136. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  137. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  138. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  139. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  140. data/utils/enveomics/Scripts/SRA.download.bash +57 -0
  141. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  142. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  143. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  144. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  145. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  146. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  147. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  148. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  149. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  150. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  151. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  152. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  153. data/utils/enveomics/Scripts/aai.rb +418 -0
  154. data/utils/enveomics/Scripts/ani.rb +362 -0
  155. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  156. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  157. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  158. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  159. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  160. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  161. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  162. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  163. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  164. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  165. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  166. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +30 -0
  167. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  168. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  169. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  170. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  171. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  172. data/utils/enveomics/Scripts/ogs.rb +104 -0
  173. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  174. data/utils/enveomics/Scripts/rbm.rb +146 -0
  175. data/utils/enveomics/Tests/Makefile +10 -0
  176. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  177. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  178. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  179. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  180. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  181. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  182. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  183. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  184. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  185. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  186. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  187. data/utils/enveomics/Tests/alkB.nwk +1 -0
  188. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  189. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  190. data/utils/enveomics/Tests/hiv1.faa +59 -0
  191. data/utils/enveomics/Tests/hiv1.fna +134 -0
  192. data/utils/enveomics/Tests/hiv2.faa +70 -0
  193. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  194. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  195. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  196. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  197. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  198. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  199. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  200. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  201. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  202. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  203. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  204. data/utils/enveomics/build_enveomics_r.bash +45 -0
  205. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  206. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  207. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  208. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  209. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  210. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  211. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  212. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  213. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  214. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  215. data/utils/enveomics/enveomics.R/R/utils.R +50 -0
  216. data/utils/enveomics/enveomics.R/README.md +80 -0
  217. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  218. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  219. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
  220. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
  221. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
  222. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  223. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  224. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  225. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  226. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  227. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  228. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -0
  229. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -0
  230. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -0
  231. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  232. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  233. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -0
  234. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -0
  235. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -0
  236. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -0
  237. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -0
  238. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -0
  239. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  240. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  241. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -0
  242. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  243. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  244. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  245. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  246. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -0
  247. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  248. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  249. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -0
  250. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  251. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  252. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  253. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  254. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -0
  255. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  256. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -0
  257. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +37 -0
  258. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -0
  259. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  260. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  261. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -0
  262. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -0
  263. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  264. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  265. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  266. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  267. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -0
  268. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -0
  269. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -0
  270. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -0
  271. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  272. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  273. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  274. data/utils/enveomics/globals.mk +8 -0
  275. data/utils/enveomics/manifest.json +9 -0
  276. metadata +277 -4
@@ -0,0 +1,112 @@
1
+ #!/bin/bash
2
+
3
+ ##################### HELP
4
+ # Find the directory of the pipeline first: the help text below expands $PDIR
5
+ PDIR=$(dirname "$(readlink -f "$0")")
6
+ HELP="
7
+ Usage:
8
+ $0 name[ prog[ k-mers]]
9
+
10
+ name The name of the run. CONFIG.name.bash must exist.
11
+ prog Program to execute. One of 'soap' or 'velvet'. By
12
+ default, it executes both.
13
+ k-mers Comma-separated list of k-mers to run. By default,
14
+ it executes all the odd numbers between 21 and 63
15
+ (inclusive).
16
+
17
+ See $PDIR/README.md for more information.
18
+ "
19
+ ##################### RUN
20
+ # Load variables (RUNME.bash prints $HELP when no run name is given)
21
+ source "$PDIR/RUNME.bash"
22
+ if [[ "$SCRATCH" == "" ]] ; then
23
+   echo "$0: Error loading $PDIR/RUNME.bash, variable SCRATCH undefined" >&2
24
+   exit 1
25
+ fi
26
+
27
+ # Check request: both assemblers run unless the second argument picks one
28
+ RUNVELVET=yes
29
+ RUNSOAP=yes
30
+ if [[ "$2" == "velvet" ]] ; then
31
+   RUNSOAP=no
32
+ elif [[ "$2" == "soap" ]] ; then
33
+   RUNVELVET=no
34
+ fi
35
+ if [[ "$3" == "" ]] ; then
36
+   KMERARRAY="21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63"
37
+ else
38
+   KMERARRAY=$3
39
+ fi
40
+ if [[ "$VELVETSIM" == "" ]] ; then
41
+   VELVETSIM=22
42
+ fi
43
+ if [[ "$SOAPSIM" == "" ]] ; then
44
+   let SOAPSIM=130/$PPN
45
+ fi
46
+
47
+ # Run it
48
+ RAMMULT=${RAMMULT:-1}
49
+ echo "Jobs being launched in $SCRATCH"
50
+ for LIB in $LIBRARIES; do
51
+   # Prepare info
52
+   echo "Running $LIB";
53
+   if [[ "$USECOUPLED" == "yes" ]] ; then
54
+     INPUT="$DATA/$LIB.CoupledReads.fa"
55
+   elif [[ "$USESINGLE" == "yes" ]] ; then
56
+     INPUT="$DATA/$LIB.SingleReads.fa"
57
+   else
58
+     echo "$0: Error: No task selected, neither USECOUPLED nor USESINGLE set to yes." >&2
59
+     exit 1;
60
+   fi
61
+   VARS="LIB=$LIB,PDIR=$PDIR,DATA=$DATA,USECOUPLED=$USECOUPLED,USESINGLE=$USESINGLE"
62
+   [[ -n "$INSLEN" ]] && VARS="$VARS,INSLEN=$INSLEN"
63
+   [[ -n "$VELVETG_EXTRA" ]] && VARS="$VARS,VELVETG_EXTRA=$VELVETG_EXTRA"
64
+   [[ -n "$VELVETH_EXTRA" ]] && VARS="$VARS,VELVETH_EXTRA=$VELVETH_EXTRA"
65
+   [[ -n "$CLEANUP" ]] && VARS="$VARS,CLEANUP=$CLEANUP"
66
+   let SIZE=$(ls -lH "$INPUT" | awk '{print $5}')/1024/1024/1024;
67
+   let RAMS=40+$SIZE*10*$RAMMULT;
68
+   let RAMV=50+$SIZE*15*$RAMMULT;
69
+   # Launch Velvet (queue and walltime depend on the memory and input size)
70
+   if [[ "$RUNVELVET" == "yes" ]] ; then
71
+     NAME="velvet_${LIB}"
72
+     if [[ "$QUEUE" != "" ]]; then
73
+       qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
74
+         -l "mem=${RAMV}gb" -l "walltime=$WTIME" -q "$QUEUE" \
75
+         -t "$KMERARRAY%$VELVETSIM"
76
+     elif [[ $RAMV -gt 150 ]]; then
77
+       qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
78
+         -l "mem=${RAMV}gb" -l walltime=360:00:00 -q biohimem-6 \
79
+         -t "$KMERARRAY%$VELVETSIM"
80
+     elif [[ $SIZE -lt 6 ]]; then
81
+       qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
82
+         -l "mem=${RAMV}gb" -l walltime=12:00:00 -q iw-shared-6 \
83
+         -t "$KMERARRAY%$VELVETSIM"
84
+     elif [[ $SIZE -lt 20 ]]; then
85
+       qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
86
+         -l "mem=${RAMV}gb" -l walltime=120:00:00 -q bioforce-6 \
87
+         -t "$KMERARRAY%$VELVETSIM"
88
+     else
89
+       qsub "$PDIR/velvet.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
90
+         -l "mem=${RAMV}gb" -l walltime=360:00:00 -q biocluster-6 \
91
+         -t "$KMERARRAY%$VELVETSIM"
92
+     fi
93
+   fi
94
+   # Launch SOAP
95
+   if [[ "$RUNSOAP" == "yes" ]] ; then
96
+     NAME="soap_${LIB}"
97
+     if [[ "$QUEUE" != "" ]]; then
98
+       qsub "$PDIR/soap.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
99
+         -l "mem=${RAMS}gb" -l "walltime=$WTIME" -q "$QUEUE" -l "nodes=1:ppn=$PPN" \
100
+         -t "$KMERARRAY%$SOAPSIM"
101
+     elif [[ $RAMS -gt 150 ]]; then
102
+       qsub "$PDIR/soap.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
103
+         -l "mem=${RAMS}gb" -l walltime=48:00:00 -q biohimem-6 \
104
+         -l "nodes=1:ppn=$PPN" -t "$KMERARRAY%$SOAPSIM"
105
+     else
106
+       qsub "$PDIR/soap.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" \
107
+         -l "mem=${RAMS}gb" -l walltime=12:00:00 -q iw-shared-6 \
108
+         -l "nodes=1:ppn=$PPN" -t "$KMERARRAY%$SOAPSIM"
109
+     fi
110
+   fi
111
+ done
112
+
@@ -0,0 +1,23 @@
1
+ #!/bin/bash
2
+
3
+ ##################### RUN
4
+ # Find the directory of the pipeline (quoted so paths with spaces resolve)
5
+ PDIR=$(dirname "$(readlink -f "$0")")
6
+ # Load variables
7
+ source "$PDIR/RUNME.bash"
8
+ if [[ "$SCRATCH" == "" ]] ; then
9
+   echo "$0: Error loading $PDIR/RUNME.bash, variable SCRATCH undefined" >&2
10
+   exit 1
11
+ fi
12
+
13
+ # Run it
14
+ echo "Jobs being launched in $SCRATCH"
15
+ for LIB in $LIBRARIES; do
16
+   # Prepare info
17
+   echo "Running $LIB";
18
+   VARS="LIB=$LIB,PDIR=$PDIR"
19
+   # Launch Stats: one stats.pbs job per library, working in $SCRATCH
20
+   NAME="N50_${LIB}"
21
+   qsub "$PDIR/stats.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME"
22
+ done
23
+
@@ -0,0 +1,44 @@
1
+ #!/bin/bash
2
+
3
+ ##################### RUN
4
+ # Find the directory of the pipeline (quoted so paths with spaces resolve)
5
+ PDIR=$(dirname "$(readlink -f "$0")")
6
+ # Load variables
7
+ source "$PDIR/RUNME.bash"
8
+ if [[ "$SCRATCH" == "" ]] ; then
9
+   echo "$0: Error loading $PDIR/RUNME.bash, variable SCRATCH undefined" >&2
10
+   exit 1
11
+ fi
12
+
13
+ # Run it
14
+ echo "Jobs being launched in $SCRATCH"
15
+ RAMMULT=${RAMMULT:-1}
16
+ K_VELVET=$(echo $K_VELVET | sed -e 's/ /:/g') # spaces -> colons, done once
17
+ K_SOAP=$(echo $K_SOAP | sed -e 's/ /:/g')     # (the lists hold no commas)
18
+ for LIB in $LIBRARIES; do
19
+   # Prepare info
20
+   echo "Running $LIB";
21
+   if [[ "$USECOUPLED" == "yes" ]] ; then
22
+     INPUT="$DATA/$LIB.CoupledReads.fa"
23
+   elif [[ "$USESINGLE" == "yes" ]] ; then
24
+     INPUT="$DATA/$LIB.SingleReads.fa"
25
+   else
26
+     echo "$0: Error: No task selected, neither USECOUPLED nor USESINGLE set to yes." >&2
27
+     exit 1;
28
+   fi
29
+   let SIZE=30*$(ls -lH "$INPUT" | awk '{print $5}')/1024/1024/1024; # -H: size of the link target
30
+   let RAM=\(3+$SIZE\)*$RAMMULT;
31
+   VARS="LIB=$LIB,PDIR=$PDIR,BIN454=$BIN454,KVELVET=$K_VELVET,KSOAP=$K_SOAP"
32
+   # Launch Newbler
33
+   NAME="Newbler_${LIB}"
34
+   if [[ "$QUEUE" != "" ]] ; then
35
+     qsub "$PDIR/newbler.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" -l "nodes=1:ppn=$PPN" -l "mem=${RAM}g" -l "walltime=$WTIME" -q "$QUEUE"
36
+   elif [[ $RAM -gt 150 ]] ; then
37
+     qsub "$PDIR/newbler.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" -l "nodes=1:ppn=$PPN" -l "mem=${RAM}g" -l walltime=360:00:00 -q biohimem-6
38
+   elif [[ $SIZE -lt 4 ]] ; then
39
+     qsub "$PDIR/newbler.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" -l "nodes=1:ppn=$PPN" -l "mem=${RAM}g" -l walltime=12:00:00 -q iw-shared-6
40
+   else
41
+     qsub "$PDIR/newbler.pbs" -v "$VARS" -d "$SCRATCH" -N "$NAME" -l "nodes=1:ppn=$PPN" -l "mem=${RAM}g" -l walltime=120:00:00 -q biocluster-6
42
+   fi
43
+ done
44
+
@@ -0,0 +1,50 @@
1
+ #!/bin/bash
2
+
3
+ ##################### RUN
4
+ # Check if it was sourced from RUNME-*.bash (those set PDIR before sourcing)
5
+ if [[ "$PDIR" == "" ]] ; then
6
+   echo "$0: Error: This file is not stand-alone. Execute one of RUNME-*.bash as described in the README.md file" >&2
7
+   exit 1
8
+ fi
9
+
10
+ # Find the directory of the pipeline
11
+ CWD=$(pwd)
12
+ PDIR=$(dirname "$(readlink -f "$0")")
13
+
14
+ # Run it
15
+ # Actually, this script doesn't run anything. It's meant to keep the
16
+ # variables centralized.
17
+
18
+ # Load config; NAMES lists each CONFIG.<name>.bash as a " * <name>" bullet
19
+ NAMES=$(ls "$PDIR"/CONFIG.*.bash | sed -e 's/.*CONFIG\./ * /' -e 's/\.bash$//');
20
+ if [[ "$1" == "" ]] ; then
21
+   if [[ "$HELP" == "" ]] ; then
22
+     echo "
23
+ Usage:
24
+ $0 name
25
+
26
+ name The name of the run. CONFIG.name.bash must exist.
27
+
28
+ See $PDIR/README.md for more information.
29
+
30
+ Available names are:
31
+ $NAMES
32
+ " >&2
33
+   else
34
+     echo "$HELP
35
+ Available names are:
36
+ $NAMES
37
+ " >&2
38
+   fi
39
+   exit 1
40
+ fi
41
+ if [[ ! -e "$PDIR/CONFIG.$1.bash" ]] ; then
42
+   echo "$0: Error: Impossible to find $PDIR/CONFIG.$1.bash, available names are:
43
+ $NAMES" >&2
44
+   exit 1
45
+ fi
46
+ source "$PDIR/CONFIG.$1.bash"
47
+
48
+ # Create the scratch directory (skipped when SCRATCH is empty; callers report it)
49
+ if [[ -n "$SCRATCH" && ! -d "$SCRATCH" ]] ; then mkdir -p -- "$SCRATCH" ; fi;
50
+
@@ -0,0 +1,37 @@
1
+ # kSelector: plots reads used and N50 per K for one library, flags suggested Ks
2
+ # @author: Luis M. Rodriguez-R
3
+ # @update: Nov-29-2012
4
+
5
+ kSelector <- function(file, lib){
6
+    red <- rgb(0.6, 0, 0);
7
+    d <- read.table(file, sep=" ", header=TRUE, fill=TRUE);
8
+    d <- d[!is.na(d$N50) & !is.na(d$used), ];
9
+    d$reads <- max(d$reads, na.rm=TRUE)
10
+    d <- d[order(d$K), ];
11
+    rownames(d) <- seq_len(nrow(d));
12
+    par(mar=c(5,4,4,5)+.1, cex=0.8);
13
+    barplot(d$reads/1e6, names.arg=d$K, col='white', ylab='Number of reads (in millions)', xlab='K',
14
+       main=paste('Reads used and N50 by K-mers in the assembly of', lib));
15
+    barplot(d$used/1e6, col='grey', add=TRUE);
16
+    par(new=TRUE);
17
+    plot(seq_along(d$K)-0.5, d$N50, col=red, type='b', lty=2, pch=20, cex=1, lwd=1.5,
18
+       xlim=c(0, length(d$K)), xaxt='n', yaxt='n', xlab='', ylab='');
19
+    axis(4, col.axis=red);
20
+    mtext('N50 (bp)', side=4, line=3, col=red);
21
+    # Suggest best k-mers
22
+    if(nrow(d) >= 3){
23
+       x <- data.frame(K=d$K, l=(d$N50 - mean(d$N50))/sd(d$N50), u=(d$used - mean(d$used))/sd(d$used));
24
+       rownames(x) <- rownames(d)
25
+       d <- cbind(d, sel=FALSE);
26
+       # Three picks, weighting standardized N50 (l) by 2, 1/2 and 1 against reads used (u)
27
+       for(l_star in c(2, 1/2, 1)){
28
+          k_s_i <- x$K[which.max(l_star*x$l + x$u)];
29
+          # Remove the pick so the next weight selects a different K
30
+          x <- x[x$K!=k_s_i, ];
31
+          d$sel[d$K==k_s_i] <- TRUE;
32
+       }
33
+       abline(v=as.numeric(rownames(d)[d$sel])-0.5, col='darkgreen', lty=6);
34
+    }
35
+    return(d);
36
+ }
37
+
@@ -0,0 +1,68 @@
1
+ #!/bin/bash
2
+ #PBS -l nodes=1:ppn=1
3
+ #PBS -k oe
4
+
5
+ # Some defaults for the parameters
6
+ BIN454=${BIN454:-"$HOME/454/bin"};
7
+
8
+ # Check mandatory variables
9
+ if [[ "$LIB" == "" ]]; then
10
+   echo "Error: LIB is mandatory" >&2
11
+   exit 1;
12
+ fi
13
+ if [[ "$PDIR" == "" ]]; then
14
+   echo "Error: PDIR is mandatory" >&2
15
+   exit 1;
16
+ fi
17
+ if [[ "$KVELVET$KSOAP" == "" ]]; then
18
+   echo "Error: KVELVET and/or KSOAP are mandatory" >&2
19
+   exit 1;
20
+ fi
21
+
22
+ # Prepare input (k-mer lists arrive colon-separated; split them on spaces)
23
+ KVELVET=$(echo $KVELVET | sed -e 's/:/ /g')
24
+ KSOAP=$(echo $KSOAP | sed -e 's/:/ /g')
25
+ NP=$(wc -l < "$PBS_NODEFILE")
26
+ CWD=$(pwd)
27
+ DIR="$CWD/$LIB.newbler"
28
+ LOG="$DIR.log"
29
+ module load perl/5.14.4
30
+ export PATH="$PATH:$BIN454"
31
+
32
+ # Create project ($DIR.proc records the current stage)
33
+ echo new > "$DIR.proc"
34
+ nohup newAssembly "$DIR" > "$LOG"
35
+
36
+ # Prepare Velvet
37
+ if [[ "$KVELVET" != "" ]] ; then
38
+   echo pre-velvet > "$DIR.proc"
39
+   rm -f -- "$LIB.velvet.tmp1"
40
+   for K in $KVELVET ; do
41
+     perl "$PDIR/FastA.filterN.pl" "$LIB.velvet_$K/contigs.fa" >> "$LIB.velvet.tmp1"
42
+   done
43
+   perl "$PDIR/newbler_preparator.pl" "$LIB.velvet.tmp1" "$LIB.velvet.tmp2"
44
+   cd "$DIR" || { echo "Error: cannot enter $DIR" >&2 ; exit 1 ; }
45
+   nohup addRun "../$LIB.velvet.tmp2" >> "$LOG"
46
+   cd "$CWD"
47
+ fi ;
48
+
49
+ # Prepare SOAP
50
+ if [[ "$KSOAP" != "" ]] ; then
51
+   echo pre-soap > "$DIR.proc"
52
+   rm -f -- "$LIB.soap.tmp1"
53
+   for K in $KSOAP ; do
54
+     cat "$LIB.soap_$K/O.contig" >> "$LIB.soap.tmp1"
55
+   done
56
+   perl "$PDIR/newbler_preparator.pl" "$LIB.soap.tmp1" "$LIB.soap.tmp2"
57
+   cd "$DIR" || { echo "Error: cannot enter $DIR" >&2 ; exit 1 ; }
58
+   nohup addRun "../$LIB.soap.tmp2" >> "$LOG"
59
+   cd "$CWD"
60
+ fi ;
61
+
62
+ # Run
63
+ cd "$DIR" || { echo "Error: cannot enter $DIR" >&2 ; exit 1 ; }
64
+ echo newbler > "$DIR.proc"
65
+ nohup runProject -cpu "$NP" >> "$LOG"
66
+ cd "$CWD"
67
+ echo done > "$DIR.proc"
68
+
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/perl
2
+ # Rewrites a FastA file: sequences under 100 bp are dropped, those under
3
+ # 1500 bp are copied, and longer ones are cut into overlapping pieces.
4
+ my ($in, $out) = @ARGV;
5
+ ($in and $out) or die "
6
+ Usage: $0 input.fa output.fa
7
+ ";
8
+
9
+ open IN, "<", $in or die "Cannot read file: $in: $!\n";
10
+ open OUT,">", $out or die "Cannot create file: $out: $!\n";
11
+
12
+ # Records are kept as [header, sequence] pairs rather than in a hash keyed by
13
+ # header, so entries that share a header do not overwrite each other.
14
+ my @reads=();
15
+ while(<IN>){
16
+    chomp;
17
+    if(/^\>/){
18
+       push(@reads,[$_,'']);
19
+    }elsif(@reads){
20
+       # Sequence line: append to the latest record (lines before any header are skipped)
21
+       $reads[-1][1].=$_;
22
+    }
23
+ }
24
+ close(IN);
25
+
26
+ for my $rec (@reads){
27
+    my ($tag,$read)=@$rec;
28
+    my $l=length $read;
29
+    # Sequences shorter than 100 bp are not written
30
+    if($l<100){
31
+       next;
32
+    }else{
33
+       if($l<1500){
34
+          print OUT "$tag\n$read\n";
35
+       }else{
36
+          # Pieces of up to 1500 bp starting every 200 bp, tagged :r1, :r2, ...
37
+          my $start=0;
38
+          my $i=1;
39
+          while($start<$l-100){
40
+             my $tag_new=$tag.':r'.$i;
41
+             $i++;
42
+             my $read_new=substr($read,$start,1500);
43
+             $start+=200;
44
+             print OUT "$tag_new\n$read_new\n";
45
+          }
46
+       }
47
+    }
48
+ }
49
+ close(OUT);
@@ -0,0 +1,80 @@
1
+ #!/bin/bash
2
+ #PBS -k oe
3
+
4
+ # Some defaults for the parameters
5
+ INSLEN=${INSLEN:-300};
6
+ USECOUPLED=${USECOUPLED:-yes}
7
+ USESINGLE=${USESINGLE:-no}
8
+ CLEANUP=${CLEANUP:-yes}
9
+
10
+ # Check mandatory variables
11
+ if [[ "$LIB" == "" ]]; then
12
+   echo "Error: LIB is mandatory" >&2
13
+   exit 1;
14
+ fi
15
+ if [[ "$PDIR" == "" ]]; then
16
+   echo "Error: PDIR is mandatory" >&2
17
+   exit 1;
18
+ fi
19
+ if [[ "$DATA" == "" ]]; then
20
+   echo "Error: DATA is mandatory" >&2
21
+   exit 1;
22
+ fi
23
+
24
+ # Prepare input (the k-mer is the array index of this job)
25
+ module load perl/5.14.4
26
+ KMER=$PBS_ARRAYID
27
+ DIR="$LIB.soap_$KMER"
28
+ if [[ "$USECOUPLED" == "yes" ]]; then
29
+   MAXRDLEN=${MAXRDLEN:-$(perl "$PDIR/FastA.length.pl" "$DATA/$LIB.CoupledReads.fa" | head -n 200000 | awk '{if($2>MAX) MAX=$2} END{print MAX}')}
30
+ elif [[ "$USESINGLE" == "yes" ]]; then
31
+   MAXRDLEN=${MAXRDLEN:-$(perl "$PDIR/FastA.length.pl" "$DATA/$LIB.SingleReads.fa" | head -n 200000 | awk '{if($2>MAX) MAX=$2} END{print MAX}')}
32
+ else
33
+   echo "$0: Error: Nothing to do, neither USECOUPLED nor USESINGLE set to yes." >&2
34
+   exit 2
35
+ fi
36
+ NP=$(wc -l < "$PBS_NODEFILE")
37
+
38
+ # Config ($DIR.proc records the current stage)
39
+ module load SOAP/denovo2/r240
40
+ echo config > "$DIR.proc"
41
+ echo "max_rd_len=$MAXRDLEN
42
+ [LIB]
43
+ reverse_seq=0
44
+ asm_flag=3
45
+ rank=1" > "$DIR.config"
46
+ if [[ "$USECOUPLED" == "yes" ]]; then
47
+   echo "avg_ins=$INSLEN
48
+ p=$DATA/$LIB.CoupledReads.fa" >> "$DIR.config"
49
+ fi
50
+ if [[ "$USESINGLE" == "yes" ]]; then
51
+   echo "f=$DATA/$LIB.SingleReads.fa" >> "$DIR.config"
52
+ fi
53
+
54
+
55
+ # Run (any previous output directory is replaced)
56
+ echo pre > "$DIR.proc"
57
+ if [[ -d "$DIR" ]] ; then rm -R -- "$DIR" ; fi
58
+ mkdir -- "$DIR"
59
+ echo soap > "$DIR.proc"
60
+ SOAPdenovo-63mer all -s "$DIR.config" -p "$NP" -K "$KMER" -o "$DIR/O" &> "$DIR.log"
61
+ if [[ -d "$DIR" ]] ; then
62
+   if [[ -s "$DIR/O.contig" ]] ; then
63
+     if [[ "$CLEANUP" != "no" ]] ; then
64
+       echo cleanup > "$DIR.proc"
65
+       rm -f -- "$DIR"/*edge
66
+       rm -f -- "$DIR"/*vertex
67
+       rm -f -- "$DIR"/*Arc*
68
+       rm -f -- "$DIR"/*Graph*
69
+       rm -f -- "$DIR"/*readInGap*
70
+     fi
71
+     echo done > "$DIR.proc"
72
+   else
73
+     echo "$0: Error: File $DIR/O.contig doesn't exist, something went wrong" >&2
74
+     exit 1
75
+   fi
76
+ else
77
+   echo "$0: Error: Directory $DIR doesn't exist, something went wrong" >&2
78
+   exit 1
79
+ fi
80
+