miga-base 0.7.26.0 → 0.7.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/version.rb +1 -1
  3. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  4. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  5. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  6. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  7. data/utils/FastAAI/README.md +84 -0
  8. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  9. data/utils/enveomics/Docs/recplot2.md +244 -0
  10. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  11. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  12. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  13. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  14. data/utils/enveomics/LICENSE.txt +73 -0
  15. data/utils/enveomics/Makefile +52 -0
  16. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  17. data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
  18. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  19. data/utils/enveomics/Manifest/Tasks/fasta.json +766 -0
  20. data/utils/enveomics/Manifest/Tasks/fastq.json +243 -0
  21. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  22. data/utils/enveomics/Manifest/Tasks/mapping.json +67 -0
  23. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  24. data/utils/enveomics/Manifest/Tasks/other.json +829 -0
  25. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  26. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +501 -0
  27. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  28. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  29. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  30. data/utils/enveomics/Manifest/categories.json +156 -0
  31. data/utils/enveomics/Manifest/examples.json +154 -0
  32. data/utils/enveomics/Manifest/tasks.json +4 -0
  33. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  34. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  35. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  36. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  37. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  38. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  39. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  40. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  41. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  42. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  43. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  44. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  45. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  46. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  47. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  48. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  49. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  50. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  51. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  52. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  53. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  54. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  55. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  56. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  57. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  58. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  59. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  60. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  61. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  62. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  63. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  64. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  65. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  66. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  67. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  68. data/utils/enveomics/README.md +42 -0
  69. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  70. data/utils/enveomics/Scripts/Aln.cat.rb +163 -0
  71. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  72. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  73. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  74. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  75. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  76. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  77. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  78. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  79. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  80. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  81. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  82. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  83. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  84. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  85. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  86. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  87. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  88. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  89. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  90. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  91. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  92. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  93. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  94. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  95. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  96. data/utils/enveomics/Scripts/FastA.N50.pl +56 -0
  97. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  98. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  99. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  100. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  101. data/utils/enveomics/Scripts/FastA.fragment.rb +92 -0
  102. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  103. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  104. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  105. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  106. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  107. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  108. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  109. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  110. data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
  111. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  112. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  113. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  114. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  115. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  116. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  117. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  118. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  119. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  120. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  121. data/utils/enveomics/Scripts/FastQ.tag.rb +63 -0
  122. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  123. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  124. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  125. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  126. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  127. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  128. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  129. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  130. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  131. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  132. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  133. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  134. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  135. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  136. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  137. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  138. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  139. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  140. data/utils/enveomics/Scripts/SRA.download.bash +57 -0
  141. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  142. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  143. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  144. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  145. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  146. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  147. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  148. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  149. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  150. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  151. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  152. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  153. data/utils/enveomics/Scripts/aai.rb +418 -0
  154. data/utils/enveomics/Scripts/ani.rb +362 -0
  155. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  156. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  157. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  158. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  159. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  160. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  161. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  162. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  163. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  164. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  165. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  166. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +30 -0
  167. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  168. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  169. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  170. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  171. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  172. data/utils/enveomics/Scripts/ogs.rb +104 -0
  173. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  174. data/utils/enveomics/Scripts/rbm.rb +146 -0
  175. data/utils/enveomics/Tests/Makefile +10 -0
  176. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  177. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  178. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  179. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  180. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  181. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  182. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  183. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  184. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  185. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  186. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  187. data/utils/enveomics/Tests/alkB.nwk +1 -0
  188. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  189. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  190. data/utils/enveomics/Tests/hiv1.faa +59 -0
  191. data/utils/enveomics/Tests/hiv1.fna +134 -0
  192. data/utils/enveomics/Tests/hiv2.faa +70 -0
  193. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  194. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  195. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  196. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  197. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  198. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  199. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  200. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  201. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  202. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  203. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  204. data/utils/enveomics/build_enveomics_r.bash +45 -0
  205. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  206. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  207. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  208. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  209. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  210. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  211. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  212. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  213. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  214. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  215. data/utils/enveomics/enveomics.R/R/utils.R +50 -0
  216. data/utils/enveomics/enveomics.R/README.md +80 -0
  217. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  218. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  219. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
  220. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
  221. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
  222. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  223. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  224. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  225. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  226. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  227. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  228. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -0
  229. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -0
  230. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -0
  231. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  232. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  233. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -0
  234. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -0
  235. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -0
  236. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -0
  237. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -0
  238. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -0
  239. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  240. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  241. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -0
  242. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  243. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  244. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  245. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  246. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -0
  247. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  248. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  249. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -0
  250. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  251. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  252. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  253. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  254. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -0
  255. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  256. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -0
  257. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +37 -0
  258. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -0
  259. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  260. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  261. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -0
  262. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -0
  263. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  264. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  265. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  266. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  267. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -0
  268. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -0
  269. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -0
  270. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -0
  271. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  272. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  273. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  274. data/utils/enveomics/globals.mk +8 -0
  275. data/utils/enveomics/manifest.json +9 -0
  276. metadata +277 -4
@@ -0,0 +1,182 @@
1
+
2
+ ##### CLASSES:
3
+ # Gene.new(genome, id): Initializes a new Gene.
4
+ # genome: A string uniquely identifying the parent genome.
5
+ # id: A string uniquely identifying the gene within the genome. It can be
6
+ # non-unique across genomes.
7
class Gene
  attr_reader :genome_id, :id

  # Registry of genome names shared by all genes. The position of a genome in
  # this list is the numeric identifier stored in each gene (genome_id).
  @@genomes = []

  # Get all genomes in the run as an array of strings.
  def self.genomes
    @@genomes
  end

  # genome: Name of the parent genome, registered the first time it is seen, or
  #   the Integer index of a genome that has already been registered.
  # id: Identifier of the gene within the genome.
  # Aborts if an Integer index is passed that has not been registered yet.
  def initialize(genome, id)
    if genome.is_a? Integer
      abort "Internal error: Genome #{genome} does not exist yet." if
        @@genomes[genome].nil?
      @genome_id = genome
    else
      # Look the genome up only once, and register it if it is still unknown
      @genome_id = @@genomes.index(genome)
      if @genome_id.nil?
        @@genomes << genome
        @genome_id = @@genomes.size - 1
      end
    end
    @id = id
  end

  # Compare if two Gene objects refer to the same gene. Objects of any other
  # class are never equal to a gene.
  def ==(b)
    return false unless b.is_a? Gene
    genome_id == b.genome_id && id == b.id
  end

  # Get the name of the genome containing this gene.
  def genome
    @@genomes[genome_id]
  end

  # String representation of the gene, in the form "genome:id".
  def to_s
    "#{genome}:#{id}"
  end
end
36
+
37
+ # OG.new(): Initializes an empty OG.
38
+ # OG.new(genomes, genes): Initializes a pre-computed OG.
39
+ # genomes: List of genomes as an array of strings (as in Gene.genomes).
40
+ # genes: List of genes as an array of strings, with '-' indicating no genes and
41
+ # multiple genes separated by ','.
42
class OG
  # genes: Gene identifiers grouped by genome, indexed by genome_id. Positions
  #   of genomes without genes are nil (or beyond the end of the array).
  # notes: Annotations printed after the last column by to_s.
  attr_reader :genes, :notes

  # See the description of OG.new above the class: without arguments the OG
  # starts empty, otherwise genes[i] lists the genes of genomes[i].
  def initialize(genomes=nil, genes=nil)
    @genes = []
    @notes = []
    return if genomes.nil? or genes.nil?
    genes.each_with_index do |gene_ids, genome_i|
      next if gene_ids == "-"
      gene_ids.split(/,/).each do |gene_id|
        self << Gene.new(genomes[genome_i], gene_id)
      end
    end
  end

  # Add genes or combine another OG into the loaded OG (self). A gene that is
  # already part of the OG is not added twice (nil is returned in that case).
  # Aborts if obj is neither a Gene nor an OG.
  def <<(obj)
    if obj.is_a? Gene
      @genes[obj.genome_id] ||= []
      @genes[obj.genome_id] << obj.id unless include? obj
    elsif obj.is_a? OG
      obj.genes_obj.each { |gene| self << gene }
    else
      abort "Unsupported class for #{obj}"
    end
  end

  # Get the list of genes as objects (internally saved as strings to save RAM).
  def genes_obj
    Gene.genomes.each_index.flat_map do |genome_id|
      # The genome is already registered, so its index can be passed directly
      # instead of searching for its name again for every gene.
      (genes[genome_id] || []).map { |gene_id| Gene.new(genome_id, gene_id) }
    end
  end

  # Evaluates if the OG contains the passed gene.
  def include?(gene)
    in_genome = genes[gene.genome_id]
    return false if in_genome.nil?
    in_genome.include? gene.id
  end

  # Get the list of genomes containing genes in this OG, as an array of genome
  # indexes (positions in Gene.genomes).
  def genomes
    Gene.genomes.each_index.select do |gno|
      !(genes[gno].nil? || genes[gno].empty?)
    end
  end

  # Adds a note that will be printed after the last column. If note_idx is
  # passed, the note is stored in that position, appended with ' || ' to any
  # note already there; otherwise it is added as a new note at the end.
  def add_note note, note_idx=nil
    if note_idx.nil?
      @notes << note
    else
      @notes[note_idx] = (@notes[note_idx].nil? ? '' :
        (@notes[note_idx]+' || ')) + note
    end
  end

  # Tab-delimited row with one column per genome in Gene.genomes: the genes
  # separated by ',', or '-' for genomes without genes. Notes, if any, follow
  # after a '#' column.
  def to_s
    row = Gene.genomes.each_index.map do |genome_id|
      genes[genome_id].nil? ? "-" : genes[genome_id].join(",")
    end.join("\t")
    row += "\t#\t" + notes.join("\t") unless notes.size == 0
    row
  end

  # Array with one boolean per genome in Gene.genomes, indicating if the OG has
  # an entry for that genome.
  def to_bool_a
    Gene.genomes.each_index.map { |genome_id| !genes[genome_id].nil? }
  end
end
107
+
108
+ # OGCollection.new(): Initializes an empty collection of OGs.
109
class OGCollection
  # ogs: The OGs in the collection.
  # note_srcs: Names of the annotation sources, printed in the header by to_s.
  attr_reader :ogs, :note_srcs

  def initialize
    @ogs = []
    @note_srcs = []
  end

  # Add an OG to the collection
  def <<(og)
    @ogs << og
  end

  # Compare OGs all-vs-all to identify groups that should be merged. Each OG is
  # merged into the first already-kept OG containing any of its genes, or kept
  # as a new OG if none of its genes has been seen.
  # NOTE(review): only the first shared gene is followed, so an OG sharing
  # genes with two different kept OGs is merged into one of them only.
  def consolidate!
    old_ogs = ogs
    @ogs = []
    old_ogs.each do |og|
      host = nil
      og.genes_obj.each do |gene|
        host = get_og(gene)
        break unless host.nil?
      end
      host.nil? ? (self << og) : (host << og)
    end
  end

  # Removes OGs present in less than 'fraction' of the genomes
  def filter_core!(fraction=1.0)
    min_genomes = (fraction * Gene.genomes.size).ceil
    @ogs.select! { |og| og.genomes.size >= min_genomes }
  end

  # Removes OGs present more than 'dups' number of times in any genome
  def remove_inparalogs!(dups=1)
    @ogs.select! do |og|
      # Genomes without genes are stored as nil (or not stored at all); they
      # count as zero copies instead of breaking the comparison.
      og.genes.all? { |pergenome| pergenome.nil? || pergenome.size <= dups }
    end
  end

  # Add a pair of RBM genes into the corresponding OG, or create a new OG. The
  # OG of 'a' takes precedence over the OG of 'b' when both exist.
  def add_rbm(a, b)
    og = get_og(a) || get_og(b)
    if og.nil?
      og = OG.new
      @ogs << og
    end
    og << a
    og << b
  end

  # Get the OG containing the gene (returns the first, if multiple), or nil if
  # no OG contains it.
  def get_og(gene)
    ogs.find { |og| og.include? gene }
  end

  # Get the genes from a given genome (returns an array of arrays), with one
  # array of gene identifiers per OG, empty for OGs without genes in the genome.
  def get_genome_genes(genome)
    genome_id = Gene.genomes.index(genome)
    ogs.map { |og| og.genes[genome_id] || [] }
  end

  # Add annotation sources
  def add_note_src src
    @note_srcs << src
  end

  # Tab-delimited table: a header with the genome names (and the annotation
  # sources after a '#' column, if any), followed by one line per OG.
  def to_s
    header = Gene.genomes.join("\t")
    header += "\t#\t" + note_srcs.join("\t") if note_srcs.length > 0
    header + "\n" + ogs.map { |og| og.to_s }.join("\n")
  end

  # Presence/absence matrix: one array of booleans per OG.
  def to_bool_a ; ogs.map{ |og| og.to_bool_a } ; end
end
182
+
@@ -0,0 +1,74 @@
1
+
2
+ #
3
+ # @author: Luis M. Rodriguez-R
4
+ # @license: artistic license 2.0
5
+ #
6
+
7
+ require "enveomics_rb/enveomics"
8
+ use "restclient"
9
+ use "json"
10
+
11
class RemoteData
  # Class-level variables
  @@EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
  @@EBIREST = "http://www.ebi.ac.uk/Tools"

  # Class-level methods

  # Calls the NCBI EUtils script `script` with the query parameters `params`,
  # trying up to 10 times. Returns the body of the response as a string, and
  # also saves it in `outfile` if one is passed. Aborts if no attempt results
  # in a response with code 200.
  def self.eutils(script, params={}, outfile=nil)
    response = nil
    last_code = nil
    10.times do
      # Forget the previous attempt so it is never reported as the current one
      response = nil
      begin
        response = RestClient.get "#{@@EUTILS}/#{script}", {:params=>params}
      rescue => err
        # A failed request leaves no response: the code, if any, comes from
        # the error itself
        last_code = err.respond_to?(:http_code) ? err.http_code : nil
        warn "Request failed #{last_code.nil? ? "without error code" :
          "with error code #{last_code}"}."
        next
      end
      last_code = response.code
      break if response.code == 200
    end
    abort "Unable to reach NCBI EUtils, error code " +
      "#{last_code.nil? ? "unknown" : last_code}." unless
      !response.nil? and response.code == 200
    body = response.to_s
    # The block form closes the file even if writing fails
    File.open(outfile, "w") { |ohf| ohf.print body } unless outfile.nil?
    body
  end

  # Shortcuts to RemoteData.eutils for the efetch, elink, and esummary scripts.
  # Arguments are passed as the `params` and `outfile` of RemoteData.eutils.
  def self.efetch(*etc)
    eutils "efetch.fcgi", *etc
  end
  def self.elink(*etc)
    eutils "elink.fcgi", *etc
  end
  def self.esummary(*etc)
    eutils "esummary.fcgi", *etc
  end

  # Queries the summary of `old_gi` in the database `db`. Returns two values:
  # the result of fetching (with rettype "gi") the entry listed as
  # "replacedby" (nil if there is none), and the "status" of the old entry.
  # Both are nil if the summary has no result for `old_gi`.
  def self.update_gi(db, old_gi)
    summ = JSON.parse RemoteData.esummary({:db=>db, :id=>old_gi,
      :retmode=>"json"})
    return nil,nil if summ["result"].nil? or summ["result"][old_gi.to_s].nil?
    entry = summ["result"][old_gi.to_s]
    new_acc = entry["replacedby"]
    new_gi = (new_acc.nil? ? nil :
      RemoteData.efetch({:db=>db, :id=>new_acc, :rettype=>"gi"}))
    return new_gi,entry["status"]
  end

  # Fetches the entry `id` from the database `db` in the given `format` using
  # EBI's dbfetch (with a timeout of 600). Returns the body of the response as
  # a string, and also saves it in `outfile` if one is passed. Raises an error
  # if the response code is not 200.
  def self.ebiFetch(db, id, format, outfile=nil)
    url = "#{@@EBIREST}/dbfetch/dbfetch/#{db}/#{id}/#{format}"
    response = RestClient::Request.execute(:method=>:get,
      :url=>url, :timeout=>600)
    raise "Unable to reach EBI REST client, error code " +
      response.code.to_s + "." unless response.code == 200
    body = response.to_s
    File.open(outfile, "w") { |ohf| ohf.print body } unless outfile.nil?
    body
  end

  # Gets the taxonomy ID of the entry `id` in the EBI database `db`, as a
  # string of digits taken from the first `FT /db_xref="taxon:..."` line of the
  # "annot" format, or from the first `OX NCBI_TaxID=...` line otherwise.
  # Returns nil if neither line is found or no numeric ID can be extracted.
  def self.ebiseq2taxid(id,db)
    doc = RemoteData.ebiFetch(db, id, "annot").split(/[\n\r]/)
    ln = doc.grep(/^FT\s+\/db_xref="taxon:/).first
    ln = doc.grep(/^OX\s+NCBI_TaxID=/).first if ln.nil?
    return nil if ln.nil?
    taxid = ln.sub(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, "\\1")
    return nil unless taxid =~ /^\d+$/
    taxid
  end
end
74
+
@@ -0,0 +1,237 @@
1
+
2
+ ##### CLASSES:
3
+ # SeqRange.parse(str): Initializes a new SeqRange from a string. A SeqRange is a
4
+ # representation of any collection of coordinates in a given sequence.
5
+ # Coordinates here are 1-based and base-located. Admittedly, the
6
+ # 0-based/interbase-located system is much more convenient for range
7
+ # operations, but GenBank (together with most common Software) is built on
8
+ # the 1-based/base-located system.
9
+ # str: A string describing the sequence range as in GenBank records.
10
+ # Note that "ID:location" notation is NOT supported by this implementation,
11
+ # although it is permitted by GenBank. Some examples of valid `str`:
12
+ # "<1..123"
13
+ # "complement(3..6)"
14
+ # "join(complement(join(13..43,complement(45..46),complement(1..12),
15
+ # <1..12)),12..15,13..22)"
16
+ # The last one is valid, but once parsed it's internally simplified as:
17
+ # "join(complement(<1..12),1..12,45..46,complement(13..43),12..15,13..22)"
18
+ # Which is exactly equivalent. The common (but non-GenBank-compliant)
19
+ # practice of inverting coordinates instead of using the `complement()`
20
+ # operator is also supported. For example:
21
+ # "123..3"
22
+ # Is interpreted as:
23
+ # "complement(3..123)"
24
+ # See also http://www.insdc.org/files/feature_table.html
25
+ #
26
+ # SeqRange.new(c): Initializes a new SeqRange from an object.
27
+ # c: Any object supported by the `<<` operator, or `nil` to create an empty
28
+ # SeqRange.
29
+ #
30
+ # See also ContigSeqRange.parse.
31
class SeqRange
  # Class-level

  # Parses `str` into a new SeqRange. The string passed is not modified: all
  # characters other than letters, digits, and `.()<>,` are dropped from a
  # copy before parsing. Raises an error if the string cannot be parsed.
  def self.parse(str)
    str = str.gsub(/[^A-Za-z0-9\.\(\)<>,]/,"")
    if (m = /^join\((.+)\)$/i.match(str))
      sr = SeqRange.new
      depth = 0
      piece = "".dup
      # Split on the commas that are not nested inside parentheses, parsing
      # each piece recursively
      m[1].each_char do |chr|
        if chr == "," && depth == 0
          sr += SeqRange.parse(piece)
          piece = "".dup
          next
        elsif chr == "("
          depth += 1
        elsif chr == ")"
          depth -= 1
          raise "Unbalanced parenthesis in '#{m[1]}'." if depth < 0
        end
        piece << chr
      end
      sr += SeqRange.parse(piece) unless piece.empty?
      sr
    elsif (m = /^complement\((.+)\)$/i.match(str))
      SeqRange.parse(m[1]).reverse!
    else
      SeqRange.new(ContigSeqRange.parse(str))
    end
  end

  # Instance-level

  # contig: Array of ContigSeqRange objects composing the range, in order.
  attr_reader :contig

  def initialize(c=nil)
    @contig = []
    self << c unless c.nil?
  end

  # Smallest and largest coordinates covered (nil if the range is empty).
  def leftmost; contig.map{ |c| c.left }.min; end
  def rightmost; contig.map{ |c| c.right }.max; end

  # Sum of the sizes of all the ranges (overlaps are counted more than once).
  def size; contig.map{ |c| c.size }.inject(0,:+); end

  # Returns a new SeqRange with the ranges of self followed by those of `sr`
  # (a SeqRange or a ContigSeqRange).
  def +(sr)
    return(self + SeqRange.new(sr)) if sr.is_a? ContigSeqRange
    raise "Unsupported operation '+' with class #{sr.class.to_s}." unless
      sr.is_a? SeqRange
    out = SeqRange.new(self)
    out << sr
    out
  end

  # Fraction (as float) of the contiguous domain `sr` covered by self, after
  # sorting and compacting self. `sr` is a ContigSeqRange, or a SeqRange that
  # is reduced to a single range once sorted and compacted. Raises an error
  # if `sr` is not contiguous or doesn't span all of self.
  def /(sr)
    if sr.is_a? SeqRange
      sr2 = sr.sort.compact
      # Number of ranges left, not the number of positions they cover
      raise "Denominator is not a contiguous domain." unless
        sr2.contig.size == 1
      return(self / sr2.contig.first)
    end
    raise "Unsupported operation '/' with class #{sr.class.to_s}" unless
      sr.is_a? ContigSeqRange
    raise "Denominator doesn't span the whole domain of numerator." unless
      sr.left <= leftmost && sr.right >= rightmost
    # NOTE(review): the pragma is switched off (not on) while measuring, and
    # sort/compact compare strands directly -- confirm the intended effect.
    i = ContigSeqRange.IGNORE_STRAND
    ContigSeqRange.IGNORE_STRAND = false
    begin
      range = sort.compact.size
    ensure
      # Restore the global pragma even if measuring fails
      ContigSeqRange.IGNORE_STRAND = i
    end
    range.to_f / sr.size
  end

  # Appends to self a ContigSeqRange, an Array of ContigSeqRange objects, or
  # the ranges of another SeqRange. Raises an error for any other object.
  def <<(c)
    if c.is_a? ContigSeqRange
      @contig << c
    elsif c.is_a? SeqRange
      # Copy the ranges, so in-place operations on self (like reverse!) never
      # alter the ranges of the SeqRange they came from
      @contig += c.contig.map{ |cc| cc.dup }
    elsif c.is_a? Array
      raise "Array must contain only objects of class ContigSeqRange." unless
        c.all?{ |cc| cc.is_a? ContigSeqRange }
      @contig += c
    else
      raise "Unsupported operation '<<' with class #{c.class.to_s}."
    end
  end

  # Non-destructive versions of reverse!, sort!, and compact!: they return a
  # modified copy and leave self untouched.
  def reverse ; SeqRange.new(self).reverse! ; end
  def sort ; SeqRange.new(self).sort! ; end
  def compact ; SeqRange.new(self).compact! ; end

  # Reverses in place both the strand of each range and their order.
  def reverse!
    @contig.each{ |c| c.reverse! }
    @contig.reverse!
    self
  end

  # Sorts the ranges in place by their left coordinate.
  def sort!
    @contig.sort!{ |x,y| x.left <=> y.left }
    self
  end

  # Merges in place any two consecutive ranges that are in the same strand
  # and are overlapping or adjacent, until no such pair remains. Only
  # neighbors in the list are compared, so sort first to merge all of them.
  def compact!
    return self if contig.size < 2
    loop do
      i = (1 ... @contig.size).find do |k|
        @contig[k-1].reverse? == @contig[k].reverse? &&
          @contig[k-1].contig?(@contig[k])
      end
      break if i.nil?
      @contig[i-1] = @contig[i-1] + @contig[i]
      @contig.delete_at(i)
    end
    self
  end

  # GenBank-like representation, using `join()` if there are several ranges.
  def to_s
    o = contig.map{ |c| c.to_s }.join(",")
    o = "join(#{o})" if contig.size > 1
    o
  end
end
146
+
147
+
148
+ # ContigSeqRange.parse(str): Initializes a new ContigSeqRange from a string. A
149
+ # ContigSeqRange is a primitive of `SeqRange` that doesn't support the
150
+ # `join()` operator. Other than that, syntax is identical to `SeqRange`.
151
+ # str: A string describing the sequence range as in GenBank records (except
152
+ # `join()`).
153
+ #
154
+ # ContigSeqRange.new(a,b): Initializes a new ContigSeqRange from the
155
+ # coordinates as integers.
156
+ # a: Start of the range.
157
+ # b: End of the range. If a>b, the `complement()` operator is assumed.
158
+ #
159
+ # ContigSeqRange.IGNORE_STRAND = true: Use this pragma to ignore strandness.
160
+ # If set, it globally affects the behavior of the class. Note that
161
+ # `SeqRange` instances contain a collection of `ContigSeqRange` objects, so
162
+ # that class is also affected.
163
class ContigSeqRange
  # Class-level
  @@IGNORE_STRAND = false
  def self.IGNORE_STRAND=(v); @@IGNORE_STRAND = !!v ; end
  def self.IGNORE_STRAND; @@IGNORE_STRAND ; end

  # Parses `str` into a new ContigSeqRange. The string passed is not modified
  # (a lowercase copy is parsed). A single position, like "5", is a range that
  # starts and ends in that position. Raises an error if the string cannot be
  # parsed.
  def self.parse(str)
    str = str.downcase
    m = %r{^
      (?<c>complement\()?   # Reverse
      (?<lt><?)             # Open-ended to the left
      (?<left>\d+)          # Left coordinate
      (
        \.\.\.?             # 2 or 3 dots
        (?<gt1>>?)          # Open-ended to the right
        (?<right>\d+)       # Right coordinate
      )?
      (?<gt2>>?)            # Open-ended to the right
      \)?                   # If reverse
    $}x.match(str)
    raise "Cannot parse range: #{str}." if m.nil?
    left = m[:left].to_i
    # Without a right coordinate the range covers a single position
    right = m[:right].nil? ? left : m[:right].to_i
    c = ContigSeqRange.new(left, right)
    c.open_left = true if m[:lt]=="<"
    c.open_right = true if m[:gt1]==">" or m[:gt2]==">"
    c.reverse! if m[:c]=="complement("
    c
  end

  # Instance-level

  # open_left, open_right: Whether the range is open-ended to the left (`<`)
  #   or to the right (`>`).
  attr_accessor :open_left, :open_right
  # coords: Array with the left and right coordinates, in that order.
  attr_reader :coords

  # a: Start of the range.
  # b: End of the range. If a>b, the range is in the reverse strand.
  def initialize(a,b)
    @coords = [[a,b].min, [a,b].max]
    @open_left = false
    @open_right = false
    @reverse = (a > b)
  end

  # Coordinates in reading direction: `from` is the right coordinate and `to`
  # the left one in reverse ranges, and the other way around otherwise.
  def from; coords[ reverse? ? 1 : 0 ] ; end
  def to; coords[ reverse? ? 0 : 1 ] ; end

  # Smallest and largest coordinates, regardless of the strand.
  def left; coords[0] ; end
  def right; coords[1] ; end

  # Number of positions in the range (both ends included).
  def size; right-left+1 ; end

  # Is the range in the reverse strand?
  def reverse?; @reverse ; end

  # Changes the strand of the range in place. It does nothing while the
  # IGNORE_STRAND pragma is set.
  def reverse!
    @reverse = ! reverse? unless @@IGNORE_STRAND
    self
  end

  # Do the two ranges share at least one position?
  def overlap?(sr) !(right < sr.left or left > sr.right) ; end

  # Are the two ranges overlapping or immediately adjacent?
  def contig?(sr) !(right+1 < sr.left or left-1 > sr.right) ; end

  # Returns a new ContigSeqRange spanning both ranges. Raises an error if `sr`
  # is not a ContigSeqRange, or if the ranges are not contiguous (see contig?)
  # or are in different strands.
  def +(sr)
    raise "Unsupported operation '+' with class #{sr.class.to_s}" unless
      sr.is_a? ContigSeqRange
    raise "Non-contiguous ranges cannot be added." unless contig? sr
    raise "Ranges in different strands cannot be added." unless
      reverse? == sr.reverse?
    out = ContigSeqRange.new([left,sr.left].min, [right,sr.right].max)
    out.reverse! if reverse?
    # The result is open-ended if any of the ranges reaching that end is (both
    # do when they share the coordinate)
    out.open_left = true if
      [self, sr].any?{ |r| r.left == out.left and r.open_left }
    out.open_right = true if
      [self, sr].any?{ |r| r.right == out.right and r.open_right }
    out
  end

  # GenBank-like representation of the range, like "<1..123" or
  # "complement(3..6)". Single positions are shown without dots.
  def to_s
    o = ""
    o += "<" if open_left
    o += left.to_s
    if left == right
      o += ">" if open_right
    else
      o += ".."
      o += ">" if open_right
      o += right.to_s
    end
    o = "complement(#{o})" if reverse?
    o
  end
end
237
+