miga-base 1.2.17.0 → 1.2.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/version.rb +1 -1
  3. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  4. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  5. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  6. data/utils/FastAAI/FastAAI +3659 -0
  7. data/utils/FastAAI/FastAAI-legacy/FastAAI +1336 -0
  8. data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +1296 -0
  9. data/utils/FastAAI/README.md +84 -0
  10. data/utils/enveomics/Docs/recplot2.md +244 -0
  11. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  12. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  13. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  14. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  15. data/utils/enveomics/LICENSE.txt +73 -0
  16. data/utils/enveomics/Makefile +52 -0
  17. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  18. data/utils/enveomics/Manifest/Tasks/blasttab.json +790 -0
  19. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  20. data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
  21. data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
  22. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  23. data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
  24. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  25. data/utils/enveomics/Manifest/Tasks/other.json +906 -0
  26. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  27. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +650 -0
  28. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  29. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  30. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  31. data/utils/enveomics/Manifest/categories.json +165 -0
  32. data/utils/enveomics/Manifest/examples.json +162 -0
  33. data/utils/enveomics/Manifest/tasks.json +4 -0
  34. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  35. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  36. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  37. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  38. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  39. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  40. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  41. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  42. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  43. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  44. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  45. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  46. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  47. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  48. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  49. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  50. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  51. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  52. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  53. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  54. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  55. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  56. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  57. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  58. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  59. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  60. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  61. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  62. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  63. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  64. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  65. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  66. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  67. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  68. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  69. data/utils/enveomics/README.md +42 -0
  70. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  71. data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
  72. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  73. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  74. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  75. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  76. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  77. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  78. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  79. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  80. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  81. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  82. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  83. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  84. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  85. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  86. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  87. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  88. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  89. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  90. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  91. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  92. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  93. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +123 -0
  94. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  95. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  96. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  97. data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
  98. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  99. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  100. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  101. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  102. data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
  103. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  104. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  105. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  106. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  107. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  108. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  109. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  110. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  111. data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
  112. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  113. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  114. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  115. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  116. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  117. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  118. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  119. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  120. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  121. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  122. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  123. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  124. data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
  125. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  126. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  127. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  128. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  129. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  130. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  131. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  132. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  133. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  134. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  135. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  136. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  137. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  138. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  139. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  140. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  141. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  142. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  143. data/utils/enveomics/Scripts/SRA.download.bash +55 -0
  144. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  145. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  146. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  147. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  148. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  149. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  150. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  151. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  152. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  153. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  154. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  155. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  156. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  157. data/utils/enveomics/Scripts/aai.rb +421 -0
  158. data/utils/enveomics/Scripts/ani.rb +362 -0
  159. data/utils/enveomics/Scripts/anir.rb +137 -0
  160. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  161. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  162. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  163. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  164. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  165. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  166. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  167. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  168. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  169. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  170. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  171. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  172. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +88 -0
  173. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  174. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  175. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  176. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  177. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  178. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  179. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  180. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +74 -0
  181. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  182. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  183. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  184. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  185. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  186. data/utils/enveomics/Scripts/ogs.rb +104 -0
  187. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  188. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  189. data/utils/enveomics/Scripts/rbm.rb +108 -0
  190. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  191. data/utils/enveomics/Tests/Makefile +10 -0
  192. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  193. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  194. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  195. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  196. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  197. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  198. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  199. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  200. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  201. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  202. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  203. data/utils/enveomics/Tests/alkB.nwk +1 -0
  204. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  205. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  206. data/utils/enveomics/Tests/hiv1.faa +59 -0
  207. data/utils/enveomics/Tests/hiv1.fna +134 -0
  208. data/utils/enveomics/Tests/hiv2.faa +70 -0
  209. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  210. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  211. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  212. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  213. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  214. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  215. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  216. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  217. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  218. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  219. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  220. data/utils/enveomics/build_enveomics_r.bash +45 -0
  221. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  222. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  223. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  224. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  225. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  226. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  227. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  228. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  229. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  230. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  231. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  232. data/utils/enveomics/enveomics.R/R/utils.R +80 -0
  233. data/utils/enveomics/enveomics.R/README.md +81 -0
  234. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  235. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  236. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
  237. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
  238. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
  239. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  240. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  241. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  242. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  243. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  244. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  245. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
  246. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
  247. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
  248. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  249. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  250. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
  251. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
  252. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
  253. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
  254. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  255. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
  256. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
  257. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  258. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  259. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
  260. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  261. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  262. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  263. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  264. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
  265. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  266. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  267. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
  268. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  269. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  270. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  271. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  272. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
  273. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  274. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
  275. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
  276. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
  277. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  278. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  279. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
  280. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  281. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
  282. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  283. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  284. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  285. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  286. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
  287. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
  288. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
  289. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
  290. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  291. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  292. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  293. data/utils/enveomics/globals.mk +8 -0
  294. data/utils/enveomics/manifest.json +9 -0
  295. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  296. data/utils/multitrim/README.md +67 -0
  297. data/utils/multitrim/multitrim.py +1555 -0
  298. data/utils/multitrim/multitrim.yml +13 -0
  299. metadata +301 -5
@@ -0,0 +1,17 @@
1
+
2
# Exception classes defined under the Enveomics namespace.
module Enveomics
  # Common ancestor of every exception class declared in this module
  class Error < RuntimeError; end

  # Direct subclasses of Enveomics::Error
  class CommandError < Error; end
  class OptionError < Error; end
  class UnimplementedError < Error; end
  class ParseError < Error; end
end
@@ -0,0 +1,30 @@
1
+
2
+ require 'enveomics_rb/stats/sample'
3
+
4
module Enveomics
  # Calculate Gaussian Mixture Models by Expectation Maximization
  class GmmEm
    attr_reader :sample, :components, :opts

    # Initialize Enveomics::GmmEm object from numeric array +x+, +components+
    # gaussian components (an Integer), and options hash +opts+ with supported
    # Symbol keys:
    # - ll_delta_convergence: Maximum change in LL to consider convergence
    #   (by default: 1e-15)
    # - max_iter: Maximum number of EM iterations (by default: 1_000)
    # - init_mu: Initial components means as numeric array
    # - init_sigma: Initial components standard deviation as numeric array
    # - init_alpha: Initial components fractions as numeric array adding up to 1
    #
    # The +opts+ hash is copied, so the caller's hash is never modified.
    def initialize(x, components = 2, opts = {})
      # NOTE(review): this previously referenced +Enve::Stats::Sample+, but the
      # library namespace is +Enveomics+; confirm against stats/sample.rb
      @sample = Enveomics::Stats::Sample.new(x)
      # Previously never stored, so the +components+ reader always returned nil
      @components = components
      @opts = opts.dup
      @opts[:ll_delta_convergence] ||= 1e-15
      @opts[:max_iter] ||= 1_000
    end
  end
end
30
+
@@ -0,0 +1,253 @@
1
+
2
+ #
3
+ # @author: Luis M. Rodriguez-R
4
+ # @update: Jul-14-2015
5
+ # @license: artistic license 2.0
6
+ #
7
+
8
+ module JPlace
9
+ ##### CLASSES:
10
# Placement.new(placement[, fields]): Initializes a new read placement.
# placement: A hash containing the placement. It must have a 'p' key with at
#   least one entry, and either an 'n' key (list of names) or an 'nm' key
#   (list of [name, multiplicity] pairs).
# fields: If passed, sets the field order for all subsequent placements. It
#   is only recorded while no field order has been set yet.
class Placement
  # +flag+ is used by JPlace.distances.rb as a placeholder
  attr_accessor :flag
  attr_reader :p, :n, :m
  # Field order shared by all placements
  @@fields = nil
  def self.fields=(fields)
    @@fields = fields
  end
  def self.fields
    @@fields
  end
  def initialize(placement, fields=nil)
    @@fields = fields if @@fields.nil? && !fields.nil?
    # Save only the best (first) placement:
    abort "Placements must contain a 'p' field.\n" if placement["p"].nil?
    abort "Placements must contain a 'p' field with at least one entry.\n" if placement["p"][0].nil?
    @p = [placement["p"][0]]
    # Find name-only placements (EPA-style), each with multiplicity 1:
    unless placement["n"].nil?
      @n = placement["n"]
      @m = @n.map { 1 }
    end
    # Find multiplicity placements (pplacer-style); these take precedence
    # over 'n' when both are present:
    unless placement["nm"].nil?
      @n = placement["nm"].map { |nm| nm[0] }
      @m = placement["nm"].map { |nm| nm[1].to_i }
    end
    abort "Placements must contain one of 'n' or 'nm' fields.\n" if @n.nil? || @m.nil?
  end
  # Names and multiplicities as an array of hashes with keys :n and :m
  def nm
    n.zip(m).map { |name, mult| { :n => name, :m => mult } }
  end
  # Value of +field+ in the saved placement. Aborts if the field order is
  # undefined or does not include +field+
  def get_field_value(field)
    abort "Impossible to read placement with undefined fields." if @@fields.nil?
    f = @@fields.find_index(field)
    abort "Undefined field #{field}." if f.nil?
    p[0][f]
  end
  # Sets +field+ to +value+ in the saved placement. Aborts under the same
  # conditions as get_field_value
  def set_field_value(field, value)
    abort "Impossible to modify placement with undefined fields." if @@fields.nil?
    f = @@fields.find_index(field)
    abort "Undefined field #{field}." if f.nil?
    p[0][f] = value
  end
  def edge_num
    get_field_value('edge_num').to_i
  end
  def likelihood
    get_field_value('likelihood').to_f
  end
  def like_weight_ratio
    get_field_value('like_weight_ratio').to_f
  end
  # Falls back to 0 when the stored value is nil
  def distal_length
    (get_field_value('distal_length') || 0).to_f
  end
  # Falls back to 0 when the stored value is nil
  def pendant_length
    (get_field_value('pendant_length') || 0).to_f
  end
  def to_s
    "#<Placement of #{n}: #{p}>"
  end
end
74
+
75
# Ancillary class Tree: builds Node objects from Newick text
class Tree
  # Cached answer of has_iconv? (nil until first asked)
  @@HAS_ICONV = nil
  # Is the 'iconv' library loadable? The answer is computed once and cached
  def self.has_iconv?
    return @@HAS_ICONV unless @@HAS_ICONV.nil?
    @@HAS_ICONV =
      begin
        require 'rubygems'
        require 'iconv'
        true
      rescue LoadError
        false
      end
  end
  # Returns Node.new for the Newick string +nwk+. When iconv is available,
  # the text is first passed through a UTF-8 to UTF-8//IGNORE conversion
  # (with a temporary trailing space that is removed afterwards)
  def self.from_nwk(nwk)
    if Tree.has_iconv?
      converter = Iconv.new('UTF-8//IGNORE','UTF-8')
      nwk = converter.iconv(nwk + ' ')[0..-2]
    end
    Node.new(nwk)
  end
end
98
+
99
# Node.new(nwk[, parent]): Initializes a new Node.
# nwk: Node's description in Newick format. Every edge except an
#   unparented node ending in ';' must carry an index in curly brackets
#   (e.g., "A:0.1{3}"). The string passed by the caller is not modified.
# parent: Node's parent, or nil if root node.
class Node
  # Class
  # Registry of indexed nodes, shared by all trees: @@edges[i] is the node
  # registered with index i
  @@edges = []
  def self.edges
    @@edges
  end
  def self.register(node)
    @@edges[node.index] = node unless node.index.nil?
  end
  # Class-level functions related to JPlace
  # Adds +placement+ to the node registered at the placement's edge_num.
  # Aborts if no node is registered there
  def self.link_placement(placement)
    edge = @@edges[placement.edge_num]
    abort "Trying to link placement in undefined edge #{placement.edge_num}: #{placement.to_s}" if edge.nil?
    edge.add_placement!(placement)
  end
  # Removes +placement+ from the node registered at the placement's edge_num
  # and returns whatever delete_placement! returns
  def self.unlink_placement(placement)
    @@edges[placement.edge_num].delete_placement!(placement)
  end
  # Instance
  attr_reader :children, :length, :name, :label, :index, :nwk, :parent, :placements, :collapsed
  def initialize(nwk, parent=nil)
    abort "Empty newick.\n" if nwk.nil? || nwk == ''
    # Replace any ';' that is not the last character. A non-destructive gsub
    # is used so the caller's string is left untouched (and may be frozen)
    nwk = nwk.gsub(/;(.)/, '--\1')
    @nwk = nwk
    @parent = parent
    @placements = []
    @collapsed = false
    # Find index
    index_m = /^(?<pre>.*){(?<idx>[0-9]+)}(?<post>[^\(\),;]*);?$/.match(nwk)
    if index_m.nil? && parent.nil? && nwk[nwk.length-1] == ';'
      # Only an unparented node terminated by ';' may lack an index
      @index = nil
    else
      abort "Unindexed edge found:\n#{@nwk}\n" if index_m.nil?
      nwk = index_m[:pre] + index_m[:post]
      @index = index_m[:idx].to_i
    end
    # Find name, label, and length
    meta_m = /^(\((?<cont>.+)\))?(?<name>[^:\(\);]*)(:(?<length>[0-9\.Ee+-]*)(?<label>\[[^\[\]\(\);]+\])?)?;?$/.match(nwk)
    abort "Cannot parse node metadata (index #{@index}):\n#{@nwk}\n" if meta_m.nil?
    nwk = meta_m[:cont]
    @name = meta_m[:name]
    @length = meta_m[:length]
    @label = meta_m[:label]
    # Find children: repeatedly cut the remaining text at the first comma
    # found outside parentheses and quotes
    @children = []
    nwk ||= ''
    quote = nil
    while nwk != ''
      i = 0 # Parenthesis depth (nil once a top-level comma is found)
      j = 0 # Number of characters belonging to the current child
      nwk.each_char do |chr|
        if quote.nil?
          if chr == '"' || chr == "'"
            quote = chr
          else
            i += 1 if chr == '('
            i -= 1 if chr == ')'
            if i == 0 && chr == ','
              i = nil
              break
            end
          end
        else
          quote = nil if chr == quote
        end
        j += 1
      end
      abort "Unbalanced node at edge {#{@index}}, with leftness #{i}:\n#{@nwk}\n" unless i.nil? || i == 0
      @children << Node.new(nwk[0 .. j-1], self)
      nwk = nwk.length == j ? '' : nwk[j+1 .. -1]
    end
    Node.register(self)
  end
  # Accessors/Setters
  # Sets the name, replacing whitespace and the characters ( ) , ; : by '_'
  def name=(new_name)
    @name = new_name.gsub(/[\s\(\),;:]/, '_')
  end
  # Tree algorithms
  # Calls the block with each node: children first, then self
  def post_order &blk
    children.each { |n| n.post_order(&blk) }
    blk[self]
  end
  # Calls the block with each node: first child, self, second child. Aborts
  # if a node has a number of children other than 0 or 2
  def in_order &blk
    abort "Tree must be dycotomic to traverse in_order, node #{cannonical_name} "+
      "has #{children.length} children." unless [0,2].include? children.length
    children[0].in_order(&blk) unless children[0].nil?
    blk[self]
    children[1].in_order(&blk) unless children[1].nil?
  end
  # Calls the block with each node: self first, then children
  def pre_order &blk
    blk[self]
    children.each { |n| n.pre_order(&blk) }
  end
  # Array of nodes from self up to the root (cached after the first call)
  def path_to_root
    if @path_to_root.nil?
      @path_to_root = [self]
      @path_to_root += parent.path_to_root unless parent.nil?
    end
    @path_to_root
  end
  # Sum of the lengths along path_to_root, counting missing lengths as 0.0
  # (cached after the first call)
  def distance_to_root
    if @distance_to_root.nil?
      @distance_to_root = path_to_root.map { |n| n.length.nil? ? 0.0 : n.length.to_f }.reduce(0.0, :+)
    end
    @distance_to_root
  end
  # First node in the path from self to the root that is also in the path
  # from +node+ to the root
  def lca(node)
    own_path = path_to_root
    other_path = node.path_to_root
    own_path.find { |n| other_path.include? n }
  end
  def distance(node)
    distance_to_root + node.distance_to_root - (2.0 * lca(node).distance_to_root)
  end
  # Nodes are equal when their indexes are equal. Objects without an index
  # (e.g., nil) are never equal to a node
  def ==(node)
    node.respond_to?(:index) && index == node.index
  end
  # Tree representation
  # First non-empty value among: name, label, and "{index}"; '' if none
  def cannonical_name
    return(name) unless name.nil? || name == ""
    return(label) unless label.nil? || label == ""
    return("{#{index.to_s}}") unless index.nil?
    ""
  end
  def to_s
    o = ""
    o += "(" + children.map { |c| c.to_s }.join(",") + ")" if children.length > 0
    o += cannonical_name
    u = "#{length.nil? ? "" : length}#{label.nil? ? "" : label}"
    o += ":#{u}" unless u == ""
    o
  end
  # Instance-level functions related to JPlace
  # Moves every placement of the descendant nodes to this node (by changing
  # each placement's 'edge_num' to this node's index) and marks it collapsed
  def collapse!
    pre_order do |n|
      if n != self
        while n.placements.length > 0
          moved = Node.unlink_placement(n.placements[0])
          moved.set_field_value('edge_num', index)
          Node.link_placement(moved)
        end
      end
    end
    @collapsed = true
  end
  def add_placement!(placement)
    @placements << placement
  end
  def delete_placement!(placement)
    @placements.delete(placement)
  end
end
251
+
252
+ end # module JPlace
253
+
@@ -0,0 +1,88 @@
1
+
2
module Enveomics
  ##
  # A simple object representing a sequence match from a search engine
  # supporting tabular BLAST output
  class Match
    class << self
      ##
      # Hash of column names (Symbol) to the class used to represent their
      # values, in the order in which columns appear in a row
      def column_types
        {
          qseqid: String, sseqid: String, pident: Float,
          length: Integer, mismatch: Integer, gapopen: Integer,
          q_start: Integer, q_end: Integer, s_start: Integer,
          s_end: Integer, evalue: Float, bitscore: Float,
          # Non-standard (but frequently used in Enveomics Collection):
          qry_len: Integer, sbj_len: Integer
        }
      end

      ##
      # Class of the column named (or aliased as) +sym+, nil if unknown
      def column_type(sym)
        column_types[colname(sym)]
      end

      ##
      # Converts +value+ to the type of column +sym+ using to_s, to_f, or
      # to_i. Returns nil for unknown columns
      def to_column_type(sym, value)
        case column_type(sym).to_s
        when 'String' ; value.to_s
        when 'Float'  ; value.to_f
        when 'Integer'; value.to_i
        end
      end

      ##
      # Column names in row order
      def columns
        column_types.keys
      end

      ##
      # Zero-based position of column +sym+ in a row, nil if unknown
      def column(sym)
        columns.index(colname(sym))
      end

      ##
      # Hash of supported aliases to column names
      def colsynonyms
        {
          qry: :qseqid, sbj: :sseqid,
          id: :pident, len: :length, score: :bitscore
        }
      end

      ##
      # All column names followed by all aliases
      def colnames
        columns + colsynonyms.keys
      end

      ##
      # Resolves a column name or alias +sym+ to the column name, nil if
      # unknown
      def colname(sym)
        s = sym.to_sym
        column_types[s] ? s : colsynonyms[s]
      end
    end

    ####--- Instance Level ---###

    attr_reader :row

    ##
    # Initialize Enveomics::Match object from a tabular blast line String +ln+
    def initialize(ln)
      @row = ln.chomp.split("\t")
    end

    # One reader per column name and alias, returning the typed value.
    # Columns missing from the row are converted from nil (i.e., '', 0.0, 0)
    colnames.each do |sym|
      define_method sym do
        self.class.to_column_type(sym, row[self.class.column(sym)])
      end
    end

    ##
    # Alignment length as a fraction of the query length, or 0.0 if the
    # query length is zero (including when the column is missing)
    def qry_fract
      return 0.0 if qry_len.zero?

      @qry_fract ||= len.to_f / qry_len
    end

    alias fract qry_fract

    ##
    # Alignment length as a fraction of the subject length, or 0.0 if the
    # subject length is zero (including when the column is missing)
    def sbj_fract
      return 0.0 if sbj_len.zero?

      # Cached separately from qry_fract so the two values never mix
      @sbj_fract ||= len.to_f / sbj_len
    end

    ##
    # The row as a tab-delimited String
    def to_s
      row.join("\t")
    end
  end
end
@@ -0,0 +1,182 @@
1
+
2
+ ##### CLASSES:
3
# Gene.new(genome, id): Initializes a new Gene.
# genome: A string uniquely identifying the parent genome, or the integer
#   index of a genome that has already been registered.
# id: A string uniquely identifying the gene within the genome. It can be
#   non-unique across genomes.
class Gene
  attr_reader :genome_id, :id

  # Registry of genome names shared by all genes; a gene stores only the
  # index of its genome in this list.
  @@genomes = []

  # Get all genomes in the run as an array of strings.
  def self.genomes
    @@genomes
  end

  # Registers the genome (if it is new) and stores its index together with
  # the gene ID. Aborts if an integer index is passed that has not been
  # registered yet.
  def initialize(genome, id)
    if genome.is_a? Integer
      abort "Internal error: Genome #{genome} does not exist yet." if
        @@genomes[genome].nil?
      @genome_id = genome
    else
      @@genomes << genome unless @@genomes.include? genome
      @genome_id = @@genomes.index(genome)
    end
    @id = id
  end

  # Compare if two Gene objects refer to the same gene. Objects that do not
  # expose both genome_id and id (e.g., nil) are never equal to a Gene.
  def ==(b)
    return false unless b.respond_to?(:genome_id) && b.respond_to?(:id)

    genome_id == b.genome_id && id == b.id
  end

  # Get the name of the genome this gene belongs to.
  def genome
    @@genomes[genome_id]
  end

  # String representation in the form "genome:id".
  def to_s
    "#{genome}:#{id}"
  end
end
36
+
37
# OG.new(): Initializes an empty OG.
# OG.new(genomes, genes): Initializes a pre-computed OG.
# genomes: List of genomes as an array of strings (as in Gene.genomes).
# genes: List of genes as an array of strings, with '-' indicating no genes and
#   multiple genes separated by ','.
class OG
  attr_reader :genes, :notes

  # Builds the OG. When both lists are given, every comma-separated gene ID
  # is added under the genome found at the same position.
  def initialize(genomes=nil, genes=nil)
    @genes = []
    @notes = []
    return if genomes.nil? || genes.nil?

    genes.each_with_index do |entry, pos|
      next if entry == "-"

      entry.split(/,/).each { |gene_id| self << Gene.new(genomes[pos], gene_id) }
    end
  end

  # Add genes or combine another OG into the loaded OG (self).
  def <<(obj)
    case obj
    when Gene
      @genes[obj.genome_id] ||= []
      @genes[obj.genome_id] << obj.id unless include?(obj)
    when OG
      obj.genes_obj.each { |gene| self << gene }
    else
      abort "Unsupported class for #{obj}"
    end
  end

  # Get the list of genes as objects (internally saved as strings to save RAM).
  def genes_obj
    Gene.genomes.length.times.flat_map do |genome_id|
      ids = genes[genome_id]
      next [] if ids.nil?

      ids.map { |gene_id| Gene.new(Gene.genomes[genome_id], gene_id) }
    end
  end

  # Evaluates if the OG contains the passed gene.
  def include?(gene)
    ids = genes[gene.genome_id]
    ids.nil? ? false : ids.include?(gene.id)
  end

  # Get the list of genomes containing genes in this OG, as genome indexes.
  def genomes
    Gene.genomes.length.times.reject do |gno|
      genes[gno].nil? || genes[gno].empty?
    end
  end

  # Adds a note that will be printed after the last column. With note_idx,
  # the note is stored at that position, appended to any note already there
  # using ' || ' as separator.
  def add_note note, note_idx=nil
    return @notes << note if note_idx.nil?

    previous = @notes[note_idx]
    prefix = previous.nil? ? '' : previous + ' || '
    @notes[note_idx] = prefix + note
  end

  # Tab-delimited row with one column per registered genome ('-' for no
  # genes), followed by '#' and the notes if there are any.
  def to_s
    columns = Gene.genomes.length.times.map do |genome_id|
      ids = genes[genome_id]
      ids.nil? ? "-" : ids.join(",")
    end
    suffix = notes.empty? ? '' : "\t#\t" + notes.join("\t")
    columns.join("\t") + suffix
  end

  # Array of booleans, one per registered genome, true when this OG has an
  # entry for that genome.
  def to_bool_a
    Gene.genomes.length.times.map { |genome_id| !genes[genome_id].nil? }
  end
end
107
+
108
# OGCollection.new(): Initializes an empty collection of OGs.
class OGCollection
  attr_reader :ogs, :note_srcs

  def initialize
    @ogs = []
    @note_srcs = []
  end

  # Add an OG to the collection
  def <<(og)
    @ogs << og
  end

  # Compare OGs all-vs-all to identify groups that should be merged. Each OG
  # is merged into the first already-kept OG sharing one of its genes, or
  # kept as a new OG if none does.
  def consolidate!
    old_ogs = ogs
    @ogs = []
    old_ogs.each do |og|
      host = nil
      og.genes_obj.each do |gene|
        host = get_og(gene)
        break unless host.nil?
      end
      host.nil? ? self << og : host << og
    end
  end

  # Removes OGs present in less than 'fraction' of the genomes
  def filter_core!(fraction=1.0)
    min_genomes = (fraction * Gene.genomes.size).ceil
    @ogs.select! { |og| og.genomes.size >= min_genomes }
  end

  # Removes OGs present more than 'dups' number of times in any genome
  def remove_inparalogs!(dups=1)
    @ogs.select! do |og|
      # The per-genome list is sparse: genomes without genes in the OG are
      # nil and count as zero copies
      og.genes.all? { |pergenome| pergenome.nil? || pergenome.size <= dups }
    end
  end

  # Add a pair of RBM genes into the corresponding OG, or create a new OG.
  def add_rbm(a, b)
    og = get_og(a) || get_og(b)
    if og.nil?
      og = OG.new
      @ogs << og
    end
    og << a
    og << b
  end

  # Get the OG containing the gene (returns the first, if multiple).
  def get_og(gene)
    ogs.find { |og| og.include? gene }
  end

  # Get the genes from a given genome (returns an array of arrays)
  def get_genome_genes(genome)
    genome_id = Gene.genomes.index(genome)
    ogs.map { |og| og.genes[genome_id] || [] }
  end

  # Add annotation sources
  def add_note_src src
    @note_srcs << src
  end

  # Tab-delimited table: a header with the genome names (plus '#' and the
  # annotation sources, if any) followed by one line per OG.
  def to_s
    header = Gene.genomes.join("\t")
    header += "\t#\t" + note_srcs.join("\t") unless note_srcs.empty?
    header + "\n" + ogs.map(&:to_s).join("\n")
  end

  # Array with the boolean presence array of each OG
  def to_bool_a
    ogs.map(&:to_bool_a)
  end
end
182
+
@@ -0,0 +1,49 @@
1
+ require 'enveomics_rb/bm_set'
2
+
3
module Enveomics
  ##
  # Reciprocal best matches between two sequence sets, computed from the
  # best matches in each direction
  class RBM
    attr_reader :seq1, :seq2, :bms1, :bms2

    ##
    # Initialize RBM object with sequence paths +seq1+ and +seq2+, and
    # Enveomics::BMset options Hash +bm_opts+
    def initialize(seq1, seq2, bm_opts = {})
      @seq1, @seq2 = seq1, seq2
      # One best-match set per search direction
      @bms1 = Enveomics::BMset.new(seq1, seq2, bm_opts)
      @bms2 = Enveomics::BMset.new(seq2, seq1, bm_opts)
      @set = nil
    end

    ##
    # Array of Reciprocal Best Enveomics::Match objects (computed on first
    # use and cached afterwards)
    def set
      @set = reciprocate! unless @set
      @set
    end

    ##
    # Number of reciprocal best matches found
    def count
      set.count
    end

    ##
    # Find reciprocal best matches and return the subset of +bms1+ that
    # is reciprocal with +bms2+
    def reciprocate!
      bms1.each.select do |forward|
        backward = bms2[forward.sbj]
        backward && forward.qry == backward.sbj
      end
    end

    ##
    # Enumerate RBMs and yield +blk+. Returns an Enumerator if no block
    # is given
    def each(&blk)
      return to_enum(:each) unless block_given?

      set.each { |match| blk.call(match) }
    end
  end
end
@@ -0,0 +1,74 @@
1
+
2
+ #
3
+ # @author: Luis M. Rodriguez-R
4
+ # @license: artistic license 2.0
5
+ #
6
+
7
+ require "enveomics_rb/enveomics"
8
+ use "restclient"
9
+ use "json"
10
+
11
# Helpers to query the NCBI E-utilities and the EBI dbfetch service over HTTP
# using RestClient.
class RemoteData
  # Class-level variables
  @@EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
  @@EBIREST = "http://www.ebi.ac.uk/Tools"

  # Class-level methods

  # Calls the E-utilities +script+ with the Hash +params+ as query
  # parameters, trying up to 10 times. Returns the response body as String
  # and also writes it to +outfile+ if given. Aborts if no attempt returns
  # code 200.
  def self.eutils(script, params={}, outfile=nil)
    response = nil
    10.times do
      begin
        response = RestClient.get "#{@@EUTILS}/#{script}", {:params=>params}
      rescue => err
        # Report the response attached to this failure (if any), not the one
        # left over from a previous attempt
        response = err.respond_to?(:response) ? err.response : nil
        failure = response.nil? ? "without error code" :
          "with error code #{response.code}"
        warn "Request failed #{failure}."
        next
      end
      break if response.code == 200
    end
    # The response is still nil if every attempt raised without a response
    code = response.nil? ? "unavailable" : response.code
    abort "Unable to reach NCBI EUtils, error code #{code}." unless code == 200
    save_response(response, outfile)
    response.to_s
  end

  # Runs efetch.fcgi; takes the same arguments as eutils after +script+.
  def self.efetch(*etc)
    eutils "efetch.fcgi", *etc
  end

  # Runs elink.fcgi; takes the same arguments as eutils after +script+.
  def self.elink(*etc)
    eutils "elink.fcgi", *etc
  end

  # Runs esummary.fcgi; takes the same arguments as eutils after +script+.
  def self.esummary(*etc)
    eutils "esummary.fcgi", *etc
  end

  # Looks up +old_gi+ in the database +db+ using esummary. Returns a pair:
  # the efetch result (rettype "gi") for the "replacedby" accession, or nil
  # if there is none, and the "status" field of the summary. Returns
  # nil, nil if the summary has no entry for +old_gi+.
  def self.update_gi(db, old_gi)
    summ = JSON.parse RemoteData.esummary({:db=>db, :id=>old_gi,
      :retmode=>"json"})
    entry = summ["result"].nil? ? nil : summ["result"][old_gi.to_s]
    return nil,nil if entry.nil?
    new_acc = entry["replacedby"]
    new_gi = (new_acc.nil? ? nil :
      RemoteData.efetch({:db=>db, :id=>new_acc, :rettype=>"gi"}))
    return new_gi,entry["status"]
  end

  # Fetches the entry +id+ from the EBI database +db+ in the given +format+
  # using dbfetch (timeout: 600). Returns the response body as String and
  # also writes it to +outfile+ if given. Raises RuntimeError if the
  # response code is not 200.
  def self.ebiFetch(db, id, format, outfile=nil)
    url = "#{@@EBIREST}/dbfetch/dbfetch/#{db}/#{id}/#{format}"
    response = RestClient::Request.execute(:method=>:get,
      :url=>url, :timeout=>600)
    raise "Unable to reach EBI REST client, error code " +
      response.code.to_s + "." unless response.code == 200
    save_response(response, outfile)
    response.to_s
  end

  # Returns the taxonomy ID (String of digits) found in the "annot" record
  # of +id+ in the EBI database +db+, or nil if none is found. The
  # 'FT /db_xref="taxon:' line is preferred over the 'OX NCBI_TaxID=' line.
  def self.ebiseq2taxid(id,db)
    doc = RemoteData.ebiFetch(db, id, "annot").split(/[\n\r]/)
    ln = doc.grep(/^FT\s+\/db_xref="taxon:/).first
    ln = doc.grep(/^OX\s+NCBI_TaxID=/).first if ln.nil?
    return nil if ln.nil?
    taxid = ln.sub(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, "\\1")
    taxid =~ /\A\d+\z/ ? taxid : nil
  end

  # Writes the body of +response+ to the file +outfile+, unless it is nil.
  def self.save_response(response, outfile)
    return if outfile.nil?

    # Block form closes the file even if writing fails
    File.open(outfile, "w") { |ohf| ohf.print response.to_s }
  end
  private_class_method :save_response
end
74
+