miga-base 0.7.26.0 → 0.7.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/version.rb +1 -1
  3. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  4. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  5. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  6. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  7. data/utils/FastAAI/README.md +84 -0
  8. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  9. data/utils/enveomics/Docs/recplot2.md +244 -0
  10. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  11. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  12. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  13. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  14. data/utils/enveomics/LICENSE.txt +73 -0
  15. data/utils/enveomics/Makefile +52 -0
  16. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  17. data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
  18. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  19. data/utils/enveomics/Manifest/Tasks/fasta.json +766 -0
  20. data/utils/enveomics/Manifest/Tasks/fastq.json +243 -0
  21. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  22. data/utils/enveomics/Manifest/Tasks/mapping.json +67 -0
  23. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  24. data/utils/enveomics/Manifest/Tasks/other.json +829 -0
  25. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  26. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +501 -0
  27. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  28. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  29. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  30. data/utils/enveomics/Manifest/categories.json +156 -0
  31. data/utils/enveomics/Manifest/examples.json +154 -0
  32. data/utils/enveomics/Manifest/tasks.json +4 -0
  33. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  34. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  35. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  36. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  37. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  38. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  39. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  40. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  41. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  42. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  43. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  44. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  45. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  46. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  47. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  48. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  49. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  50. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  51. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  52. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  53. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  54. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  55. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  56. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  57. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  58. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  59. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  60. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  61. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  62. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  63. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  64. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  65. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  66. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  67. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  68. data/utils/enveomics/README.md +42 -0
  69. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  70. data/utils/enveomics/Scripts/Aln.cat.rb +163 -0
  71. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  72. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  73. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  74. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  75. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  76. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  77. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  78. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  79. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  80. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  81. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  82. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  83. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  84. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  85. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  86. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  87. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  88. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  89. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  90. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  91. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  92. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  93. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  94. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  95. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  96. data/utils/enveomics/Scripts/FastA.N50.pl +56 -0
  97. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  98. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  99. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  100. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  101. data/utils/enveomics/Scripts/FastA.fragment.rb +92 -0
  102. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  103. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  104. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  105. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  106. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  107. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  108. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  109. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  110. data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
  111. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  112. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  113. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  114. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  115. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  116. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  117. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  118. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  119. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  120. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  121. data/utils/enveomics/Scripts/FastQ.tag.rb +63 -0
  122. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  123. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  124. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  125. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  126. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  127. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  128. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  129. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  130. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  131. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  132. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  133. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  134. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  135. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  136. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  137. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  138. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  139. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  140. data/utils/enveomics/Scripts/SRA.download.bash +57 -0
  141. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  142. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  143. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  144. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  145. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  146. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  147. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  148. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  149. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  150. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  151. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  152. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  153. data/utils/enveomics/Scripts/aai.rb +418 -0
  154. data/utils/enveomics/Scripts/ani.rb +362 -0
  155. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  156. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  157. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  158. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  159. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  160. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  161. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  162. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  163. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  164. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  165. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  166. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +30 -0
  167. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  168. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  169. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  170. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  171. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  172. data/utils/enveomics/Scripts/ogs.rb +104 -0
  173. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  174. data/utils/enveomics/Scripts/rbm.rb +146 -0
  175. data/utils/enveomics/Tests/Makefile +10 -0
  176. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  177. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  178. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  179. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  180. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  181. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  182. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  183. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  184. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  185. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  186. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  187. data/utils/enveomics/Tests/alkB.nwk +1 -0
  188. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  189. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  190. data/utils/enveomics/Tests/hiv1.faa +59 -0
  191. data/utils/enveomics/Tests/hiv1.fna +134 -0
  192. data/utils/enveomics/Tests/hiv2.faa +70 -0
  193. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  194. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  195. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  196. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  197. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  198. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  199. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  200. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  201. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  202. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  203. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  204. data/utils/enveomics/build_enveomics_r.bash +45 -0
  205. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  206. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  207. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  208. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  209. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  210. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  211. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  212. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  213. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  214. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  215. data/utils/enveomics/enveomics.R/R/utils.R +50 -0
  216. data/utils/enveomics/enveomics.R/README.md +80 -0
  217. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  218. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  219. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
  220. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
  221. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
  222. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  223. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  224. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  225. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  226. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  227. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  228. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -0
  229. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -0
  230. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -0
  231. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  232. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  233. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -0
  234. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -0
  235. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -0
  236. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -0
  237. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -0
  238. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -0
  239. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  240. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  241. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -0
  242. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  243. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  244. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  245. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  246. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -0
  247. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  248. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  249. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -0
  250. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  251. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  252. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  253. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  254. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -0
  255. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  256. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -0
  257. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +37 -0
  258. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -0
  259. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  260. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  261. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -0
  262. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -0
  263. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  264. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  265. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  266. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  267. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -0
  268. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -0
  269. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -0
  270. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -0
  271. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  272. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  273. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  274. data/utils/enveomics/globals.mk +8 -0
  275. data/utils/enveomics/manifest.json +9 -0
  276. metadata +277 -4
@@ -0,0 +1,103 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #
4
+ # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
5
+ # @license artistic license 2.0
6
+ #
7
+
8
+ $:.push File.expand_path(File.dirname(__FILE__) + "/lib")
9
+ require "enveomics_rb/remote_data"
10
+ use "nokogiri"
11
+
12
+ #================================[ Options parsing ]
13
# Default settings; every command-line switch below overrides one entry.
o = {
  :q        => false,
  :gis      => [],
  :dbfrom   => "nuccore",
  :header   => true,
  :exact_gi => false,
  :no_nil   => false,
  :ret      => "ScientificName",
  :ranks    => %w(superkingdom phylum class order family genus species)
}

# Build the parser first and run it at the end, so the switch definitions
# read as one declarative list. Leading spaces of the banner lines are
# stripped by the gsub, so the indentation used here does not reach the user.
parser = OptionParser.new do |opt|
  opt.banner = "
    Maps a list of NCBI GIs to their corresponding taxonomy using the NCBI
    EUtilities. Avoid using this script on millions of entries at a time, since
    each entry elicits two requests to NCBI's servers.

    *IMPORTANT NOTE*: NCBI is phasing out support for GIs. Please use acc.ver
    instead with NCBIacc2tax.rb.

    Usage: #{$0} [options]".gsub(/^ +/,"")
  opt.separator ""

  # Input selection
  opt.on("-g", "--gis GI1,GI2,...", Array,
         "Comma-separated list of GIs. Required unless -i is passed.") do |v|
    o[:gis] = v
  end
  opt.on("-i", "--infile FILE",
         "Raw text file containing the list of GIs, one per line.",
         "Required unless -g is passed.") do |v|
    o[:infile] = v
  end
  opt.on("-p", "--protein",
         "Use if the GIs are proteins. Otherwise, GIs are assumed to be from " +
         "the Nuccore Database.") do
    o[:dbfrom] = "protein"
  end

  # Output shaping
  opt.on("-r", "--ranks RANK1,RANK2,...", Array,
         "Taxonomic ranks to report. By default: #{o[:ranks].join(",")}.") do |v|
    o[:ranks] = v
  end
  opt.on("-n", "--noheader",
         "Do not include a header in the output.") do
    o[:header] = false
  end
  opt.on("-t", "--taxids",
         "Return Taxonomy IDs instead of scientific names.") do
    o[:ret] = "TaxId"
  end
  opt.on("--exact-gi",
         "Returns only taxonomy associated with the exact GI passed.",
         "By default, it attempts to update accession versions if possible.") do |v|
    o[:exact_gi] = v
  end
  opt.on("--ignore-missing",
         "Does not report missing GIs in the output file.",
         "By default, it reports GI and empty values for all other columns.") do |v|
    o[:no_nil] = v
  end

  # Miscellaneous
  opt.on("-q", "--quiet", "Run quietly.") do |v|
    o[:q] = true
  end
  opt.on("-h", "--help","Display this screen") do
    puts opt
    exit
  end
  opt.separator ""
end
parser.parse!
59
+
60
+ #================================[ Functions ]
61
# Looks up the taxonomy entry linked to +gi+ in the database +db+.
#
# Sends an elink request from +db+ to "taxonomy" through RemoteData and parses
# the reply as XML. Returns the first /eLinkResult/LinkSet/LinkSetDb/Link/Id
# node of the reply, or nil when the reply has no such node.
def gi2taxid(db, gi)
  query = {:dbfrom=>db, :db=>"taxonomy", :id=>gi}
  reply = Nokogiri::XML(RemoteData.elink(query))
  reply.at_xpath("/eLinkResult/LinkSet/LinkSetDb/Link/Id")
end
66
+ #================================[ Main ]
67
begin
  # GIs listed in the input file (one per line) are appended to those from -g.
  unless o[:infile].nil?
    o[:gis] += File.readlines(o[:infile]).map(&:chomp)
  end
  o[:ranks].map!(&:downcase)

  if o[:header]
    header = ["GI", "TaxId"] + o[:ranks].map(&:capitalize)
    puts header.join("\t")
  end

  o[:gis].each do |gi|
    taxid = gi2taxid(o[:dbfrom], gi)
    status = ""
    # No direct link: unless --exact-gi was given, retry with the updated GI.
    if taxid.nil? and not o[:exact_gi]
      new_gi, status = RemoteData.update_gi(o[:dbfrom], gi)
      taxid = gi2taxid(o[:dbfrom], new_gi) unless new_gi.nil?
    end

    # Still unresolved: warn, optionally emit an empty row, and move on.
    if taxid.nil?
      warn "Cannot find link to taxonomy: #{gi} #{status}"
      unless o[:no_nil]
        blank_row = [gi, ""] + Array.new(o[:ranks].size, "")
        puts blank_row.join("\t")
      end
      next
    end

    # Collect rank => value for the taxon itself and for each lineage entry.
    doc = Nokogiri::XML(
      RemoteData.efetch({:db=>"taxonomy", :id=>taxid.content}))
    taxonomy = {}
    own_rank = doc.at_xpath("/TaxaSet/Taxon/Rank").content
    taxonomy[own_rank] = doc.at_xpath("/TaxaSet/Taxon/#{o[:ret]}").content
    doc.xpath("/TaxaSet/Taxon/LineageEx/Taxon").each do |taxon|
      lineage_rank = taxon.at_xpath("./Rank").content
      taxonomy[lineage_rank] = taxon.at_xpath("./#{o[:ret]}").content
    end

    # Ranks absent from the lineage are reported as empty cells.
    cells = o[:ranks].map { |rank| taxonomy[rank] || "" }
    puts(([gi, taxid.content] + cells).join("\t"))
  end
rescue => err
  # Any failure stops the run; the message and backtrace go to STDERR.
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  err
end
@@ -0,0 +1,96 @@
1
# usage perl in_silico_GA.pl [options]
#
# Simulates paired reads from the sequence in a FastA file and writes them,
# as FastA, to the output file. All sequence lines of the input are
# concatenated into a single string; the identifier used in the read names
# comes from the last header line seen.
#
# Options:
#   --in         input FastA file
#   --out        output file name
#   --coverage   desired coverage
#   --seq_error  per-base sequencing error probability
#   --read_len   simulated read length
#   --ins_len    insertion length
#   --ins_var    passed to random_normal as the standard deviation of the
#                segment length

use strict;
use warnings;
use Getopt::Long;
use Math::Random qw(:all);

my ($infile, $outfile, $cov, $seq_error, $read_len, $ins_len, $ins_var);
GetOptions('in=s'        => \$infile,     # input fasta chr file
           'out=s'       => \$outfile,    # output file name
           'coverage=s'  => \$cov,        # desired output
           'seq_error=s' => \$seq_error,  # sequencing error
           'read_len=s'  => \$read_len,   # simulated read length
           'ins_len=s'   => \$ins_len,    # insertion length
           'ins_var=s'   => \$ins_var);

die "Both --in and --out are required\n"
    unless defined $infile && defined $outfile;

# Fail early instead of silently reading from / printing to a closed handle.
open(my $in_fh, '<', $infile)
    or die "Cannot open input file '$infile': $!\n";
open(my $out_fh, '>', $outfile)
    or die "Cannot open output file '$outfile': $!\n";

# Replacement bases used when a sequencing error is introduced, indexed 0..3.
my @code = ('C', 'A', 'T', 'G');

my $chr = '';
my $gi  = '';
while (my $line = <$in_fh>) {
    chomp $line;
    if ($line !~ /^\>/) {
        $chr .= $line;
        next;
    }
    # Header line: keep it whole unless it follows the ">gi|<id>|db|acc"
    # layout, in which case only the GI is kept. The binding operator must be
    # written "=~"; "= ~" assigns the bitwise negation of a match against $_.
    $gi = $line;
    if ($gi =~ /^\>gi\|(\S+)\|\S+\|\S+/) {
        $gi = $1;
    }
}
close($in_fh);

my $chr_size = length $chr;
print "chromosome size: $chr_size\n";
my $seg_size     = 2*$read_len + $ins_len;
my $reads_number = int($cov*$chr_size/($read_len*2));
print "generated reads $reads_number x 2\n";

for my $index (1..$reads_number) {
    # Read number, zero-padded to at least eight digits.
    my $id = 'read' . sprintf('%08d', $index) . '_' . $gi;

    # make start site
    my $start_site = int(rand($chr_size));
    # make short seg length
    my $seg_length = int(random_normal(1, $seg_size, $ins_var));

    # extract the segment; if it runs past the end of the sequence, the
    # missing part is taken from the beginning of the sequence
    my $seg = substr($chr, $start_site, $seg_length);
    my $gap = $seg_length - length($seg);
    if ($gap != 0) {
        $seg .= substr($chr, 0, $gap);
    }

    $id .= '.start' . $start_site . '.seg_len' . $seg_length;

    # get the reads: the first and the last $read_len bases of the segment
    # $seg=~tr/ATCG/TAGC/ this line can change the orientation of the second read;
    my @seq1 = split(//, substr($seg, 0, $read_len));
    my @seq2 = split(//, substr($seg, -$read_len));

    # sequencing error introducing: one Bernoulli draw per position
    my @mut1 = random_binomial($read_len, 1, $seq_error);
    my @mut2 = random_binomial($read_len, 1, $seq_error);
    for my $i (0..$#mut1) {
        if ($mut1[$i] == 1) {
            $seq1[$i] = $code[int(rand(4))];
        }
        if ($mut2[$i] == 1) {
            $seq2[$i] = $code[int(rand(4))];
        }
    }
    my $seq1 = join('', @seq1);
    my $seq2 = join('', @seq2);

    my $id1 = $id . '#0/1';
    my $id2 = $id . '#0/2';

    print {$out_fh} ">$id1\n$seq1\n>$id2\n$seq2\n";
}

close($out_fh) or die "Cannot close output file '$outfile': $!\n";
95
+
96
+
@@ -0,0 +1 @@
1
+ ../../enveomics.R
@@ -0,0 +1,24 @@
1
+
2
+ #
3
+ # @author: Luis M. Rodriguez-R
4
+ # @license: artistic license 2.0
5
+ #
6
+
7
+ require "optparse"
8
+ ARGV << "-h" if ARGV.size==0
9
+
10
# Loads one or more gems.
#
# gems:      A gem name or an Array of gem names to require.
# mandatory: If true (default), aborts with installation instructions for
#            every gem that could not be loaded. If false, failures are only
#            reported through the return value.
#
# Returns true if all gems were loaded, false otherwise. The Array passed by
# the caller is not modified.
def use(gems, mandatory=true)
  wanted = gems.is_a?(Array) ? gems.dup : [gems]
  missing =
    begin
      require "rubygems"
      # Try every gem so that the message below lists all the unmet
      # requirements, including the one that actually failed to load.
      wanted.reject do |gem|
        begin
          require gem
          true
        rescue LoadError
          false
        end
      end
    rescue LoadError
      # RubyGems itself is unavailable: nothing could be attempted.
      wanted
    end
  return true if missing.empty?
  abort "\nUnmet requirements, please install required gems:" +
    missing.map{ |gem| "\n gem install #{gem}" }.join + "\n\n" if mandatory
  false
end
24
+
@@ -0,0 +1,253 @@
1
+
2
+ #
3
+ # @author: Luis M. Rodriguez-R
4
+ # @update: Jul-14-2015
5
+ # @license: artistic license 2.0
6
+ #
7
+
8
+ module JPlace
9
+ ##### CLASSES:
10
+ # Placement.new(placement[, fields]): Initializes a new read placement.
11
+ # placement: A hash containing the placement.
12
+ # fields: If passed, sets the field order for all subsequent placements.
13
class Placement
  # flag: free-form attribute, used by JPlace.distances.rb as a placeholder.
  attr_accessor :flag
  # p: Array holding only the best (first) placement record.
  # n: Array of read names.
  # m: Array of multiplicities, parallel to n.
  attr_reader :p, :n, :m

  # Field order shared by all placements (class-wide).
  @@fields = nil
  def self.fields=(fields)
    @@fields=fields
  end
  def self.fields
    @@fields
  end

  # placement: Hash with a 'p' entry and either an 'n' or an 'nm' entry.
  # fields:    Field names, in the order used by the records in 'p'. Only
  #            takes effect while no field order has been defined yet.
  # Aborts if 'p' is missing or empty, or if neither 'n' nor 'nm' is present.
  def initialize(placement, fields=nil)
    @@fields = fields if @@fields.nil? and not fields.nil?
    # Save only the best (first) placement:
    abort "Placements must contain a 'p' field.\n" if placement["p"].nil?
    abort "Placements must contain a 'p' field with at least one entry.\n" if placement["p"][0].nil?
    @p = [placement["p"][0]]
    # Find name-only placements (EPA-style), each with multiplicity 1:
    unless placement["n"].nil?
      @n = placement["n"]
      @m = @n.map{ |n| 1 }
    end
    # Find multiplicity placements (pplacer-style); these take precedence
    # over 'n' when both are present:
    unless placement["nm"].nil?
      @n = placement["nm"].map{ |nm| nm[0] }
      @m = placement["nm"].map{ |nm| nm[1].to_i }
    end
    abort "Placements must contain one of 'n' or 'nm' fields.\n" if @n.nil? or @m.nil?
  end

  # Returns an Array of {:n=>name, :m=>multiplicity} Hashes.
  def nm
    (0 .. (self.n.length-1)).map{ |i| {:n=>self.n[i], :m=>self.m[i]} }
  end

  # Returns the value of +field+ in the saved placement record. Aborts if
  # the field order is undefined or does not include +field+.
  def get_field_value(field)
    abort "Impossible to read placement with undefined fields." if @@fields.nil?
    f = @@fields.find_index(field)
    abort "Undefined field #{field}." if f.nil?
    self.p[0][f]
  end

  # Sets the value of +field+ in the saved placement record. Aborts if the
  # field order is undefined or does not include +field+.
  def set_field_value(field, value)
    # Same guard as get_field_value, so that a missing field order is
    # reported as such instead of failing on nil.
    abort "Impossible to write placement with undefined fields." if @@fields.nil?
    f = @@fields.find_index(field)
    abort "Undefined field #{field}." if f.nil?
    self.p[0][f] = value
  end

  # Typed readers for the standard fields.
  def edge_num
    self.get_field_value('edge_num').to_i
  end
  def likelihood
    self.get_field_value('likelihood').to_f
  end
  def like_weight_ratio
    self.get_field_value('like_weight_ratio').to_f
  end
  # The length readers report 0.0 when the stored value is nil.
  def distal_length
    (self.get_field_value('distal_length') || 0).to_f
  end
  def pendant_length
    (self.get_field_value('pendant_length') || 0).to_f
  end

  def to_s
    "#<Placement of #{self.n}: #{self.p}>"
  end
end
74
+
75
+ # Ancillary class Tree
76
class Tree
  # Cached answer of has_iconv?; nil until the first call.
  @@HAS_ICONV = nil

  # Reports whether the 'iconv' library can be loaded. The libraries are
  # required only on the first call and the outcome is remembered.
  def self.has_iconv?
    return @@HAS_ICONV unless @@HAS_ICONV.nil?
    @@HAS_ICONV = true
    begin
      %w[rubygems iconv].each { |lib| require lib }
    rescue LoadError
      @@HAS_ICONV = false
    end
    @@HAS_ICONV
  end

  # Builds the root Node from a Newick string. When iconv is available the
  # text is first passed through a UTF-8 to UTF-8 conversion with //IGNORE;
  # a space is appended before converting and the last character is dropped
  # afterwards.
  def self.from_nwk(nwk)
    return Node.new(nwk) unless Tree.has_iconv?
    converter = Iconv.new('UTF-8//IGNORE','UTF-8')
    Node.new(converter.iconv(nwk + ' ')[0..-2])
  end
end
98
+
99
+ # Node.new(nwk[, parent]): Initializes a new Node.
100
+ # nwk: Node's description in Newick format.
101
+ # parent: Node's parent, or nil if root node.
102
class Node
  # Class
  # Registry of indexed nodes, addressed by edge index. It is shared by all
  # the nodes created, regardless of the tree they belong to.
  @@edges = []
  def self.edges
    @@edges
  end
  # Stores +node+ in the registry under its index; unindexed nodes are skipped.
  def self.register(node)
    @@edges[node.index] = node unless node.index.nil?
  end
  # Class-level functions related to JPlace
  # Attaches +placement+ to the node registered for its edge_num; aborts if
  # no node is registered under that index.
  def self.link_placement(placement)
    abort "Trying to link placement in undefined edge #{placement.edge_num}: #{placement.to_s}" if @@edges[placement.edge_num].nil?
    @@edges[placement.edge_num].add_placement!(placement)
  end
  # Detaches +placement+ from the node registered for its edge_num and
  # returns the removed placement.
  def self.unlink_placement(placement)
    @@edges[placement.edge_num].delete_placement!(placement)
  end

  # Instance
  attr_reader :children, :length, :name, :label, :index, :nwk, :parent, :placements, :collapsed

  # nwk:    Node's description in Newick format, with edge indexes in curly
  #         brackets. The String is modified in place: any ';' followed by
  #         another character is replaced by '--'.
  # parent: Node's parent, or nil if root node.
  # Parses the node's own index, name, length and label, then recursively
  # builds its children and registers the node. Aborts on empty input, on
  # unindexed edges (other than a ';'-terminated root), and on unparseable
  # or unbalanced descriptions.
  def initialize(nwk, parent=nil)
    abort "Empty newick.\n" if nwk.nil? or nwk==''
    nwk.gsub!(/;(.)/, '--\1')
    @nwk = nwk
    @parent = parent
    @placements = []
    @collapsed = false
    # Find index
    index_m = /^(?<pre>.*){(?<idx>[0-9]+)}(?<post>[^\(\),;]*);?$/.match(nwk)
    if index_m.nil? and parent.nil? and nwk[nwk.length-1]==';'
      @index = nil
    else
      abort "Unindexed edge found:\n#{@nwk}\n" if index_m.nil?
      nwk = index_m[:pre]+index_m[:post]
      @index = index_m[:idx].to_i
    end
    # Find name, label, and length
    meta_m = /^(\((?<cont>.+)\))?(?<name>[^:\(\);]*)(:(?<length>[0-9\.Ee+-]*)(?<label>\[[^\[\]\(\);]+\])?)?;?$/.match(nwk)
    abort "Cannot parse node metadata (index #{@index}):\n#{@nwk}\n" if meta_m.nil?
    nwk = meta_m[:cont]
    @name = meta_m[:name]
    @length = meta_m[:length]
    @label = meta_m[:label]
    # Find children: repeatedly cut the content at the first comma found
    # outside quotes and outside nested parentheses.
    @children = []
    nwk ||= ''
    quote = nil
    while nwk != ''
      i = 0 # Depth of open parentheses; nil once a splitting comma is found
      j = 0 # Offset of the splitting comma (or the length if there is none)
      nwk.each_char do |chr|
        if quote.nil?
          if chr=='"' or chr=="'"
            quote = chr
          else
            i += 1 if chr=='('
            i -= 1 if chr==')'
            if i==0 and chr==','
              i=nil
              break
            end
          end
        else
          quote = nil if chr==quote
        end
        j += 1
      end
      abort "Unbalanced node at edge {#{@index}}, with leftness #{i}:\n#{@nwk}\n" unless i.nil? or i==0
      @children << Node.new(nwk[0 .. j-1],self)
      nwk = nwk.length==j ? '' : nwk[j+1 .. -1]
    end
    Node.register(self)
  end

  # Accessors/Setters
  # Sets the name, replacing whitespace and the characters ( ) , ; : by '_'.
  def name=(new_name)
    @name = new_name.gsub(/[\s\(\),;:]/, '_')
  end

  # Tree algorithms
  # Yields the children (recursively) and then the node itself.
  def post_order(&blk)
    self.children.each { |n| n.post_order(&blk) }
    blk[self]
  end
  # Yields the first child's subtree, the node itself, and then the second
  # child's subtree. Aborts if the node has a number of children other than
  # zero or two.
  def in_order(&blk)
    abort "Tree must be dycotomic to traverse in_order, node #{self.cannonical_name} "+
      "has #{self.children.length} children." unless [0,2].include? self.children.length
    self.children[0].in_order(&blk) unless self.children[0].nil?
    blk[self]
    self.children[1].in_order(&blk) unless self.children[1].nil?
  end
  # Yields the node itself and then the children (recursively).
  def pre_order(&blk)
    blk[self]
    self.children.each { |n| n.pre_order(&blk) }
  end
  # Returns the Array of nodes from this node (inclusive) up to the root.
  # The result is cached.
  def path_to_root
    if @path_to_root.nil?
      @path_to_root = [self]
      @path_to_root += self.parent.path_to_root unless self.parent.nil?
    end
    @path_to_root
  end
  # Returns the sum of the lengths along path_to_root, counting nodes
  # without length as 0.0. The result is cached.
  def distance_to_root
    if @distance_to_root.nil?
      @distance_to_root = path_to_root.map{ |n| n.length.nil? ? 0.0 : n.length.to_f }.reduce(0.0, :+)
    end
    @distance_to_root
  end
  # Returns the first node in this node's path to the root that is also in
  # the path to the root of +node+.
  def lca(node)
    p1 = self.path_to_root
    p2 = node.path_to_root
    p1.find{ |n| p2.include? n }
  end
  # Returns the distance to +node+ through their lca.
  def distance(node)
    self.distance_to_root + node.distance_to_root - (2.0 * self.lca(node).distance_to_root)
  end
  # Nodes are considered equal when they have the same index.
  def ==(node) self.index == node.index ; end

  # Tree representation
  # Returns the first non-empty of: name, label, "{index}"; or an empty
  # String if none is available.
  def cannonical_name
    return(self.name) unless self.name.nil? or self.name == ""
    return(self.label) unless self.label.nil? or self.label == ""
    return("{#{self.index.to_s}}") unless self.index.nil?
    ""
  end
  # Returns the Newick-like representation of the subtree, using
  # cannonical_name for every node and no terminating semicolon.
  def to_s
    o = ""
    o += "(" + self.children.map{ |c| c.to_s }.join(",") + ")" if self.children.length > 0
    o += self.cannonical_name
    u = "#{self.length.nil? ? "" : self.length}#{self.label.nil? ? "" : self.label}"
    o += ":#{u}" unless u==""
    o
  end

  # Instance-level functions related to JPlace
  # Moves the placements of all the descendants to this node, updating their
  # 'edge_num' field, and marks the node as collapsed.
  def collapse!
    self.pre_order do |n|
      if n!=self
        while n.placements.length > 0
          p = Node.unlink_placement(n.placements[0])
          p.set_field_value('edge_num', self.index)
          Node.link_placement(p)
        end
      end
    end
    @collapsed = true
  end
  def add_placement!(placement)
    @placements << placement
  end
  def delete_placement!(placement)
    @placements.delete(placement)
  end
end
251
+
252
+ end # module JPlace
253
+