miga-base 0.7.26.0 → 0.7.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/version.rb +1 -1
  3. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  4. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  5. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  6. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  7. data/utils/FastAAI/README.md +84 -0
  8. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  9. data/utils/enveomics/Docs/recplot2.md +244 -0
  10. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  11. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  12. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  13. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  14. data/utils/enveomics/LICENSE.txt +73 -0
  15. data/utils/enveomics/Makefile +52 -0
  16. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  17. data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
  18. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  19. data/utils/enveomics/Manifest/Tasks/fasta.json +766 -0
  20. data/utils/enveomics/Manifest/Tasks/fastq.json +243 -0
  21. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  22. data/utils/enveomics/Manifest/Tasks/mapping.json +67 -0
  23. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  24. data/utils/enveomics/Manifest/Tasks/other.json +829 -0
  25. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  26. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +501 -0
  27. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  28. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  29. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  30. data/utils/enveomics/Manifest/categories.json +156 -0
  31. data/utils/enveomics/Manifest/examples.json +154 -0
  32. data/utils/enveomics/Manifest/tasks.json +4 -0
  33. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  34. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  35. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  36. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  37. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  38. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  39. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  40. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  41. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  42. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  43. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  44. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  45. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  46. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  47. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  48. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  49. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  50. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  51. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  52. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  53. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  54. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  55. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  56. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  57. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  58. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  59. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  60. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  61. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  62. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  63. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  64. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  65. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  66. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  67. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  68. data/utils/enveomics/README.md +42 -0
  69. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  70. data/utils/enveomics/Scripts/Aln.cat.rb +163 -0
  71. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  72. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  73. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  74. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  75. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  76. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  77. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  78. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  79. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  80. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  81. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  82. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  83. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  84. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  85. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  86. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  87. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  88. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  89. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  90. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  91. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  92. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  93. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  94. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  95. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  96. data/utils/enveomics/Scripts/FastA.N50.pl +56 -0
  97. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  98. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  99. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  100. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  101. data/utils/enveomics/Scripts/FastA.fragment.rb +92 -0
  102. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  103. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  104. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  105. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  106. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  107. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  108. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  109. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  110. data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
  111. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  112. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  113. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  114. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  115. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  116. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  117. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  118. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  119. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  120. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  121. data/utils/enveomics/Scripts/FastQ.tag.rb +63 -0
  122. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  123. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  124. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  125. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  126. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  127. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  128. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  129. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  130. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  131. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  132. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  133. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  134. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  135. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  136. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  137. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  138. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  139. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  140. data/utils/enveomics/Scripts/SRA.download.bash +57 -0
  141. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  142. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  143. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  144. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  145. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  146. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  147. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  148. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  149. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  150. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  151. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  152. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  153. data/utils/enveomics/Scripts/aai.rb +418 -0
  154. data/utils/enveomics/Scripts/ani.rb +362 -0
  155. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  156. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  157. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  158. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  159. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  160. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  161. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  162. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  163. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  164. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  165. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  166. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +30 -0
  167. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  168. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  169. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  170. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  171. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  172. data/utils/enveomics/Scripts/ogs.rb +104 -0
  173. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  174. data/utils/enveomics/Scripts/rbm.rb +146 -0
  175. data/utils/enveomics/Tests/Makefile +10 -0
  176. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  177. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  178. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  179. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  180. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  181. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  182. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  183. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  184. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  185. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  186. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  187. data/utils/enveomics/Tests/alkB.nwk +1 -0
  188. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  189. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  190. data/utils/enveomics/Tests/hiv1.faa +59 -0
  191. data/utils/enveomics/Tests/hiv1.fna +134 -0
  192. data/utils/enveomics/Tests/hiv2.faa +70 -0
  193. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  194. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  195. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  196. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  197. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  198. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  199. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  200. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  201. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  202. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  203. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  204. data/utils/enveomics/build_enveomics_r.bash +45 -0
  205. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  206. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  207. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  208. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  209. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  210. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  211. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  212. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  213. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  214. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  215. data/utils/enveomics/enveomics.R/R/utils.R +50 -0
  216. data/utils/enveomics/enveomics.R/README.md +80 -0
  217. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  218. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  219. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
  220. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
  221. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
  222. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  223. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  224. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  225. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  226. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  227. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  228. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -0
  229. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -0
  230. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -0
  231. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  232. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  233. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -0
  234. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -0
  235. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -0
  236. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -0
  237. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -0
  238. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -0
  239. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  240. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  241. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -0
  242. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  243. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  244. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  245. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  246. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -0
  247. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  248. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  249. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -0
  250. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  251. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  252. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  253. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  254. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -0
  255. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  256. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -0
  257. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +37 -0
  258. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -0
  259. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  260. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  261. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -0
  262. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -0
  263. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  264. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  265. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  266. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  267. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -0
  268. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -0
  269. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -0
  270. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -0
  271. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  272. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  273. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  274. data/utils/enveomics/globals.mk +8 -0
  275. data/utils/enveomics/manifest.json +9 -0
  276. metadata +277 -4
@@ -0,0 +1,171 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # @author Luis M. Rodriguez-R
4
+ # @update Dec-21-2015
5
+ # @license artistic license 2.0
6
+ #
7
+
8
+ $:.push File.expand_path(File.dirname(__FILE__) + "/lib")
9
+ require "enveomics_rb/enveomics"
10
+
# Command-line interface.
# `o` holds the defaults; each switch stores its value under a symbol key that
# the rest of the script reads (:file, :obs, :boot, :null, :overwrite,
# :bootstraps, :permutations, :q).
o = {permutations: 1000, bootstraps: 1000, overwrite: false}
OptionParser.new do |opt|
  # Leading spaces are stripped from every banner line by the gsub below.
  opt.banner = "
    Estimates the log2-ratio of different amino acids in homologous sites using
    an AAsubs file (see BlastPairwise.AAsubs.pl). It provides the point
    estimation (.obs file), the bootstrap of the estimation (.boot file) and the
    null model based on label-permutation (.null file).

    Usage: #{$0} [options]".gsub(/^ +/,"")
  opt.separator ""
  opt.separator "Mandatory"
  opt.on("-i", "--input FILE",
    "Input file in AAsubs format (see BlastPairwise.AAsubs.pl)."
    ){ |v| o[:file] = v}
  opt.separator ""
  opt.separator "Output files"
  opt.on("-O", "--obs-file FILE",
    "Output file with the log2-ratios per amino acid.",
    "By default, '--input value'.obs."
    ){ |v| o[:obs] = v }
  opt.on("-B", "--bootstrap-file FILE",
    "Output file with the bootstrap results of log2-ratios per amino acid.",
    "By default, '--input value'.boot."
    ){ |v| o[:boot] = v }
  opt.on("-N", "--null-file FILE",
    "Output file with the permutation results of log2-ratios per amino acid.",
    "By default, '--input value'.null."
    ){ |v| o[:null] = v }
  opt.on("--overwrite",
    "Overwrite existing files. By default, skip steps if the files already" +
    " exist."){ |v| o[:overwrite] = v }
  opt.separator ""
  opt.separator "Parameters"
  opt.on("-b", "--bootstraps INT",
    "Number of bootstraps to run. By default: #{o[:bootstraps]}."
    ){ |v| o[:bootstraps] = v.to_i }
  opt.on("-p", "--permutations INT",
    "Number of permutations to run. By default: #{o[:permutations]}."
    ){ |v| o[:permutations] = v.to_i }
  # `true`, not the legacy TRUE constant: TRUE was removed in Ruby 3.2 and
  # would raise NameError as soon as -q is passed.
  opt.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
  opt.on("-h", "--help", "Display this screen.") do
    puts opt
    exit
  end
  opt.separator ""
end.parse!
57
+
# Initialize
# The input file is the only mandatory option.
abort "--input is mandatory" unless o[:file]
# Residue symbols accepted in the input, in the order used for every output.
ALPHABET = %w[A C D E F G H I K L M N P Q R S T V W Y X]
# Output paths that were not given default to the input path plus a suffix
# (.obs, .boot, .null).
[:obs, :boot, :null].each do |suffix|
  o[suffix] ||= "#{o[:file]}.#{suffix}"
end
64
+
65
+ # Functions
# Summarizes two samples as one ratio per residue.
#
# a, b:: Hashes keyed by residue (see ALPHABET) whose values are arrays of
#        per-protein counts.
# Returns an Array, in ALPHABET order, with the base-10 logarithm of
# (total count in a) / (total count in b) for each residue.
def dist_summary(a,b)
  ALPHABET.map do |residue|
    total_a = a[residue].inject(0) { |acc, count| acc + count }
    total_b = b[residue].inject(0) { |acc, count| acc + count }
    Math.log(total_a.to_f / total_b, 10)
  end
end
# Builds a fresh sample: a Hash with one independent empty Array per residue
# of ALPHABET, to be filled with per-protein counts.
def empty_sample
  ALPHABET.each_with_object({}) { |residue, sample| sample[residue] = [] }
end
74
+
# Initialize
$stderr.puts "Initializing." unless o[:q]
sample_A = empty_sample
sample_B = empty_sample
last_label = nil
prot_index = -1

# Read file
# Column 0 is the protein label; columns 1 and 2 hold the residue seen in
# sample A and sample B, respectively. A change of label starts a new protein.
$stderr.puts "Reading input file." unless o[:q]
input = File.open(o[:file], "r")
input.each do |line|
  fields = line.chomp.split(/\t/)
  label = fields.first
  unless label == last_label
    # New protein: open a zeroed counter slot for every residue in both samples
    prot_index += 1
    last_label = label
    ALPHABET.each do |aa|
      sample_A[aa][prot_index] = 0
      sample_B[aa][prot_index] = 0
    end
  end
  { 1 => sample_A, 2 => sample_B }.each do |column, sample|
    residue = fields[column]
    # '-' and '*' are skipped without being counted
    next if %w(- *).include? residue
    abort "Unknown amino acid in line #{$.}: '#{residue}'." unless
      ALPHABET.include? residue
    sample[residue][prot_index] += 1
  end
end
input.close
$stderr.puts " > Found #{prot_index+1} proteins." unless o[:q]
$stderr.puts " > Saving #{o[:obs]}" unless o[:q]
# Point estimate: one row per residue, written in ALPHABET order.
observed = dist_summary(sample_A, sample_B)
File.open(o[:obs], "w") do |fh|
  fh.puts "AA\tlog10_AB"
  ALPHABET.zip(observed).each do |residue, ratio|
    fh.puts "#{residue}\t#{ratio}"
  end
end
114
+
# Permutations
# Null model: the run is skipped when the .null file already has content,
# unless --overwrite was given.
if o[:overwrite] or not File.size?(o[:null])
  $stderr.puts "Permutating." unless o[:q]
  permut_sum = o[:permutations].times.map do
    permut_A = empty_sample
    permut_B = empty_sample
    0.upto(prot_index) do |prot|
      # Copy counts of the protein
      ALPHABET.each do |aa|
        permut_A[aa][prot] = sample_A[aa][prot]
        permut_B[aa][prot] = sample_B[aa][prot]
      end
      # Swap labels at random: the two hashes trade places, which also flips
      # every protein copied so far.
      permut_A, permut_B = permut_B, permut_A if rand(2) == 1
    end
    dist_summary(permut_A, permut_B)
  end
  $stderr.puts " > Performed #{o[:permutations]} permutations." unless o[:q]
  $stderr.puts " > Saving #{o[:null]}" unless o[:q]
  File.open(o[:null], "w") do |fh|
    fh.puts ALPHABET.join("\t")
    permut_sum.each { |row| fh.puts row.join("\t") }
  end
else
  $stderr.puts "Skipping permutations." unless o[:q]
end
142
+
# Bootstraps
# The run is skipped when the .boot file already has content, unless
# --overwrite was given.
if o[:overwrite] or not File.size?(o[:boot])
  $stderr.puts "Bootstrapping." unless o[:q]
  boot_sum = o[:bootstraps].times.map do
    boot_A = empty_sample
    boot_B = empty_sample
    0.upto(prot_index) do |slot|
      # Sample randomly with replacement
      drawn = rand(prot_index+1)
      # Copy counts of the drawn protein into the current slot of both samples
      ALPHABET.each do |aa|
        boot_A[aa][slot] = sample_A[aa][drawn]
        boot_B[aa][slot] = sample_B[aa][drawn]
      end
    end
    dist_summary(boot_A, boot_B)
  end
  $stderr.puts " > Performed #{o[:bootstraps]} bootstraps." unless o[:q]
  $stderr.puts " > Saving #{o[:boot]}" unless o[:q]
  File.open(o[:boot], "w") do |fh|
    fh.puts ALPHABET.join("\t")
    boot_sum.each { |row| fh.puts row.join("\t") }
  end
else
  $stderr.puts "Skipping bootstraps." unless o[:q]
end

$stderr.puts "Done. Yayyy!" unless o[:q]
@@ -0,0 +1,163 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # @author Luis M. Rodriguez-R
4
+ # @update Nov-30-2015
5
+ # @license artistic license 2.0
6
+ #
7
+
8
+ $:.push File.expand_path(File.dirname(__FILE__) + "/lib")
9
+ require "enveomics_rb/enveomics"
10
+
# Command-line interface.
# `o` holds the defaults; each switch stores its value under a symbol key that
# the main section reads (:coords, :ignoreafter, :removeinvar, :missing,
# :model, :undefined, :q). Remaining arguments are the alignment files.
o = {:q=>false, :missing=>"-", :model=>"AUTO", :removeinvar=>false,
  :undefined=>"-.Xx?"}
OptionParser.new do |opt|
  # Leading spaces are stripped from every banner line by the gsub below.
  opt.banner = "
    Concatenates several multiple alignments in FastA format into a single
    multiple alignment. The IDs of the sequences (or the ID prefixes, if using
    --ignore-after) must coincide across files.

    Usage: #{$0} [options] aln1.fa aln2.fa ... > aln.fa".gsub(/^ +/,"")
  opt.separator ""
  opt.on("-c", "--coords FILE",
    "Output file of coordinates in RAxML-compliant format."
    ){ |v| o[:coords]=v }
  opt.on("-i", "--ignore-after STRING",
    "Remove everything in the IDs after the specified string."
    ){ |v| o[:ignoreafter]=v }
  opt.on("-I", "--remove-invariable", "Remove invariable sites.",
    "Note: Invariable sites are defined as columns with only one state and",
    "undefined characters. Additional ambiguous characters may exist and",
    "should be declared using --undefined."){ |v| o[:removeinvar]=v }
  opt.on("-u", "--missing-char CHAR",
    "Character denoting missing data. By default: '#{o[:missing]}'.") do |v|
    abort "Missing positions can only be denoted by single characters, " +
      "offending value: '#{v}'." if v.length != 1
    o[:missing]=v
  end
  opt.on("-m", "--model STRING",
    "Name of the model to use if --coords is used. See RAxML's docs; ",
    "supported values in v8+ include:",
    "o For DNA alignments:",
    " 'DNA[F|X]', or 'DNA[F|X]/3' (to estimate rates per codon position,",
    " particular notation for this script).",
    "o General protein alignments:",
    " 'AUTO' (default in this script), 'DAYHOFF' (1978), 'DCMUT' (MBE 2005;",
    " 22(2):193-199), 'JTT' (Nat 1992;358:86-89), 'VT' (JCompBiol 2000;",
    " 7(6):761-776), 'BLOSUM62' (PNAS 1992;89:10915), and 'LG' (MBE 2008;",
    " 25(7):1307-1320).",
    "o Specialized protein alignments:",
    " 'MTREV' (mitochondrial, JME 1996;42(4):459-468), 'WAG' (globular, MBE",
    " 2001;18(5):691-699), 'RTREV' (retrovirus, JME 2002;55(1):65-73), ",
    " 'CPREV' (chloroplast, JME 2000;50(4):348-358), and 'MTMAM' (nuclear",
    " mammal proteins, JME 1998;46(4):409-418)."){|v| o[:model]=v}
  opt.on("--undefined STRING",
    "All characters to be regarded as 'undefined'. It should include all",
    "ambiguous and missing data chars. Ignored unless --remove-invariable.",
    "By default: '#{o[:undefined]}'."){|v| o[:undefined]=v}
  # `true`, not the legacy TRUE constant: TRUE was removed in Ruby 3.2 and
  # would raise NameError as soon as -q is passed.
  opt.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
  opt.on("-h", "--help", "Display this screen.") do
    puts opt
    exit
  end
  opt.separator ""
end.parse!
# Whatever parse! left in ARGV is the list of alignment files.
alns = ARGV
abort "Alignment files are mandatory" if alns.nil? or alns.empty?
66
+
##### MAIN:
# Reads every alignment, optionally drops invariable columns, prints the
# concatenated alignment to STDOUT and, if requested, writes the partition
# coordinates. Unexpected errors are reported on STDERR and end the script
# with a non-zero status.
begin
  $stderr.puts "Reading." unless o[:q]
  a = {}          # sequence ID => Array with one aligned string per input file
  n = alns.size-1
  lengths = []    # alignment length of each input file
  (0 .. n).each do |i|
    key = nil
    # Block form so the handle is closed even if reading fails midway
    File.open(alns[i], "r") do |fh|
      fh.each do |ln|
        ln.chomp!
        if ln =~ /^>(\S+)/
          key = $1
          # NOTE(review): the --ignore-after value is interpolated as a regular
          # expression (not escaped), and the match itself is removed too.
          key.sub!(/#{o[:ignoreafter]}.*/,"") unless o[:ignoreafter].nil?
          a[key] ||= []
          a[key][i] = ""
        else
          abort "#{alns[i]}: Leading line is not a def-line, is this a "+
            "valid FastA file?" if key.nil?
          ln.gsub!(/\s/,"")
          a[key][i] += ln
        end
      end
    end
    abort "#{alns[i]}: Empty alignment?" if key.nil?
    # The last sequence read sets the expected length for this file
    lengths[i] = a[key][i].length
  end
  if o[:removeinvar]
    $stderr.puts "Removing invariable sites." unless o[:q]
    undefined = o[:undefined].chars   # hoisted: constant across all columns
    invs = 0
    (0 .. n).each do |i|
      # The range is fixed up-front; lengths[i] shrinks while iterating
      (0 .. (lengths[i]-1)).each do |pos|
        chr = nil
        inv = true
        a.keys.each do |key|
          next if a[key][i].nil?
          site = a[key][i][pos]
          # Keep looking for a reference state while only undefined ones appear
          chr = site if chr.nil? or undefined.include? chr
          if chr != site and not undefined.include? site
            inv = false
            break
          end
        end
        if inv
          # Mark the column with a placeholder so later positions do not shift
          a.keys.each{|key| a[key][i][pos]="!" unless a[key][i].nil?}
          lengths[i] -= 1
          invs += 1
        end
      end
      a.keys.each{|key| a[key][i].gsub!("!", "") unless a[key][i].nil?}
    end
    $stderr.puts " Removed #{invs} sites." unless o[:q]
  end
  $stderr.puts "Concatenating." unless o[:q]
  a.keys.each do |key|
    # Sequences absent from a file are padded with the missing-data character
    (0 .. n).each do |i|
      a[key][i] = (o[:missing] * lengths[i]) if a[key][i].nil?
    end
    observed = a[key].map{|seq| seq.length}
    abort "Inconsistent lengths in '#{key}'\n" +
      "exp:#{lengths.join(" ")}\n" +
      "obs:#{observed.join(" ")}." unless lengths == observed
    puts ">#{key}", a[key].join("").gsub(/(.{1,60})/, "\\1\n")
    a.delete(key)   # free each sequence once it has been printed
  end
  $stderr.puts " #{lengths.inject(:+)} columns." unless o[:q]
  unless o[:coords].nil?
    $stderr.puts "Generating coordinates." unless o[:q]
    # Partition names: file basename up to the first dot, made alphanumeric
    names = alns.map do |aln|
      File.basename(aln).gsub(/\..*/,"").gsub(/[^A-Za-z0-9_]/,"_")
    end
    File.open(o[:coords], "w") do |coords|
      s = 0   # number of columns already assigned to previous partitions
      (0 .. n).each do |i|
        l = lengths[i]
        next unless l > 0
        # Append the file index until the partition name is unique
        names[i] += "_#{i}" while names.count(names[i])>1
        if o[:model] =~ /(DNA.?)\/3/
          coords.puts "#{$1}, #{names[i]}codon1 = #{s+1}-#{s+l}\\3"
          coords.puts "#{$1}, #{names[i]}codon2 = #{s+2}-#{s+l}\\3"
          coords.puts "#{$1}, #{names[i]}codon3 = #{s+3}-#{s+l}\\3"
        else
          coords.puts "#{o[:model]}, #{names[i]} = #{s+1}-#{s+l}"
        end
        s += l
      end
    end
  end
  # Save the output matrix
  $stderr.puts "Done.\n" unless o[:q]
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  # Signal the failure to the caller instead of finishing with status 0
  exit 1
end
162
+
163
+
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env perl
2
+
3
+ #
4
+ # @author: Luis M. Rodriguez-R
5
+ # @update: Mar-23-2015
6
+ # @license: artistic license 2.0
7
+ #
8
+
9
+ use Bio::AlignIO;
10
+
use strict;
use warnings;

# Both formats are mandatory positional arguments; without them the usage
# text is printed and the script dies.
my($iformat,$oformat) = @ARGV;
($iformat and $oformat) or die "
Usage:
$0 in-format out-format < in_file > output_file

in-format Input file's format.
out-format Output file's format.
in_file Input file.
out_file Output file.

Example:
# Re-format example.fa into Stockholm
$0 fasta stockholm < example.fa > example.stk

Supported formats are:
bl2seq, clustalw, emboss, fasta, maf, mase, mega,
meme, metafasta, msf, nexus, pfam, phylip, po,
prodom, psi, selex, stockholm, XMFA, arp

";

# Read every alignment from STDIN and write it to STDOUT in the requested
# output format. The handles are lexicals rather than undeclared globals.
my $in = Bio::AlignIO->new(-fh => \*STDIN, -format => $iformat);
my $out = Bio::AlignIO->new(-fh => \*STDOUT, -format => $oformat);
while ( my $aln = $in->next_aln ) { $out->write_aln($aln) }
35
+
@@ -0,0 +1,152 @@
1
+ #!/usr/bin/env perl
2
+
3
+ #
4
+ # @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
5
+ # @update: Nov-29-2015
6
+ # @license: artistic license 2.0
7
+ #
8
+
9
+ use warnings;
10
+ use strict;
11
+ use Getopt::Std;
12
+ use List::Util qw/min max sum/;
13
+
# Prints the one-line program banner to the currently selected output handle.
# Any arguments are ignored.
sub VERSION_MESSAGE() {
    print "Alpha-diversity indices (enveomics)\n";
}
# Dies with the full usage text. This is called explicitly when -i is missing.
# Any arguments are ignored.
sub HELP_MESSAGE() {
    my $usage = "
Description:
Takes a table of OTU abundance in one or more samples and calculates the Rao
(Q_alpha), Rao-Jost (Q_alpha_eqv), Shannon (Hprime), and inverse Simpson
(1_lambda) indices of alpha diversity for each sample.

To use it with Qiime OTU Tables, run it as:
$0 -i OTU_Table.txt -h

Usage:
$0 [opts]

-i <str> * Input table (columns:samples, rows:OTUs, first column:OTU
names).
-r <int> Number of rows to ignore. By default: 0.
-c <int> Number of columns to ignore after the first column (i.e.,
between the first column, containing the name of the categories,
and the first column of abundance values). By default: 0.
-C <int> Number of columns to ignore at the end of each line.
By default: 0.
-d <str> Delimiter. Supported escaped characters are: \"\\t\"
(tabulation), and \"\\0\" (null bit). By default: \"\\t\".
-h If set, the first row is assumed to have the names of the
samples.
-D <str> Distances file. A squared matrix (or bottom-left half matrix)
with the distances between categories (OTUs or functions). The
first column must contain the names of the categories, and it
shouldn't have headers. If not set, all distances are assumed
to be one. Only used for Rao.
-R Do not calculate Rao indices. This significantly decreases the
total running time. Note that Rao indices are highly susceptible
to precision errors, and shouldn't be trusted for very big
numbers.
-q <int> Estimate the qD index (true diversity order q). By default: 0.
--help This help message.

* Mandatory.

";
    die $usage;
}
54
+
# Input arguments
my %o;
getopts('i:c:C:d:r:hD:Rq:', \%o);

#$o{B} and (eval("use bignum; 1") or die "Cannot use bignum.\n");
# The input table is the only mandatory option.
HELP_MESSAGE() unless $o{i};

# Numeric options default to zero; the delimiter defaults to the escaped tab.
$o{$_} ||= 0 for qw(c C r q);
$o{d} ||= "\\t";

# Translate the two supported escape sequences into the actual character.
my %char_for_escape = ( "\\t" => "\t", "\\0" => "\0" );
$o{d} = $char_for_escape{ $o{d} } if exists $char_for_escape{ $o{d} };
69
+
# Distance matrix
# $D->{row}->{column} holds the distance between two categories. It is only
# loaded when a distances file is given (-D) and Rao is not disabled (-R).
my $D = {};
if($o{D} and not $o{R}){
   my @Didx = ();   # category names in the order their rows were read
   open my $dist_fh, "<", $o{D} or die "Cannot read file: $o{D}: $!\n";
   while(my $row = <$dist_fh>){
      chomp $row;
      next unless length $row;   # ignore blank lines
      my @d = split /\t/, $row;
      my $idx = shift @d;
      push @Didx, $idx;
      $D->{ $idx } ||= {};
      # Only columns whose category name has already been read can be keyed.
      # In a square matrix the remaining cells are the upper triangle, which
      # is later looked up through the transposed entry.
      my $last = min($#d, $#Didx);
      $D->{ $idx }->{ $Didx[$_] } = $d[$_] for(0 .. $last);
   }
   close $dist_fh;
}
86
+
# Abundance matrix
# @names:  sample names (when -h is set), plus "gamma" for the pooled column
# @cats:   category (OTU) names, one per data row
# @values: one array ref per sample with its abundances, followed by one
#          with the per-row sums ("gamma")
my @names = ();
my @cats = ();
my @values = ();
# The delimiter is matched literally, so characters such as '|' or '.' are
# not interpreted as regular-expression syntax.
my $delim_re = quotemeta $o{d};
open my $table_fh, "<", $o{i} or die "Cannot open file: ".$o{i}.": $!\n";
# Discard the rows to ignore (-r)
for (1 .. $o{r}) { my $ignored = <$table_fh>; }
if($o{h}){
   my $h = <$table_fh>;
   $h or die "Empty table!\n";
   chomp $h;
   @names = split /$delim_re/, $h;
   # Drop the name of the first column plus the -c ignored columns
   shift @names for (0 .. $o{c});
}

while(my $row = <$table_fh>){
   chomp $row;
   next unless length $row;   # blank lines carry no category
   my @ln = split /$delim_re/, $row;
   push @cats, shift(@ln);
   shift @ln for (1 .. $o{c});
   pop @ln for (1 .. $o{C});
   push @{$values[$_] ||= []}, $ln[$_] for (0 .. $#ln);
   # Extra trailing column: total abundance of the category across samples
   push @{$values[$#ln+1]}, sum(@ln);
}
close $table_fh;
# Label the pooled column; skipped when the table had no data rows
$names[$#values] = "gamma" if @values;
112
+
# Output: a header, then one row per sample (and the pooled "gamma" column)
# with Rao (unless -R), Rao-Jost, Shannon, inverse Simpson and qD.
my @header = $o{R} ? qw/Sample Hprime 1_lambda qD/
                   : qw/Sample Q_alpha Q_alpha_eqv Hprime 1_lambda qD/;
print "".join($o{d}, @header)."\n";
for my $i (0 .. $#values){
   print "".(exists $names[$i] ? $names[$i] : $i).$o{d};
   my $N = sum @{$values[$i]};
   # Relative abundances; an all-zero sample yields zeros rather than a
   # division by zero
   my @p = map { $N ? $_/$N : 0 } @{$values[$i]};
   my $Q = 0;    # Rao's quadratic entropy
   my $H = 0;    # Shannon index
   my $l = 0;    # Simpson's lambda
   my $qD = 0;   # sum of p**q, turned into the true diversity below
   for my $ik (0 .. $#p){
      unless($o{R}){
	 my $Qi = 0;
	 for my $jk (0 .. $#p){
	    # Distance between the two categories: 1 without -D, otherwise
	    # whichever orientation of the pair is present in the matrix
	    my $dij;
	    if(!$o{D}){
	       $dij = 1;
	    }elsif(exists $D->{ $cats[$ik] }->{ $cats[$jk] }){
	       $dij = $D->{ $cats[$ik] }->{ $cats[$jk] };
	    }elsif(exists $D->{ $cats[$jk] }->{ $cats[$ik] }){
	       $dij = $D->{ $cats[$jk] }->{ $cats[$ik] };
	    }else{
	       die "Cannot find distance between ".$cats[$ik].
		  " and ".$cats[$jk].".\n";
	    }
	    $Qi += $dij * $p[$ik] * $p[$jk];
	 }
	 $Q += $Qi;
      }
      my $pi = $p[$ik];
      $H -= $pi * log($pi) if $pi;
      $l += $pi**2;
      $qD += $pi * ($pi**($o{q}-1)) unless $o{q}==1 or not $pi;
   }
   # qD = (sum p**q)**(1/(1-q)); for q = 1 it is exp(H)
   if($o{q}==1){
      $qD = exp($H);
   }else{
      my $root = $qD**(1/($o{q}-1));
      $qD = $root ? 1/$root : "Inf";
   }
   # Same guard for both output layouts
   my $inv_l = $l ? 1/$l : "Inf";
   if($o{R}){
      print "".join($o{d}, $H, $inv_l, $qD)."\n";
   }else{
      print "".join($o{d}, $Q, ($Q==1 ? "NA" : 1/(1-$Q)), $H, $inv_l, $qD)."\n";
   }
}
152
+