miga-base 0.7.25.2 → 0.7.25.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (277) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/version.rb +2 -2
  3. data/utils/distance/runner.rb +2 -1
  4. metadata +5 -278
  5. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  6. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  7. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  8. data/utils/FastAAI/FastAAI/FastAAI +0 -1336
  9. data/utils/FastAAI/README.md +0 -84
  10. data/utils/FastAAI/kAAI_v1.0_virus.py +0 -1296
  11. data/utils/enveomics/Docs/recplot2.md +0 -244
  12. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  13. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  14. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  15. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  16. data/utils/enveomics/LICENSE.txt +0 -73
  17. data/utils/enveomics/Makefile +0 -52
  18. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  19. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -786
  20. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  21. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -766
  22. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -243
  23. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  24. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -67
  25. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  26. data/utils/enveomics/Manifest/Tasks/other.json +0 -829
  27. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  28. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -501
  29. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  30. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  31. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  32. data/utils/enveomics/Manifest/categories.json +0 -156
  33. data/utils/enveomics/Manifest/examples.json +0 -154
  34. data/utils/enveomics/Manifest/tasks.json +0 -4
  35. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  36. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  37. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  38. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  39. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  40. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  41. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  42. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  43. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  44. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  45. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  46. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  47. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  48. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  49. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  50. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  51. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  52. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  53. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  54. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  55. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  56. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  57. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  58. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  59. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  60. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  61. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  62. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  63. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  64. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  65. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  66. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  67. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  68. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  69. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  70. data/utils/enveomics/README.md +0 -42
  71. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  72. data/utils/enveomics/Scripts/Aln.cat.rb +0 -163
  73. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  74. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  75. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  76. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  77. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  78. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  79. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  80. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  81. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  82. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  83. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  84. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  85. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  86. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  87. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  88. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  89. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  90. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  91. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  92. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  93. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  94. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -101
  95. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  96. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  97. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  98. data/utils/enveomics/Scripts/FastA.N50.pl +0 -56
  99. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  100. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  101. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  102. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  103. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -92
  104. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  105. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  106. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  107. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  108. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  109. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  110. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  111. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  112. data/utils/enveomics/Scripts/FastA.sample.rb +0 -83
  113. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  114. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  115. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  116. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  117. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  118. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  119. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  120. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  121. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  122. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  123. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -63
  124. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  125. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  126. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  127. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  128. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  129. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  130. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  131. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  132. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  133. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  134. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  135. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  136. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  137. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  138. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  139. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  140. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  141. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  142. data/utils/enveomics/Scripts/SRA.download.bash +0 -57
  143. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  144. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  145. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  146. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  147. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  148. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  149. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  150. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  151. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  152. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  153. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  154. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  155. data/utils/enveomics/Scripts/aai.rb +0 -418
  156. data/utils/enveomics/Scripts/ani.rb +0 -362
  157. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  158. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  159. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  160. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  161. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  162. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  163. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  164. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  165. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  166. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  167. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  168. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
  169. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  170. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  171. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  172. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  173. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  174. data/utils/enveomics/Scripts/ogs.rb +0 -104
  175. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  176. data/utils/enveomics/Scripts/rbm.rb +0 -146
  177. data/utils/enveomics/Tests/Makefile +0 -10
  178. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  179. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  180. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  181. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  182. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  183. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  184. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  185. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  186. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  187. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  188. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  189. data/utils/enveomics/Tests/alkB.nwk +0 -1
  190. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  191. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  192. data/utils/enveomics/Tests/hiv1.faa +0 -59
  193. data/utils/enveomics/Tests/hiv1.fna +0 -134
  194. data/utils/enveomics/Tests/hiv2.faa +0 -70
  195. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  196. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  197. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  198. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  199. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  200. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  201. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  202. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  203. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  204. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  205. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  206. data/utils/enveomics/build_enveomics_r.bash +0 -45
  207. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  208. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  209. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  210. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  211. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  212. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  213. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  214. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  215. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  216. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  217. data/utils/enveomics/enveomics.R/R/utils.R +0 -50
  218. data/utils/enveomics/enveomics.R/README.md +0 -80
  219. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  220. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  221. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -17
  222. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -17
  223. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -17
  224. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  225. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  226. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  227. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  228. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  229. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  230. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -32
  231. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -91
  232. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -57
  233. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  234. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  235. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -39
  236. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -38
  237. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -40
  238. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -67
  239. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -37
  240. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -122
  241. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  242. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  243. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -68
  244. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  245. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  246. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  247. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  248. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -41
  249. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  250. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  251. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -40
  252. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  253. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  254. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  255. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  256. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -41
  257. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  258. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -43
  259. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -37
  260. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -74
  261. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  262. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  263. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -32
  264. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -59
  265. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  266. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  267. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  268. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  269. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -63
  270. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -38
  271. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -38
  272. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -111
  273. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  274. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  275. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  276. data/utils/enveomics/globals.mk +0 -8
  277. data/utils/enveomics/manifest.json +0 -9
@@ -1,81 +0,0 @@
1
#!/usr/bin/env ruby

#
# @author: Luis M. Rodriguez-R
# @update: Feb-06-2015
# @license: artistic license 2.0
#

# Downloads the sequences that carry a given functional annotation from an
# M5nr REST API and stores them as FastA.
#
# The script pages through "<url>/function/<annotation>" following the "next"
# link of each response, and fetches every sequence from "<url>/md5/<md5>".
# With --recover, entries whose ">source:accession" header is already present
# in the output file are not downloaded again (they still count towards --max).

require 'optparse'
require 'rubygems'
require 'restclient'
require 'open-uri'
require 'json' # the feature name is lowercase; 'JSON' fails on case-sensitive filesystems
require 'erb'  # ERB::Util.url_encode replaces URI.encode, removed in Ruby 3.0

# TRUE/FALSE constants are deprecated since Ruby 2.4; use the literals.
o = { q: false, url: 'http://api.metagenomics.anl.gov/m5nr', max: 0, recover: false }
ARGV << '-h' if ARGV.empty?
OptionParser.new do |opts|
  opts.banner = "
Downloads a set of sequences from M5nr with a given functional annotation.

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Mandatory"
  opts.on("-f", "--function STR", "Functional annotation."){ |v| o[:function] = v }
  opts.separator ""
  opts.separator "Options"
  opts.on("-m", "--max INT", "Maximum number of sequences to download. By default: all (0)."){ |v| o[:max] = v.to_i }
  opts.on("-r", "--recover", "If set, tries to recover a previous FastA."){ o[:recover] = true }
  opts.on("-n", "--url STR", "URL for M5nr API. By default: #{o[:url]}."){ |v| o[:url] = v }
  opts.on("-o", "--out FILE", "File containing the sequences. By default: value of -f appended with .fa."){ |v| o[:out] = v }
  opts.on("-q", "--quiet", "Run quietly (no STDERR output)"){ o[:q] = true }
  opts.on("-h", "--help", "Display this screen") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
abort "-f is mandatory" if o[:function].nil?
o[:out] ||= "#{o[:function].gsub(/ /, '_')}.fa"
# Percent-encode the annotation so it is safe as a URL path segment.
uri_fun = ERB::Util.url_encode(o[:function])

# IDs (">source:accession") already stored in the output; only filled when
# recovering from a previous run.
ignore = {}
recovering = o[:recover] && File.exist?(o[:out])
if recovering
  File.foreach(o[:out]) do |ln|
    id = /^>(\S+)\s/.match(ln)
    ignore[id[1]] = 1 unless id.nil?
  end
end

i = 0
# Block form guarantees the output is flushed and closed even if `abort` fires.
File.open(o[:out], recovering ? "a+" : "w") do |of|
  next_url = "#{o[:url]}/function/#{uri_fun}"
  loop do
    $stderr.print "Downloading sequence #{i+1}. \r" unless o[:q]
    res_fun = RestClient.get next_url
    abort "Unable to reach MG-RAST M5nr API, error code #{res_fun.code}." unless res_fun.code == 200
    fun = JSON.parse(res_fun.to_str)
    fun["data"].each do |datum|
      if ignore["#{datum["source"]}:#{datum["accession"]}"].nil?
        res_seq = RestClient.get "#{o[:url]}/md5/#{datum["md5"]}", {:params=>{:sequence=>1}}
        abort "Unable to reach MG-RAST M5nr API, error code #{res_seq.code}." unless res_seq.code == 200
        seq = JSON.parse(res_seq.to_str)
        of.puts ">#{datum["source"]}:#{datum["accession"]} #{datum["function"]} [#{datum["organism"]} taxid:#{datum["ncbi_tax_id"]}]"
        # Wrap the sequence at 80 columns
        of.puts seq["data"]["sequence"].scan(/.{80}|.+/).map{ |x| x.strip }.join($/)
      end
      # Skipped (already recovered) entries are counted as well
      i += 1
      break if o[:max] > 0 && i >= o[:max]
    end # |datum|
    next_url = fun["next"]
    break if next_url.nil? || (o[:max] > 0 && i >= o[:max])
  end
end

$stderr.puts "Downloaded #{i} sequences." unless o[:q]
81
-
@@ -1,198 +0,0 @@
1
#!/usr/bin/env perl
#
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
# @license artistic license 2.0
# @update Mar-23-2015
#
# Summarizes a MyTaxa output file into per-rank abundance tables
# (<prefix>.<Rank>.txt), optionally weighting each contig by read counts (-c)
# and/or by the genes it contains (-g), and optionally producing a list of
# innominate taxa (-I), a per-gene classification (-G) and a Krona input (-K).
#
use warnings;
use strict;
use Symbol;
use Getopt::Std;
use List::Util qw/max/;

sub HELP_MESSAGE { die "
Usage:
   $0 [args]

Mandatory:
   -m <str>   MyTaxa output.

Optional:
   -g <str>   Genes predicted in the format defined by -f. If not passed, abundance is assumed to be based
              on contigs.
   -f <str>   Format of the predicted genes. One of:
              o gff2: GFF v2 as produced by MetaGeneMark.hmm (default).
              o gff3: GFF v3, including the field id in the last column (with the Gene ID).
              o tab: A tab-delimited file with the gene ID (col #1), the length of the gene in bp (col #2),
                and the ID of the corresponding contig (col #3). The length of the gene (col #2) isn't used
                (and it can be empty), but the column must exist (i.e., 2 tabs per line) for compatibility
                with BlastTab.metaxaPrep.pl
   -c <str>   Counts file: Sequence IDs (genes if -g is provided, contigs otherwise) and reads per sequence
              in a tab-delimited file. If not provided, each sequence counts as 1.
   -O <str>   Prefix of the output files to be generated. By default, the value of -m.
   -I <str>   File containing the complete classification of all the contigs identified as Innominate taxa.
              By default, this file is not created.
   -G <str>   File containing the classification of each gene. By default, this file is not created. This
              requires -g to be set. Note: This option requires extra RAM.
   -K <str>   File containing a krona input file. By default, this file is not created.
   -k <str>   List of ranks to include in the Krona file, delimited by comma. It MUST be decreasing rank.
              By default: 'superkingdom,phylum,class,family,genus,species'. This is ignored unless -K also
              is passed.
   -R <str>   List of taxonomic ranks for which individual reports should be generated, delimited by comma.
              It MUST be decreasing rank. By default: 'phylum,genus,species'.
   -r         If set, reports raw counts. Otherwise, reports permil of the rank.
   -u         Report Unknown taxa.
   -q         Run quietly.
   -h         Display this help message and exits.

" }

my %o;
getopts('g:f:c:m:O:I:G:K:k:R:ruqh', \%o);
$o{h} and &HELP_MESSAGE;
$o{m} or &HELP_MESSAGE;
$o{O} ||= $o{m};
$o{f} ||= "gff2";
$o{k} ||= "superkingdom,phylum,class,family,genus,species";
my @K = split /,/, lc $o{k};
$o{R} ||= "phylum,genus,species";
my @R = split /,/, lc $o{R};
($o{G} and not $o{g}) and die "-G requires -g to be set.\n";


my %gene;    # gene ID => contig ID (only filled when -c is also given)
my %count;   # contig ID => weight (genes or reads)
my %ctg=();  # contig ID => [gene IDs] (only filled with -G)
if($o{g}){
   print STDERR "Reading genes collection.\n" unless $o{q};
   open GFF, "<", $o{g} or die "Cannot read file: $o{g}: $!\n";
   while(<GFF>){
      next if /^#/;
      next if /^\s*$/;
      chomp;
      my($id,$ctg);
      my @ln = split /\t/;
      if($o{f} eq 'gff2'){
         exists $ln[8] or die "Cannot parse line $., expecting 9 columns: $_\n";
         $id = $ln[8];
         $id =~ s/gene_id /gene_id_/;
         $ctg=$ln[0];
      }elsif($o{f} eq 'gff3'){
         exists $ln[8] or die "Cannot parse line $., expecting 9 columns: $_\n";
         $ln[8] =~ /id=([^;]+)/ or die "Cannot parse line $.: $_\n";
         $id = $1;
         $ctg = $ln[0];
      }elsif($o{f} eq 'tab'){
         exists $ln[2] or die "Cannot parse line $., expecting 3 columns: $_\n";
         $id = $ln[0];
         $ctg = $ln[2];
      }else{
         die "Unsupported format: ".$o{f}.".\n";
      }
      # Keep only the first word of the contig name
      $ctg =~ s/ .*//;
      if($o{c}){
         # Counts will be assigned later from the counts file
         $gene{$id} = $ctg;
      }else{
         # Without a counts file, each gene adds 1 to its contig
         $count{$ctg}++;
      }
      push( @{$ctg{$ctg}||=[]}, $id ) if $o{G};
   }
   close GFF;
   print STDERR " Found ".(scalar(keys %gene))." genes.\n" unless $o{q};
}

my $Nreads = 0;
if($o{c}){
   print STDERR "Reading read-counts.\n" unless $o{q};
   open COUNT, "<", $o{c} or die "Cannot read file: $o{c}: $!\n";
   while(<COUNT>){
      chomp;
      my @l = split /\t/;
      if($o{g}){
         exists $gene{$l[0]} or die "Cannot find gene's contig: $l[0].\n";
         $count{ $gene{$l[0]} } += $l[1];
         # Genes left in %gene after this loop are those without reads
         delete $gene{$l[0]};
      }else{
         $count{ $l[0] } += $l[1];
      }
      $Nreads += $l[1];
   }
   close COUNT;
   print STDERR " Found ".scalar(keys %gene)." genes without reads.\n" if scalar(keys %gene) and not $o{q};
   # Make sure contigs of genes without reads exist in %count (with zero)
   $count{$_}+=0 for values %gene;
   print STDERR " Found ".scalar(keys %count)." sequences and $Nreads reads.\n" unless $o{q};
}

# Progress message now honours -q, like every other progress message.
print STDERR "Reading Metaxa results.\n" unless $o{q};
open METAXA, "<", $o{m} or die "Cannot read file: $o{m}: $!\n";
my @rank_name = map { ucfirst } ('unknown', @R);
my %rank = map { ($rank_name[$_]=>$_) } 0 .. $#rank_name;
my @rank_tag = ("NA", map { "<$_>" } @R);
# One counter and one table per reported rank (index 0 = all reads), sized
# from -R instead of being hard-coded to three entries.
my @n = (0) x scalar(@rank_name);
my @out = map { +{} } @rank_name;
$o{I} and (open OUT_I, ">", $o{I} or die "Cannot create file: $o{I}: $!\n");
$o{K} and (open OUT_K, ">", $o{K} or die "Cannot create file: $o{K}: $!\n");
$o{G} and (open OUT_G, ">", $o{G} or die "Cannot create file: $o{G}: $!\n");

my $Nreads_class = 0;
my $Nno_read_ctg = 0;
while(not eof(METAXA)){
   # Entries are read two lines at a time: a tab-delimited header followed by
   # the classification string.
   my $hdr = <METAXA>;
   my $hdr_line = $.;
   my @h = split /\t/, $hdr;
   my $t = <METAXA>;
   $t = '' unless defined $t; # odd number of lines: treat as empty lineage
   chomp $t;
   chomp $hdr;
   # The original message interpolated $_, which is never set in this loop.
   exists $h[3] or die "Cannot parse MyTaxa file, line $hdr_line: $hdr\n";
   my $count_h;
   if($o{c} or $o{g}){
      unless(exists $count{$h[0]}){
         $Nno_read_ctg++;
         next;
      }
      $count_h = $count{$h[0]};
   }else{
      $count_h = 1;
   }
   if($o{G}){ print OUT_G "$_\t$t\n" for @{$ctg{$h[0]}} }
   next unless $count_h;
   my $last = 'organism';
   $n[0] += $count_h;
   # Rank names that are not listed in -R are handled as rank 0 (unknown)
   my $h_rank = exists $rank{$h[1]} ? $rank{$h[1]} : 0;
   for my $r (1 .. max(values %rank)){
      if($h_rank >= $r){
         if($t =~ m/$rank_tag[$r]([^;]*)/){
            $last = $1 if $1;
         }else{
            # Classified at this depth, but the lineage has no name for it
            $last = $last=~/^Innominate / ? $last : "Innominate $last";
            $o{I} and print OUT_I "$h[0]\t$rank_name[$r]\t$last\t$t\n";
         }
         $out[$r]->{$last} += $count_h;
         $n[$r] += $count_h;
      }else{
         $out[$r]->{"Unknown $last"} += $count_h if $o{u};
      }
   }
   if($o{K}){
      my $ln = $count_h;
      for my $r (@K){ $ln.= "\t".($t=~m/<$r>([^;]+)/?$1:'') }
      print OUT_K "$ln\n";
   }
   $Nreads_class+= $count_h;
}
# Reads not accounted for by any classified sequence go to the Krona root
print OUT_K "".($Nreads-$Nreads_class)."\n" if $o{K} and $Nreads>$Nreads_class;
close METAXA;
$o{I} and close OUT_I;
$o{K} and close OUT_K;
$o{G} and close OUT_G;
print " Found $n[0] reads.\n" unless $o{q};
print " Couldn't find counts for $Nno_read_ctg contigs.\n" if $Nno_read_ctg;
unless($o{q}){ print " Found $n[$_] classified reads at ".$rank_name[$_]." level.\n" for (1 .. max(values %rank)) }

print STDERR "Generating output.\n" unless $o{q};
for my $rank (1 .. max(values %rank)){
   open OUT, ">", "$o{O}.".$rank_name[$rank].".txt" or die "Cannot create file: $o{O}.".$rank_name[$rank].".txt: $!\n";
   # Permil is relative to the reads classified at this rank. With -u a rank
   # may hold only "Unknown" entries and no classified reads; report 0 in that
   # case instead of dying with a division by zero.
   my $scale = $o{r} ? 1 : ($n[$rank] ? 1000/$n[$rank] : 0);
   for my $class (keys %{$out[$rank]}){
      printf OUT "%s\t%.20f\n", $class, ($out[$rank]->{$class}*$scale);
   }
   close OUT;
}
198
-
@@ -1,35 +0,0 @@
1
#!/usr/bin/env perl
#
# @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
# @update: Mar-23-2015
# @license: artistic license 2.0
#
# Prints the ID (first tab-delimited field of the preceding line) of every
# entry in a MyTaxa output whose classification line contains the requested
# taxon, optionally restricted to a rank.
#

use warnings;
use strict;

my($file,$tax,$rank) = @ARGV;
($file and $tax) or die "
Usage:
   $0 file.txt taxon[ rank]

   file.txt   MyTaxa output.
   taxon      Taxon to look for.
   rank       Rank of taxon (optional). By default: any rank.

";
# The rank is used as a regular expression so that the default matches any tag
$rank ||= ".*";
$rank = lc $rank;

open my $mt, "<", $file or die "Cannot read file: $file: $!\n";
my $last = '';
while(my $ln=<$mt>){
   chomp $ln;
   # \Q..\E: the taxon is matched literally, so names containing regex
   # metacharacters (brackets, parentheses, '+', ...) are found as typed.
   if($ln =~ /<$rank>\Q$tax\E(;|$)/){
      # The previous line is the header of this classification; keep its ID
      $last =~ s/\t.*//;
      print $last, "\n";
   }
   $last = $ln;
}
close $mt;
35
-
@@ -1,49 +0,0 @@
1
#!/usr/bin/env ruby

#
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
# @update: Feb-06-2015
# @license artistic license 2.0
#

# Reads a MyTaxa output and prints one "<contig>\t<taxon>" line per entry,
# where <taxon> is the name found for the requested rank in the entry's
# classification line, or "Unclassified" when the rank is absent.

require 'optparse'

# TRUE/FALSE constants are deprecated since Ruby 2.4; use the literals.
opts = { rank: 'genus', quiet: false }
ARGV << '-h' if ARGV.empty?
OptionParser.new do |opt|
  opt.separator "Generates a simple tabular file with the classification of each sequence at a given taxonomic rank from a MyTaxa output."
  opt.separator ""
  opt.on("-i", "--mytaxa FILE", "Input MyTaxa file."){ |v| opts[:mytaxa] = v }
  opt.on("-r", "--rank STR", "Taxonomic rank. By default: #{opts[:rank]}."){ |v| opts[:rank] = v.downcase }
  opt.on("-q", "--quiet", "Run quietly."){ opts[:quiet] = true }
  opt.on("-h", "--help", "Display this screen.") do
    puts opt
    exit
  end
  opt.separator ""
end.parse!
abort "-i/--mytaxa is mandatory." if opts[:mytaxa].nil?
# File.exists? was removed in Ruby 3.2
abort "-i/--mytaxa must exist." unless File.exist? opts[:mytaxa]

begin
  # Block form closes the handle even when a parsing error is raised
  File.open(opts[:mytaxa], "r") do |f|
    ctg = nil # name of the header waiting for its classification line
    while (ln = f.gets)
      m = /^(.+)(\t.+){3}/.match(ln)
      if m
        # Header line (at least four tab-delimited fields)
        raise "Couldn't find classification for contig #{ctg}" unless ctg.nil?
        ctg = m[1]
      else
        # Classification line of the previous header
        raise "Couldn't find the contig name at line #{$.}" if ctg.nil?
        m = /<#{opts[:rank]}>([^;]+)/.match(ln)
        puts "#{ctg}\t#{m ? m[1] : "Unclassified"}"
        ctg = nil
      end
    end
  end
rescue => err
  # Errors are reported on STDERR; the exit status is left untouched
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
end
49
-
@@ -1,92 +0,0 @@
1
#!/usr/bin/env ruby

#
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
# @license artistic license 2.0
#

# Maps NCBI accessions to their taxonomy and prints one tab-delimited row per
# accession: the accession, its linked Taxonomy ID, and one column per
# requested rank.

$:.push File.expand_path(File.dirname(__FILE__) + "/lib")
require "enveomics_rb/remote_data"
# NOTE(review): `use`, `OptionParser` and `RemoteData` are not defined in this
# file; presumably they are provided by enveomics_rb -- confirm.
use "nokogiri"

#================================[ Options parsing ]
o = {
  :q=>false, :accs=>[], :dbfrom=>"nuccore", :header=>true,
  :no_nil=>false, :ret=>"ScientificName",
  :ranks=>%w(superkingdom phylum class order family genus species)}
OptionParser.new do |opt|
  opt.banner = "
    Maps a list of NCBI accessions to their corresponding taxonomy using the NCBI
    EUtilities. Avoid using this script on millions of entries at a time, since
    each entry elicits two requests to NCBI's servers.

    Usage: #{$0} [options]".gsub(/^ +/,"")
  opt.separator ""
  opt.on("-a", "--acc acc1,acc2.ver,...", Array,
    "Comma-separated list of accessions. Required unless -i is passed."
    ){ |v| o[:accs]=v }
  # The help text used to point to a non-existent -g option
  opt.on("-i", "--infile FILE",
    "Raw text file containing the list of accessions, one per line.",
    "Required unless -a is passed."){ |v| o[:infile]=v }
  opt.on("-p", "--protein",
    "Use if the accessions are proteins. Otherwise, accessions are assumed " +
    "to be from the Nuccore Database."){ o[:dbfrom]="protein" }
  opt.on("-r", "--ranks RANK1,RANK2,...", Array,
    "Taxonomic ranks to report. By default: #{o[:ranks].join(",")}."
    ){ |v| o[:ranks]=v }
  opt.on("-n", "--noheader",
    "Do not include a header in the output."){ o[:header]=false }
  opt.on("-t", "--taxids",
    "Return Taxonomy IDs instead of scientific names."){ o[:ret]="TaxId" }
  opt.on("--ignore-missing",
    "Does not report missing accessions in the output file.",
    "By default, it reports accessions and empty values for all other columns."
    ){ |v| o[:no_nil]=v }
  opt.on("-q", "--quiet", "Run quietly."){ |v| o[:q]=true }
  opt.on("-h", "--help","Display this screen") do
    puts opt
    exit
  end
  opt.separator ""
end.parse!

#================================[ Functions ]
# Looks up the taxonomy entry linked to accession +acc+ of database +db+.
# Returns the first Link/Id node of the eLink XML response, or nil if the
# response contains none.
def acc2taxid(db, acc)
  doc = Nokogiri::XML( RemoteData.elink({:dbfrom=>db,
    :db=>"taxonomy", :id=>acc, :idtype=>"acc"}) )
  doc.at_xpath("/eLinkResult/LinkSet/LinkSetDb/Link/Id")
end

#================================[ Main ]
begin
  unless o[:infile].nil?
    # Blank lines would otherwise be queried as empty accessions
    o[:accs] += File.readlines(o[:infile]).map{ |l| l.chomp }.reject{ |l| l.empty? }
  end
  o[:ranks].map!{ |r| r.downcase }
  if o[:header]
    puts((["Acc", "TaxId"] + o[:ranks].map{ |r| r.capitalize }).join("\t"))
  end
  o[:accs].each do |acc|
    taxid = acc2taxid(o[:dbfrom], acc)
    if taxid.nil?
      # --quiet is now honoured for this warning
      warn "Cannot find link to taxonomy: #{acc} " unless o[:q]
      puts(([acc, ""] + o[:ranks].map{ |i| "" }).join("\t")) unless o[:no_nil]
      next
    end
    # Rank => value (scientific name or TaxId, see -t) for the taxon itself
    # and for every entry of its lineage
    taxonomy = {}
    doc = Nokogiri::XML( RemoteData.efetch({:db=>"taxonomy",
      :id=>taxid.content}) )
    taxonomy[ doc.at_xpath("/TaxaSet/Taxon/Rank").content ] =
      doc.at_xpath("/TaxaSet/Taxon/#{o[:ret]}").content
    doc.xpath("/TaxaSet/Taxon/LineageEx/Taxon").each do |taxon|
      taxonomy[ taxon.at_xpath("./Rank").content ] =
        taxon.at_xpath("./#{o[:ret]}").content
    end
    puts(([acc, taxid.content] +
      o[:ranks].map{ |rank| taxonomy.fetch(rank, "") }).join("\t"))
  end
rescue => err
  # Any failure ends the run after reporting it on STDERR
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  err
end