miga-base 0.3.0.0 → 0.3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +21 -4
  3. data/actions/init.rb +258 -0
  4. data/actions/run_local.rb +1 -2
  5. data/actions/test_taxonomy.rb +4 -1
  6. data/bin/miga +8 -1
  7. data/lib/miga/dataset.rb +4 -4
  8. data/lib/miga/dataset_result.rb +7 -4
  9. data/lib/miga/version.rb +2 -2
  10. data/scripts/_distances_noref_nomulti.bash +3 -1
  11. data/scripts/clade_finding.bash +1 -1
  12. data/scripts/init.bash +1 -1
  13. data/scripts/miga.bash +1 -1
  14. data/scripts/mytaxa.bash +78 -72
  15. data/scripts/mytaxa_scan.bash +67 -62
  16. data/scripts/ogs.bash +1 -1
  17. data/scripts/trimmed_fasta.bash +4 -3
  18. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  19. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  20. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  21. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  22. data/utils/enveomics/LICENSE.txt +73 -0
  23. data/utils/enveomics/Makefile +52 -0
  24. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  25. data/utils/enveomics/Manifest/Tasks/blasttab.json +703 -0
  26. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  27. data/utils/enveomics/Manifest/Tasks/fasta.json +571 -0
  28. data/utils/enveomics/Manifest/Tasks/fastq.json +208 -0
  29. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  30. data/utils/enveomics/Manifest/Tasks/ogs.json +339 -0
  31. data/utils/enveomics/Manifest/Tasks/other.json +746 -0
  32. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  33. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +454 -0
  34. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  35. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  36. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  37. data/utils/enveomics/Manifest/categories.json +132 -0
  38. data/utils/enveomics/Manifest/examples.json +154 -0
  39. data/utils/enveomics/Manifest/tasks.json +4 -0
  40. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  41. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +56 -0
  42. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +60 -0
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +38 -0
  44. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  45. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  46. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  49. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  50. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  51. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  52. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  53. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  54. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  55. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  56. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  57. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  58. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  59. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  60. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +55 -0
  61. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  62. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  63. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  64. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  68. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  69. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  70. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  71. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  72. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  73. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  74. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  75. data/utils/enveomics/README.md +40 -0
  76. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  77. data/utils/enveomics/Scripts/Aln.cat.rb +162 -0
  78. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  79. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  80. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  81. data/utils/enveomics/Scripts/BlastTab.addlen.rb +61 -0
  82. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  83. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  84. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +106 -0
  85. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  86. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  87. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  88. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  89. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  90. data/utils/enveomics/Scripts/BlastTab.recplot2.R +40 -0
  91. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  92. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  93. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  94. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  95. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  96. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  97. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  98. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  99. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  100. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  101. data/utils/enveomics/Scripts/FastA.N50.pl +56 -0
  102. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  103. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  104. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  105. data/utils/enveomics/Scripts/FastA.fragment.rb +92 -0
  106. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  107. data/utils/enveomics/Scripts/FastA.interpose.pl +87 -0
  108. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  109. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  110. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  111. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  112. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  113. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  114. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  115. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  116. data/utils/enveomics/Scripts/FastA.tag.rb +64 -0
  117. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  118. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  119. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  120. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  121. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  122. data/utils/enveomics/Scripts/FastQ.tag.rb +63 -0
  123. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  124. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  125. data/utils/enveomics/Scripts/HMM.essential.rb +254 -0
  126. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  127. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  128. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +306 -0
  129. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  130. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  131. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  132. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  133. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  134. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  135. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  136. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  137. data/utils/enveomics/Scripts/SRA.download.bash +50 -0
  138. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  139. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  140. data/utils/enveomics/Scripts/Table.barplot.R +30 -0
  141. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  142. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  143. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  144. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  145. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  146. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  147. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  148. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  149. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  150. data/utils/enveomics/Scripts/aai.rb +373 -0
  151. data/utils/enveomics/Scripts/ani.rb +362 -0
  152. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  153. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  154. data/utils/enveomics/Scripts/lib/data/essential.hmm.gz +0 -0
  155. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +26 -0
  156. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  157. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  158. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  159. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  160. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +30 -0
  161. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  162. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  163. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  164. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  165. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  166. data/utils/enveomics/Scripts/ogs.rb +104 -0
  167. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  168. data/utils/enveomics/Scripts/rbm.rb +137 -0
  169. data/utils/enveomics/Tests/Makefile +10 -0
  170. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  171. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  172. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  173. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  174. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  175. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  176. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  177. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  178. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  179. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  180. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  181. data/utils/enveomics/Tests/alkB.nwk +1 -0
  182. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  183. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  184. data/utils/enveomics/Tests/hiv1.faa +59 -0
  185. data/utils/enveomics/Tests/hiv1.fna +134 -0
  186. data/utils/enveomics/Tests/hiv2.faa +70 -0
  187. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  188. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  189. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  190. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  191. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  192. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  193. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  194. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  195. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  196. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  197. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  198. data/utils/enveomics/build_enveomics_r.bash +44 -0
  199. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  200. data/utils/enveomics/enveomics.R/NAMESPACE +35 -0
  201. data/utils/enveomics/enveomics.R/R/autoprune.R +121 -0
  202. data/utils/enveomics/enveomics.R/R/barplot.R +165 -0
  203. data/utils/enveomics/enveomics.R/R/cliopts.R +119 -0
  204. data/utils/enveomics/enveomics.R/R/df2dist.R +117 -0
  205. data/utils/enveomics/enveomics.R/R/growthcurve.R +263 -0
  206. data/utils/enveomics/enveomics.R/R/recplot.R +320 -0
  207. data/utils/enveomics/enveomics.R/R/recplot2.R +745 -0
  208. data/utils/enveomics/enveomics.R/R/tribs.R +423 -0
  209. data/utils/enveomics/enveomics.R/R/utils.R +16 -0
  210. data/utils/enveomics/enveomics.R/README.md +52 -0
  211. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  212. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  213. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +30 -0
  214. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +43 -0
  215. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +19 -0
  216. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +37 -0
  217. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +24 -0
  218. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +24 -0
  219. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +33 -0
  220. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +64 -0
  221. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +37 -0
  222. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +19 -0
  223. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +18 -0
  224. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +26 -0
  225. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +25 -0
  226. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +26 -0
  227. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +49 -0
  228. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +28 -0
  229. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +97 -0
  230. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +40 -0
  231. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +40 -0
  232. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +24 -0
  233. data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeak.Rd +40 -0
  234. data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeaks.Rd +18 -0
  235. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +22 -0
  236. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +20 -0
  237. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +18 -0
  238. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  239. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +27 -0
  240. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +53 -0
  241. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -0
  242. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +44 -0
  243. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +21 -0
  244. data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +15 -0
  245. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  246. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  247. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +43 -0
  248. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +29 -0
  249. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +30 -0
  250. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +71 -0
  251. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +18 -0
  252. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +18 -0
  253. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +18 -0
  254. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +27 -0
  255. data/utils/enveomics/globals.mk +8 -0
  256. data/utils/enveomics/manifest.json +9 -0
  257. data/utils/index_metadata.rb +0 -0
  258. data/utils/plot-taxdist.R +0 -0
  259. data/utils/requirements.txt +19 -19
  260. metadata +242 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 141b39aed7636b3f2389837e5fd13348a8e11252
4
- data.tar.gz: b4bcec9bc8a8fd8c15c670b6f0fae0925d36138e
3
+ metadata.gz: 20d84d5171a208f02d6278778c7967ff7d1ec3e5
4
+ data.tar.gz: 7f92d205fb8058af84b15c96d158049889efa0ca
5
5
  SHA512:
6
- metadata.gz: c92ae5fb577d1e945d2a8be0f3d3dbe05aefd61094ee386f0bfb198237e83e487660419fa3cafb1cdc13aa25929ddd2022fab03530cac3ab4e927cb35481e82e
7
- data.tar.gz: 6566cad346ff51d1e6b55c2e7bbaa3b5d766ae8292db0e829430237e2254dbb09a67d1ea63cdb9ea42365e47236b01b2af41e6e722aa153523bf39438d5e7ccb
6
+ metadata.gz: 3d78f1bf02243234a668e91cd2bafe608a7ca48fd41a11bcee77f6a7164d0222841a0b54ed8c3076f38d47268ff671ac128519d6433284f7fef8286d638c77b4
7
+ data.tar.gz: 21ae7b9c7514d542496197541a14ddc3d19dd424f97c04ec1a1913f34dadf93f4b25f76c7d54d2c3c79b4ccb67cf567bb552ea35c263dc62f0db4ab235757ffd
data/README.md CHANGED
@@ -8,20 +8,33 @@
8
8
  # MiGA: Microbial Genomes Atlas
9
9
 
10
10
  **Important**: MiGA is under active development, and we currently cannot ensure
11
- any stability on the different interfaces. We'll be launching a Beta Testing
12
- program soon, with dedicated support for a small number of laboratories. If
11
+ any stability on the different interfaces. We're currently running a Beta
12
+ Testing program with dedicated support for a small number of laboratories. If
13
13
  you're interested, please [contact us][contact].
14
14
 
15
15
  For additional information on MiGA, visit:
16
+
17
+ * [MiGA users list][mailing-list]:
18
+ Forum to discuss with other users and developers.
16
19
  * [MiGA manual][gitbook]: The definitive guide to MiGA.
17
20
  * [MiGA API docs][rubydoc]: Inner-workings of the `miga-base` gem.
18
21
  * [MiGA Web][miga-web]: MiGA on Rails!
19
- * [MiGA GUI][miga-gui]: MiGA on Shoes!
20
22
 
23
+ # For the impatient
24
+
25
+ If you're like us, you probably want to see software in action from the get go.
26
+ You have two options:
27
+
28
+ 1. Get a peek at MiGA using [MiGA Online][miga-online].
29
+ 2. Install the [requirements](manual/part2/requirements.md) and follow the
30
+ [installation instructions](manual/part2/installation.md). Once you have MiGA
31
+ installed, you can [deploy some examples](manual/part4.md).
21
32
 
22
33
  # Authors
23
34
 
24
- Developed and maintained by [Luis M. Rodriguez-R][lrr].
35
+ Developed and maintained by [Luis M. Rodriguez-R][lrr]. MiGA is the result of a
36
+ collaboration between [Kostas Lab][kostas] at the Georgia Institute of
37
+ Technology and [RDP][rdp] at Michigan State University.
25
38
 
26
39
 
27
40
  # License
@@ -29,8 +42,12 @@ Developed and maintained by [Luis M. Rodriguez-R][lrr].
29
42
  See [LICENSE](LICENSE).
30
43
 
31
44
  [lrr]: http://lmrodriguezr.github.io/
45
+ [mailing-list]: https://groups.google.com/forum/#!forum/miga-users
32
46
  [gitbook]: https://miga.gitbooks.io/miga/content/
33
47
  [rubydoc]: http://www.rubydoc.info/github/bio-miga/miga
34
48
  [contact]: http://enve-omics.gatech.edu/node/7
35
49
  [miga-web]: https://github.com/bio-miga/miga-web
36
50
  [miga-gui]: https://github.com/bio-miga/miga-gui
51
+ [miga-online]: http://microbial-genomes.org/
52
+ [kostas]: http://enve-omics.gatech.edu/
53
+ [rdp]: http://rdp.cme.msu.edu/
data/actions/init.rb ADDED
@@ -0,0 +1,258 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ require "shellwords"
7
+
8
+ o = {q:true, mytaxa:nil, config:File.expand_path(".miga_modules", ENV["HOME"]),
9
+ ask: false, auto:false, dtype: "bash"}
10
+ opts = OptionParser.new do |opt|
11
+ opt_banner(opt)
12
+ opt.on("-c","--config PATH",
13
+ "Path to the Bash configuration file.",
14
+ "By default: #{o[:config]}."){ |v| o[:config] = v }
15
+ opt.on("--[no-]mytaxa",
16
+ "Should I try setting up MyTaxa? By default: interactive."
17
+ ){ |v| o[:mytaxa] = v }
18
+ opt.on("--daemon-type STRING",
19
+ "Type of daemon launcher, one of: bash, qsub, msub.",
20
+ "By default: #{o[:dtype]}."){ |v| o[:dtype]=v }
21
+ opt.on("--ask-all", "If set, asks for the location of all software.",
22
+ "By default, only the locations missing in PATH are requested"
23
+ ){ |v| o[:ask] = v }
24
+ opt.on("--auto", "If set, accepts all defaults as answers."
25
+ ){ |v| o[:auto] = v }
26
+ opt_common(opt, o)
27
+ end.parse!
28
+ $auto_answer = o[:auto]
29
+
30
+ def ask_user(q, d=nil, ans=nil, force=false)
31
+ $stderr.print "#{q}#{" (#{ans.join(" / ")})" unless ans.nil?}" +
32
+ "#{" [#{d}]" unless d.nil?} > "
33
+ if $auto_answer and not force
34
+ $stderr.puts ""
35
+ else
36
+ o = gets.chomp
37
+ end
38
+ o = d if o.nil? or o.empty?
39
+ unless ans.nil? or ans.include? o
40
+ $stderr.puts "Answer not recognized."
41
+ return ask_user(q, d, ans)
42
+ end
43
+ o
44
+ end
45
+
46
+ ##=> Main <=
47
+
48
+ miga = MiGA::MiGA.root_path
49
+ $stderr.puts <<BANNER
50
+ ===[ Welcome to MiGA, the Microbial Genome Atlas ]===
51
+
52
+ I'm the initialization script, and I'll sniff around your computer to
53
+ make sure you have all the requirements for MiGA data processing.
54
+
55
+ BANNER
56
+
57
+ if ask_user(
58
+ "Would you like to see all the requirements before starting?",
59
+ "no", %w(yes no)) == "yes"
60
+ File.open(File.expand_path("utils/requirements.txt", miga), "r") do |fh|
61
+ fh.each_line{ |ln| $stderr.puts ln }
62
+ end
63
+ end
64
+
65
+ rc_path = File.expand_path(".miga_rc", ENV["HOME"])
66
+ if File.exist? rc_path
67
+ if ask_user(
68
+ "I found a previous configuration. Do you want to continue?",
69
+ "yes", %w(yes no))=="no"
70
+ $stderr.puts "OK, see you soon!"
71
+ exit 0
72
+ end
73
+ end
74
+ rc_fh = File.open(rc_path, "w")
75
+ rc_fh.puts <<BASH
76
+ #!/bin/bash
77
+ # `miga init` made this on #{Time.now}
78
+
79
+ BASH
80
+
81
+ # Check bash configuration file
82
+ unless File.exist? o[:config]
83
+ o[:config] = ask_user(
84
+ "Is there a script I need to load at startup?", o[:config])
85
+ end
86
+ if File.exists? o[:config]
87
+ o[:config] = File.expand_path o[:config]
88
+ $stderr.puts "Found bash configuration script: #{o[:config]}."
89
+ rc_fh.puts "MIGA_STARTUP='#{o[:config]}'"
90
+ rc_fh.puts "source \"$MIGA_STARTUP\""
91
+ end
92
+ $stderr.puts ""
93
+
94
+ # Check for software requirements
95
+ $stderr.puts "Looking for requirements:"
96
+ if o[:mytaxa].nil?
97
+ o[:mytaxa] = ask_user(
98
+ "Should I include MyTaxa modules?","yes",%w(yes no))=="yes"
99
+ end
100
+ rc_fh.puts "export MIGA_MYTAXA=\"no\"" unless o[:mytaxa]
101
+ paths = {}
102
+ File.open(File.expand_path("utils/requirements.txt", miga), "r") do |fh|
103
+ fh.each_line do |ln|
104
+ next if $. < 3
105
+ r = ln.chomp.split(/\t+/)
106
+ next if r[0] =~ /\(opt\)$/ and not o[:mytaxa]
107
+ $stderr.print "Testing #{r[0]}#{" (#{r[3]})" if r[3]}... "
108
+ path = nil
109
+ loop do
110
+ d_path = File.dirname(`which "#{r[1]}"`)
111
+ if o[:ask] or d_path=="."
112
+ path = ask_user("Where can I find it?", d_path, nil, true)
113
+ else
114
+ path = d_path
115
+ $stderr.puts path
116
+ end
117
+ if File.executable? File.expand_path(r[1], path)
118
+ if d_path != path
119
+ rc_fh.puts "MIGA_PATH=\"#{path}:$MIGA_PATH\" # #{r[1]}"
120
+ end
121
+ break
122
+ end
123
+ $stderr.print "I cannot find #{r[1]}. "
124
+ end
125
+ paths[r[1]] = File.expand_path(r[1], path).shellescape
126
+ end
127
+ end
128
+ rc_fh.puts "export PATH=\"$MIGA_PATH$PATH\""
129
+ $stderr.puts ""
130
+
131
+ # Check for other files
132
+ if o[:mytaxa]
133
+ $stderr.puts "Looking for MyTaxa databases:"
134
+ mt = File.dirname paths["MyTaxa"]
135
+ $stderr.print "Looking for scores... "
136
+ unless Dir.exist? File.expand_path("db", mt)
137
+ $stderr.puts "no.\nExecute 'python #{mt}/utils/download_db.py'."
138
+ exit 1
139
+ end
140
+ $stderr.puts "yes."
141
+ $stderr.print "Looking for diamond db... "
142
+ unless File.exist? File.expand_path("AllGenomes.faa.dmnd", mt)
143
+ $stderr.puts "no.\nDownload " +
144
+ "'http://enve-omics.ce.gatech.edu/data/public_mytaxa/" +
145
+ "AllGenomes.faa.dmnd' into #{mt}."
146
+ exit 1
147
+ end
148
+ $stderr.puts ""
149
+ end
150
+
151
+ # Check for R packages
152
+ $stderr.puts "Looking for R packages:"
153
+ %w(enveomics.R ape phangorn phytools cluster vegan).each do |pkg|
154
+ $stderr.print "Testing #{pkg}... "
155
+ `echo "library('#{pkg}')" | #{paths["R"].shellescape} --vanilla -q 2>&1`
156
+ if $?.success?
157
+ $stderr.puts "yes."
158
+ else
159
+ $stderr.puts "no, installing."
160
+ $stderr.print "" +
161
+ `echo "install.packages('#{pkg}', repos='http://cran.rstudio.com/')" \
162
+ | #{paths["R"].shellescape} --vanilla -q 2>&1`
163
+ `echo "library('#{pkg}')" | #{paths["R"].shellescape} --vanilla -q 2>&1`
164
+ raise "Unable to auto-install R package #{pkg}." unless $?.success?
165
+ end
166
+ end
167
+ $stderr.puts ""
168
+
169
+ # Check for Ruby gems
170
+ $stderr.puts "Looking for Ruby gems:"
171
+ %w(rest-client sqlite3 daemons json).each do |pkg|
172
+ $stderr.print "Testing #{pkg}... "
173
+ `#{paths["ruby"].shellescape} -r "#{pkg}" -e "" 2>/dev/null`
174
+ if $?.success?
175
+ $stderr.puts "yes."
176
+ else
177
+ $stderr.puts "no, installing."
178
+ # This hacky mess is meant to ensure the test and installation are done
179
+ # on the configuration Ruby, not on the Ruby currently executing the init
180
+ # action
181
+ $stderr.print `#{paths["ruby"].shellescape} \
182
+ -r rubygems -r rubygems/gem_runner \
183
+ -e "Gem::GemRunner.new.run %w(install --user #{pkg})" 2>&1`
184
+ raise "Unable to auto-install Ruby gem #{pkg}." unless $?.success?
185
+ end
186
+ end
187
+ $stderr.puts ""
188
+
189
+ # Configure daemon
190
+ $stderr.puts "Default daemon configuration:"
191
+ v = {created:Time.now.to_s, updated:Time.now.to_s}
192
+ v[:type] = ask_user("Please select the type of daemon you want to setup",
193
+ o[:dtype], %w(bash qsub msub))
194
+ case v[:type]
195
+ when "bash"
196
+ v[:latency] = ask_user("How long should I sleep? (in seconds)","30").to_i
197
+ v[:maxjobs] = ask_user("How many jobs can I launch at once?", "6").to_i
198
+ v[:ppn] = ask_user("How many CPUs can I use per job?", "2").to_i
199
+ $stderr.puts "Setting up internal daemon defaults."
200
+ $stderr.puts "If you don't understand this just leave default values:"
201
+ v[:cmd] = ask_user(
202
+ "How should I launch tasks?\n %1$s: script path, %2$s: variables, " +
203
+ "%3$d: CPUs, %4$s: log file, %5$s: task name.\n",
204
+ "%2$s '%1$s' > '%4$s' 2>&1")
205
+ v[:var] = ask_user(
206
+ "How should I pass variables?\n %1$s: keys, %2$s: values.\n",
207
+ "%1$s=%2$s")
208
+ v[:sep] = ask_user("What should I use to separate variables?", " ")
209
+ v[:alive] = ask_user(
210
+ "How can I know that a process is still alive?\n %1$s: PID, " +
211
+ "output should be 1 for running and 0 for non-running.\n",
212
+ "ps -p '%1$s'|tail -n+2|wc -l")
213
+ else # [qm]sub
214
+ queue = ask_user("What queue should I use?", nil, nil, true)
215
+ v[:latency] = ask_user("How long should I sleep? (in seconds)", "150").to_i
216
+ v[:maxjobs] = ask_user("How many jobs can I launch at once?", "300").to_i
217
+ v[:ppn] = ask_user("How many CPUs can I use per job?", "4").to_i
218
+ $stderr.puts "Setting up internal daemon defaults."
219
+ $stderr.puts "If you don't understand this just leave default values:"
220
+ v[:cmd] = ask_user(
221
+ "How should I launch tasks?\n %1$s: script path, %2$s: variables, " +
222
+ "%3$d: CPUs, %4$d: log file, %5$s: task name.\n",
223
+ "#{v[:type]} -q '#{queue}' -v '%2$s' -l nodes=1:ppn=%3$d %1$s " +
224
+ "-j oe -o '%4$s' -N '%5$s' -l mem=9g -l walltime=12:00:00 | grep .")
225
+ v[:var] = ask_user(
226
+ "How should I pass variables?\n %1$s: keys, %2$s: values.\n",
227
+ "%1$s=%2$s")
228
+ v[:sep] = ask_user("What should I use to separate variables?", ",")
229
+ if v[:type] == "qsub"
230
+ v[:alive] = ask_user(
231
+ "How can I know that a process is still alive?\n %1$s: job id, " +
232
+ "output should be 1 for running and 0 for non-running.\n",
233
+ "qstat -f '%1$s'|grep ' job_state ='|perl -pe 's/.*= //'|grep '[^C]'" +
234
+ "|tail -n1|wc -l|awk '{print $1}'")
235
+ else
236
+ v[:alive] = ask_user(
237
+ "How can I know that a process is still alive?\n %1$s: job id, " +
238
+ "output should be 1 for running and 0 for non-running.\n",
239
+ "checkjob '%1$s'|grep '^State:'|perl -pe 's/.*: //'" +
240
+ "|grep 'Deferred\\|Hold\\|Idle\\|Starting\\|Running\\|Blocked'"+
241
+ "|tail -n1|wc -l|awk '{print $1}'")
242
+ end
243
+ end
244
+ File.open(File.expand_path(".miga_daemon.json", ENV["HOME"]), "w") do |fh|
245
+ fh.puts JSON.pretty_generate(v)
246
+ end
247
+ $stderr.puts ""
248
+
249
+ rc_fh.puts <<FOOT
250
+
251
+ MIGA_CONFIG_VERSION='#{MiGA::MiGA.VERSION}'
252
+ MIGA_CONFIG_LONGVERSION='#{MiGA::MiGA.LONG_VERSION}'
253
+ MIGA_CONFIG_DATE='#{Time.now}'
254
+
255
+ FOOT
256
+
257
+ $stderr.puts "Configuration complete. MiGA is ready to work!"
258
+ $stderr.puts ""
data/actions/run_local.rb CHANGED
@@ -16,14 +16,13 @@ opts = OptionParser.new do |opt|
16
16
  end.parse!
17
17
 
18
18
  ##=> Main <=
19
- opts.parse!
20
19
  opt_require(o, project:"-P", name:"-r")
21
20
 
22
21
  $stderr.puts "Loading project." unless o[:q]
23
22
  p = MiGA::Project.load(o[:project])
24
23
  raise "Impossible to load project: #{o[:project]}" if p.nil?
25
24
 
26
- miga = File.expand_path("../..", __FILE__)
25
+ miga = MiGA::MiGA.root_path
27
26
  cmd = ["PROJECT=#{p.path.shellescape}", "RUNTYPE=bash",
28
27
  "MIGA=#{miga.shellescape}", "CORES=#{o[:thr]}"]
29
28
  if o[:dataset].nil?
@@ -28,7 +28,10 @@ ds = p.dataset(o[:dataset])
28
28
  $stderr.puts "Finding closest relative." unless o[:q]
29
29
  cr = ds.closest_relatives(1)
30
30
 
31
- unless cr.empty?
31
+ if cr.nil? or cr.empty?
32
+ raise "This action is not supported for the project or dataset." if cr.nil?
33
+ raise "No close relatives found."
34
+ else
32
35
  $stderr.puts "Querying probability distributions." unless o[:q]
33
36
  cr = cr[0]
34
37
  puts "Closest relative: #{cr[0]} with AAI: #{cr[1]}."
data/bin/miga CHANGED
@@ -30,8 +30,10 @@ $task_desc = {
30
30
  "project.",
31
31
  run_local: "Executes locally one step analysis producing the given result.",
32
32
  # System
33
+ init: "Initialize MiGA to process new projects.",
33
34
  daemon: "Controls the daemon of a MiGA project.",
34
35
  date: "Returns the current date in standard MiGA format.",
36
+ console: "Opens an IRB console with MiGA (alias: c).",
35
37
  # Taxonomy
36
38
  add_taxonomy: "Registers taxonomic information for datasets.",
37
39
  test_taxonomy: "Returns test of taxonomic distributions for query datasets.",
@@ -137,9 +139,14 @@ when "-V", "--long-version"
137
139
  puts MiGA::MiGA.LONG_VERSION
138
140
  when "-C", "--citation"
139
141
  puts MiGA::MiGA.CITATION
142
+ when "console", "c"
143
+ require "irb"
144
+ require "irb/completion"
145
+ ARGV.shift
146
+ IRB.start
140
147
  when *execs
141
148
  $task = ARGV.shift.to_sym
142
- ARGV << "-h" if ARGV.empty? and not [:date].include? $task
149
+ ARGV << "-h" if ARGV.empty? and not [:date, :init].include? $task
143
150
  begin
144
151
  load File.expand_path("../actions/#{$task}.rb", File.dirname(__FILE__))
145
152
  rescue => err
data/lib/miga/dataset.rb CHANGED
@@ -38,13 +38,13 @@ class MiGA::Dataset < MiGA::MiGA
38
38
  def self.KNOWN_TYPES ; @@KNOWN_TYPES end
39
39
  @@KNOWN_TYPES = {
40
40
  genome: {description: "The genome from an isolate.", multi: false},
41
- metagenome: {description: "A metagenome (excluding viromes).",
42
- multi: true},
43
- virome: {description: "A viral metagenome.", multi: true},
44
41
  scgenome: {description: "A Single-cell Genome Amplification (SGA).",
45
42
  multi: false},
46
43
  popgenome: {description: "A population genome (including " +
47
- "metagenomic bins).", :multi=>false}
44
+ "metagenomic bins).", :multi=>false},
45
+ metagenome: {description: "A metagenome (excluding viromes).",
46
+ multi: true},
47
+ virome: {description: "A viral metagenome.", multi: true}
48
48
  }
49
49
 
50
50
  ##
@@ -142,10 +142,11 @@ module MiGA::DatasetResult
142
142
  # Add result type +:mytaxa+ at +base+ (no +_opts+ supported).
143
143
  def add_result_mytaxa(base, _opts)
144
144
  if is_multi?
145
- return nil unless result_files_exist?(base, ".mytaxa")
145
+ return nil unless result_files_exist?(base, ".mytaxa") or
146
+ result_files_exist?(base, ".nomytaxa.txt")
146
147
  r = MiGA::Result.new("#{base}.json")
147
148
  add_files_to_ds_result(r, name, mytaxa:".mytaxa", blast:".blast",
148
- mytaxain:".mytaxain")
149
+ mytaxain:".mytaxain", nomytaxa:".nomytaxa.txt")
149
150
  else
150
151
  MiGA::Result.new("#{base}.json")
151
152
  end
@@ -156,11 +157,13 @@ module MiGA::DatasetResult
156
157
  def add_result_mytaxa_scan(base, _opts)
157
158
  if is_nonmulti?
158
159
  return nil unless
159
- result_files_exist?(base, %w[.pdf .wintax .mytaxa .reg])
160
+ result_files_exist?(base, %w[.pdf .wintax .mytaxa .reg]) or
161
+ result_files_exist?(base, ".nomytaxa.txt")
160
162
  r = MiGA::Result.new("#{base}.json")
161
163
  add_files_to_ds_result(r, name, mytaxa:".mytaxa", wintax:".wintax",
162
164
  blast:".blast", mytaxain:".mytaxain", report:".pdf", regions:".reg",
163
- gene_ids:".wintax.genes", region_ids:".wintax.regions")
165
+ gene_ids:".wintax.genes", region_ids:".wintax.regions",
166
+ nomytaxa:".nomytaxa.txt")
164
167
  else
165
168
  MiGA::Result.new("#{base}.json")
166
169
  end
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 0, 0]
13
+ VERSION = [0.3, 0, 1]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2017, 5, 29)
21
+ VERSION_DATE = Date.new(2017, 6, 6)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
@@ -58,7 +58,9 @@ fi
58
58
 
59
59
  CLASSIF="."
60
60
  [[ -e "$DATASET.$METRIC-medoids.tsv" ]] && rm "$DATASET.$METRIC-medoids.tsv"
61
- while [[ -e "$CLADES/$CLASSIF/miga-project.medoids" ]] ; do
61
+ [[ -s "$CLADES/$CLASSIF/miga-project.medoids" ]] || \
62
+ touch "$DATASET.$METRIC-medoids.tsv" "${DATASET}.${METRIC}.db"
63
+ while [[ -s "$CLADES/$CLASSIF/miga-project.medoids" ]] ; do
62
64
  MAX_VAL=0
63
65
  VAL_MED=""
64
66
  VAL_CLS=""