miga-base 1.2.17.0 → 1.2.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (265)
  1. checksums.yaml +4 -4
  2. data/lib/miga/version.rb +2 -2
  3. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  4. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  5. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  6. data/utils/FastAAI/FastAAI +3659 -0
  7. data/utils/FastAAI/FastAAI-legacy/FastAAI +1336 -0
  8. data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +1296 -0
  9. data/utils/FastAAI/README.md +84 -0
  10. data/utils/enveomics/Docs/recplot2.md +244 -0
  11. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  12. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  13. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  14. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  15. data/utils/enveomics/LICENSE.txt +73 -0
  16. data/utils/enveomics/Makefile +52 -0
  17. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  18. data/utils/enveomics/Manifest/Tasks/blasttab.json +790 -0
  19. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  20. data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
  21. data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
  22. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  23. data/utils/enveomics/Manifest/Tasks/mapping.json +165 -0
  24. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  25. data/utils/enveomics/Manifest/Tasks/other.json +906 -0
  26. data/utils/enveomics/Manifest/Tasks/remote.json +356 -0
  27. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +650 -0
  28. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  29. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  30. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  31. data/utils/enveomics/Manifest/categories.json +165 -0
  32. data/utils/enveomics/Manifest/examples.json +162 -0
  33. data/utils/enveomics/Manifest/tasks.json +4 -0
  34. data/utils/enveomics/README.md +42 -0
  35. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  36. data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
  37. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  38. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  39. data/utils/enveomics/Scripts/BedGraph.tad.rb +138 -0
  40. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  41. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  42. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  43. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  44. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  45. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  46. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  47. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  48. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  49. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  50. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  51. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  52. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  53. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  54. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  55. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  56. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  57. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  58. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +123 -0
  59. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  60. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  61. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  62. data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
  63. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  64. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  65. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  66. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  67. data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
  68. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  69. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  70. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  71. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  72. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  73. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  74. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  75. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  76. data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
  77. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  78. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  79. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  80. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  81. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  82. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  83. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  84. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  85. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  86. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  87. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  88. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  89. data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
  90. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  91. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  92. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  93. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  94. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  95. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  96. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  97. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  98. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  99. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  100. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  101. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  102. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  103. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  104. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  105. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  106. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  107. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  108. data/utils/enveomics/Scripts/SRA.download.bash +67 -0
  109. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  110. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  111. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  112. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  113. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  114. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  115. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  116. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  117. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  118. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  119. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  120. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  121. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  122. data/utils/enveomics/Scripts/aai.rb +421 -0
  123. data/utils/enveomics/Scripts/ani.rb +362 -0
  124. data/utils/enveomics/Scripts/anir.rb +137 -0
  125. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  126. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  127. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  128. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  129. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  130. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  131. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  132. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  133. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  134. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  135. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  136. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  137. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +88 -0
  138. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  139. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  140. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  141. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  142. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  143. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  144. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  145. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +74 -0
  146. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  147. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  148. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  149. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  150. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  151. data/utils/enveomics/Scripts/ogs.rb +104 -0
  152. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  153. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  154. data/utils/enveomics/Scripts/rbm.rb +108 -0
  155. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  156. data/utils/enveomics/Tests/Makefile +10 -0
  157. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  158. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  159. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  160. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  161. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  162. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  163. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  164. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  165. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  166. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  167. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  168. data/utils/enveomics/Tests/alkB.nwk +1 -0
  169. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  170. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  171. data/utils/enveomics/Tests/hiv1.faa +59 -0
  172. data/utils/enveomics/Tests/hiv1.fna +134 -0
  173. data/utils/enveomics/Tests/hiv2.faa +70 -0
  174. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  175. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  176. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  177. data/utils/enveomics/Tests/low-cov.bg.gz +0 -0
  178. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  179. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  180. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  181. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  182. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  183. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  184. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  185. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  186. data/utils/enveomics/build_enveomics_r.bash +45 -0
  187. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  188. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  189. data/utils/enveomics/enveomics.R/R/autoprune.R +167 -0
  190. data/utils/enveomics/enveomics.R/R/barplot.R +203 -0
  191. data/utils/enveomics/enveomics.R/R/cliopts.R +141 -0
  192. data/utils/enveomics/enveomics.R/R/df2dist.R +192 -0
  193. data/utils/enveomics/enveomics.R/R/growthcurve.R +349 -0
  194. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  195. data/utils/enveomics/enveomics.R/R/recplot.R +419 -0
  196. data/utils/enveomics/enveomics.R/R/recplot2.R +1698 -0
  197. data/utils/enveomics/enveomics.R/R/tribs.R +638 -0
  198. data/utils/enveomics/enveomics.R/R/utils.R +90 -0
  199. data/utils/enveomics/enveomics.R/README.md +81 -0
  200. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  201. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  202. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
  203. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
  204. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
  205. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  206. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +47 -0
  207. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  208. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  209. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +26 -0
  210. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +26 -0
  211. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +44 -0
  212. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +111 -0
  213. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
  214. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +34 -0
  215. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +25 -0
  216. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +59 -0
  217. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +63 -0
  218. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +46 -0
  219. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +78 -0
  220. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  221. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
  222. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +147 -0
  223. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  224. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +27 -0
  225. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
  226. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +28 -0
  227. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +24 -0
  228. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +22 -0
  229. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +22 -0
  230. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +52 -0
  231. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  232. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +21 -0
  233. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
  234. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +34 -0
  235. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +23 -0
  236. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +24 -0
  237. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +31 -0
  238. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +56 -0
  239. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +20 -0
  240. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
  241. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
  242. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
  243. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  244. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  245. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
  246. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  247. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
  248. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  249. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  250. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  251. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  252. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +81 -0
  253. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +49 -0
  254. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +48 -0
  255. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
  256. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +22 -0
  257. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +22 -0
  258. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +22 -0
  259. data/utils/enveomics/globals.mk +8 -0
  260. data/utils/enveomics/manifest.json +9 -0
  261. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  262. data/utils/multitrim/README.md +67 -0
  263. data/utils/multitrim/multitrim.py +1555 -0
  264. data/utils/multitrim/multitrim.yml +13 -0
  265. metadata +268 -6
data/utils/enveomics/Scripts/HMM.haai.rb
@@ -0,0 +1,168 @@
+ #!/usr/bin/env ruby
+
+ # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
+ # @license Artistic-2.0
+
+ require 'optparse'
+
+ o = {q: false}
+ ARGV << '-h' if ARGV.size==0
+
+ OptionParser.new do |opt|
+ opt.banner = "
+ Estimates Average Amino Acid Identity (AAI) from the essential genes extracted
+ and aligned by HMM.essential.rb (see --alignments).
+
+ Usage: #{$0} [options]"
+ opt.separator ''
+ opt.separator 'Mandatory'
+ opt.on('-1 PATH', 'Input alignments file for genome 1.'){ |v| o[:a] = v }
+ opt.on('-2 PATH', 'Input alignments file for genome 2.'){ |v| o[:b] = v }
+ opt.separator ''
+ opt.separator 'Options'
+ opt.on('-a', '--aln-out FILE',
+ 'Output file containing the aligned proteins'){ |v| o[:alnout] = v }
+ opt.on('-c', '--components FILE',
+ 'Output file containing the components of the estimation.',
+ 'Tab-delimited file with model name, matches, and columns.'
+ ){ |v| o[:compout] = v }
+ opt.on('-q', '--quiet', 'Run quietly (no STDERR output).'){ o[:q] = true }
+ opt.on('-h', '--help', 'Display this screen.') do
+ puts opt
+ exit
+ end
+ opt.separator ''
+ end.parse!
+ abort '-1 is mandatory.' if o[:a].nil?
+ abort '-2 is mandatory.' if o[:b].nil?
+
+ class HList
+ attr_accessor :list
+
+ def initialize(file)
+ @list = {}
+ r = File.readlines(file)
+ while not r.empty?
+ e = HElement.new(*r.shift(3))
+ @list[ e.model_id ] = e
+ end
+ end
+
+ def [](model_id)
+ list[model_id]
+ end
+
+ ##
+ # Returns an array of HAln objects.
+ def align(other)
+ list.keys.map do |model_id|
+ self[model_id].align(other[model_id]) unless other[model_id].nil?
+ end.compact
+ end
+
+ def models
+ list.keys
+ end
+ end
+
+ class HElement
+ attr_accessor :defline, :model_id, :protein_id, :protein_coords
+ attr_accessor :model_aln, :protein_aln
+
+ def initialize(defline, model_aln, protein_aln)
+ @defline = defline.chomp
+ @model_aln = model_aln.chomp
+ @protein_aln = protein_aln.chomp
+ if defline =~ /^# (.+) : (.+) : (.+)/
+ @model_id = $1
+ @protein_id = $2
+ @protein_coords = $3
+ end
+ end
+
+ def dup
+ HElement.new(defline, model_aln, protein_aln)
+ end
+
+ ##
+ # Returns an HAln object
+ def align(other)
+ return nil unless model_width == other.model_width
+ HAln.new(self, other)
+ end
+
+ def masked_protein
+ @masked_protein ||= model_aln.chars.
+ each_with_index.map{ |c, pos| c == 'X' ? protein_aln[pos] : nil }.
+ compact.join('')
+ end
+
+ def model_width
+ masked_protein.size
+ end
+ end
+
+ class HAln
+ attr :protein_1, :protein_2, :model_id, :protein_1_id, :protein_2_id
+
+ def initialize(a, b)
+ @protein_1 = a.masked_protein
+ @protein_2 = b.masked_protein
+ @model_id = a.model_id
+ @protein_1_id = a.protein_id + '/' + a.protein_coords
+ @protein_2_id = b.protein_id + '/' + b.protein_coords
+ end
+
+ def stats
+ @stats = { len: 0, gaps: 0, matches: 0 }
+ return @stats unless @stats[:id].nil?
+ protein_1.chars.each_with_index do |v, k|
+ # Ignore gaps in both proteins
+ next if v == '-' and protein_2[k] == '-'
+ # Count matches
+ @stats[:len] += 1
+ if v == protein_2[k]
+ @stats[:matches] += 1
+ elsif v == '-' or protein_2[k] == '-'
+ @stats[:gaps] += 1
+ end
+ end
+ @stats.tap { |i| i[:id] = 100.0 * i[:matches] / i[:len] }
+ end
+
+ def stats_to_s
+ stats.map{ |k,v| "#{k}:#{v}" }.join ' '
+ end
+
+ def to_s
+ ["# #{model_id} | #{protein_1_id} | #{protein_2_id} | #{stats_to_s}",
+ protein_1, protein_2, ''].join("\n")
+ end
+ end
+
+ hlist1 = HList.new(o[:a])
+ hlist2 = HList.new(o[:b])
+ haln_arr = hlist1.align(hlist2)
+
+ avg_identity = haln_arr.map{ |i| i.stats[:id] }.inject(:+) / haln_arr.size
+ avg2_identity = haln_arr.map{ |i| i.stats[:id] ** 2 }.inject(:+) / haln_arr.size
+ sd_identity = Math.sqrt( avg2_identity - avg_identity ** 2 )
+ puts "Common models: #{haln_arr.size}"
+ puts "All models: #{(hlist1.models | hlist2.models).size}"
+ puts "Average identity: #{avg_identity.round(2)}%"
+ puts "SD identity: #{sd_identity.round(2)}"
+
+ if o[:alnout]
+ File.open(o[:alnout], 'w') do |fh|
+ haln_arr.each { |i| fh.puts i }
+ end
+ end
+
+ if o[:compout]
+ File.open(o[:compout], 'w') do |fh|
+ haln_arr.each do |i|
+ fh.puts "#{i.model_id}\t#{i.stats[:matches]}\t#{i.stats[:len]}"
+ end
+ end
+ end
+
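Note (not part of the package diff): HMM.haai.rb pairs the proteins matched by each HMM model shared between the two genomes and reports the mean and standard deviation of the per-model percent identity. A minimal, self-contained Ruby sketch of that per-pair identity calculation, assuming two equal-length aligned strings with '-' as the gap character:

  # Sketch only: mirrors the HAln#stats logic above on two aligned strings.
  def aligned_identity(a, b)
    len = matches = 0
    a.chars.each_with_index do |c, i|
      next if c == '-' && b[i] == '-'  # skip columns gapped in both sequences
      len += 1
      matches += 1 if c == b[i]
    end
    100.0 * matches / len
  end

  puts aligned_identity('MK-LVQ', 'MKALVQ').round(2)  # => 83.33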
data/utils/enveomics/Scripts/HMMsearch.extractIds.rb
@@ -0,0 +1,83 @@
+ #!/usr/bin/env ruby
+
+ #
+ # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
+ # @update Dec-01-2015
+ # @license artistic 2.0
+ #
+
+ require "optparse"
+
+ o = {quiet:false, model:true}
+
+ OptionParser.new do |opts|
+ opts.banner = "
+ Extracts the sequence IDs and query model from a (multiple) HMMsearch report
+ (for HMMer 3.0).
+
+ Usage: #{$0} [options] < input.hmmsearch > list.txt"
+ opts.separator ""
+ opts.separator "Options"
+ opts.on("-E", "--all-evalue FLOAT",
+ "Maximum e-value of sequence to report result."
+ ){|v| o[:all_evalue] = v.to_f }
+ opts.on("-S", "--all-score FLOAT",
+ "Minimum score of sequence to report result."
+ ){|v| o[:all_score] = v.to_f }
+ opts.on("-e", "--best-evalue FLOAT",
+ "Maximum e-value of best domain to report result."
+ ){|v| o[:best_evalue] = v.to_f }
+ opts.on("-s", "--best-score FLOAT",
+ "Minimum score of best domain to report result."
+ ){|v| o[:best_score] = v.to_f }
+ opts.on("-n", "--no-model",
+ "Do not include the query model in the output list."){ o[:model]=false }
+ opts.on("-q", "--quiet", "Run quietly."){ o[:quiet]=true }
+ opts.on("-h", "--help", "Display this screen.") do
+ puts opts
+ exit
+ end
+ opts.separator ""
+ end.parse!
+
+ at = :header
+ query = ""
+ i = 0
+ ARGF.each_line do |ln|
+ next unless /^(#.*)$/.match(ln).nil?
+ ln.chomp!
+ case at
+ when :header
+ qm = /Query:\s+(.*?)\s+/.match(ln)
+ qm.nil? or query=qm[1]
+ unless /^[\-\s]+$/.match(ln).nil?
+ at = :list
+ i = 0
+ STDERR.print "Parsing hits against #{query}: " unless o[:quiet]
+ end
+ when :list
+ if /^\s*$/.match(ln).nil?
+ next if ln =~ /^\s*-+ inclusion threshold -+$/
+ ln.gsub!(/#.*/,"")
+ row = ln.split(/\s+/)
+ row << nil if row.count==10
+ raise "Unable to parse seemingly malformed list of hits in line " +
+ "#{$.}:\n#{ln}" unless row.count==11
+ good = true
+ good &&= ( o[:all_evalue].nil? || row[1].to_f <= o[:all_evalue] )
+ good &&= ( o[:all_score].nil? || row[2].to_f >= o[:all_score] )
+ good &&= ( o[:best_evalue].nil? || row[4].to_f <= o[:best_evalue] )
+ good &&= ( o[:best_score].nil? || row[5].to_f >= o[:best_score] )
+ if good
+ puts row[9]+(o[:model]?"\t#{query}":"")
+ i+=1
+ end
+ else
+ at = :align
+ STDERR.puts "#{i} results." unless o[:quiet]
+ end
+ when :align
+ at = :header unless /^\/\/$/.match(ln).nil?
+ end
+ end
+
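As a quick illustration (not part of the package diff): the :list branch above keeps a hit only when every threshold that was actually supplied is satisfied. A self-contained Ruby sketch of that filter on one pre-split table row, with invented values and the column layout the script assumes (full-sequence E-value and score at indices 1 and 2, best-domain E-value and score at 4 and 5, sequence name at 9):

  row = ['0', '1.2e-30', '105.3', '0.1', '3.4e-28', '98.1',
         '1.1', '1', '0', 'seq_1', nil]
  o = { all_evalue: 1e-5, best_score: 50.0 }  # only these thresholds supplied

  good = true
  good &&= (o[:all_evalue].nil?  || row[1].to_f <= o[:all_evalue])
  good &&= (o[:all_score].nil?   || row[2].to_f >= o[:all_score])
  good &&= (o[:best_evalue].nil? || row[4].to_f <= o[:best_evalue])
  good &&= (o[:best_score].nil?  || row[5].to_f >= o[:best_score])
  puts row[9] if good  # => seq_1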
data/utils/enveomics/Scripts/JPlace.distances.rb
@@ -0,0 +1,88 @@
+ #!/usr/bin/env ruby
+
+ #
+ # @author: Luis M. Rodriguez-R
+ # @update: Jul-14-2015
+ # @license: artistic license 2.0
+ #
+
+ $:.push File.expand_path(File.dirname(__FILE__) + '/lib')
+ require 'enveomics_rb/jplace'
+ require 'optparse'
+ require 'json'
+
+ o = {:q=>false}
+ ARGV << '-h' if ARGV.size==0
+ OptionParser.new do |opts|
+ opts.banner = "
+ Extracts the distance (estimated branch length) of each placed read to a given node in a JPlace file.
+
+ Usage: #{$0} [options]"
+ opts.separator ""
+ opts.separator "Mandatory"
+ opts.on("-i", "--in FILE", ".jplace input file containing the read placement."){ |v| o[:in]=v }
+ opts.on("-n", "--node STR", "Index (number in curly brackets) of the node to which distances should be measured."){ |v| o[:node]=v }
+ opts.on("-o", "--out FILE", "Output file."){ |v| o[:out]=v }
+ opts.separator ""
+ opts.separator "Other Options"
+ opts.on("-N", "--in-node STR","Report only reads placed at this node or its children."){ |v| o[:onlynode]=v }
+ opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
+ opts.on("-h", "--help", "Display this screen.") do
+ puts opts
+ exit
+ end
+ opts.separator ""
+ end.parse!
+ abort "-i is mandatory" if o[:in].nil?
+ abort "-o is mandatory" if o[:out].nil?
+ abort "-n is mandatory" if o[:node].nil?
+
+ ##### MAIN:
+ begin
+ $stderr.puts "Loading jplace file." unless o[:q]
+ ifh = File.open(o[:in], 'r')
+ jplace = JSON.load(ifh)
+ ifh.close
+
+ $stderr.puts "Parsing tree." unless o[:q]
+ tree = JPlace::Tree.from_nwk(jplace["tree"])
+ node = JPlace::Node.edges[ o[:node].gsub(/[{}]/,"").to_i ]
+ from_node = o[:onlynode].nil? ? tree : JPlace::Node.edges[ o[:onlynode].gsub(/[{}]/,"").to_i ]
+ raise "Cannot find node with index #{o[:node]}." if node.nil?
+ raise "Cannot find node with index #{o[:onlynode]}." if from_node.nil?
+
+ $stderr.puts "Parsing placements." unless o[:q]
+ JPlace::Placement.fields = jplace["fields"]
+ placements_n = 0
+ jplace["placements"].each do |placement|
+ JPlace::Node.link_placement(JPlace::Placement.new(placement))
+ placements_n += 1
+ end
+ $stderr.puts " #{placements_n} placements in tree, #{node.placements.length} direct placements to {#{node.index}}." unless o[:q]
+
+ # First, calculate distances
+ from_node.pre_order do |n|
+ d = n.distance(node)
+ if node.path_to_root.include? n
+ n.placements.each{ |p| p.flag = d + p.pendant_length + p.distal_length }
+ else
+ n.placements.each{ |p| p.flag = d + p.pendant_length - p.distal_length }
+ end
+ end
+
+ # Finally, report results
+ ofh = File.open(o[:out], "w")
+ ofh.puts %w(read distance multiplicity edge_index node_name).join("\t")
+ from_node.pre_order do |n|
+ n.placements.each do |p|
+ p.nm.each{ |r| ofh.puts [ r[:n], p.flag, r[:m], n.index, n.name ].join("\t") }
+ end
+ end
+ ofh.close
+ rescue => err
+ $stderr.puts "Exception: #{err}\n\n"
+ err.backtrace.each { |l| $stderr.puts l + "\n" }
+ err
+ end
+
+
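For context (not part of the package diff): JPlace.distances.rb consumes a .jplace document, which is plain JSON with "tree", "fields" and "placements" keys (the format described in reference [3] cited by the next script). A minimal Ruby sketch of that structure, with invented values, showing what JSON.load hands to the code above:

  require 'json'

  doc = <<~JPLACE
    {
      "tree": "((A:0.1{0},B:0.2{1}):0.05{2},C:0.3{3}):0{4};",
      "fields": ["edge_num", "likelihood", "like_weight_ratio",
                 "distal_length", "pendant_length"],
      "placements": [
        { "p": [[0, -1234.5, 0.9, 0.01, 0.02]], "nm": [["read_1", 1]] }
      ],
      "version": 3
    }
  JPLACE

  jplace = JSON.parse(doc)
  puts jplace['fields'].join(', ')
  jplace['placements'].each { |p| puts "#{p['nm'][0][0]} -> edge #{p['p'][0][0]}" }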
data/utils/enveomics/Scripts/JPlace.to_iToL.rb
@@ -0,0 +1,320 @@
+ #!/usr/bin/env ruby
+
+ # @author: Luis M. Rodriguez-R
+ # @license: artistic license 2.0
+
+ $:.push File.expand_path("../lib", __FILE__)
+ require "enveomics_rb/enveomics"
+ require "enveomics_rb/jplace"
+ require "optparse"
+ require "json"
+
+ o = {q: false, regex: "^(?<dataset>.+?):.*", area: false, norm: :counts,
+ olditol: false}
+ ARGV << "-h" if ARGV.size==0
+ OptionParser.new do |opts|
+ opts.banner = "
+ Generates iToL-compatible files from a .jplace file (produced by RAxML's EPA
+ or pplacer), that can be used to draw pie-charts in the nodes of the reference
+ tree.
+
+ Usage: #{$0} [options]"
+ opts.separator ""
+ opts.separator "Mandatory"
+ opts.on("-i", "--in FILE",
+ ".jplace input file containing the read placement."){ |v| o[:in]=v }
+ opts.on("-o", "--out FILE", "Base of the output files."){ |v| o[:out]=v }
+ opts.separator ""
+ opts.separator "Other Options"
+ opts.on("-u", "--unique STR",
+ "Name of the dataset (if only one is used). Conflicts with -r and -s."
+ ){ |v| o[:unique]=v }
+ opts.on("-r", "--regex STR",
+ "Regular expression capturing the sample ID (named dataset) in read names.",
+ "By default: '#{o[:regex]}'. Conflicts with -s."){ |v| o[:regex]=v }
+ opts.on("-s", "--separator STR",
+ "String separating the dataset name and the rest of the read name.",
+ "It assumes that the read name starts by the dataset name. Conflicts with -r."
+ ){ |v| o[:regex]="^(?<dataset>.+?)#{v}" }
+ opts.on("-m", "--metadata FILE",
+ "Datasets metadata in tab-delimited format with a header row.",
+ "Valid headers: name (required), color (in Hex), size (# reads), norm (any float)."
+ ){ |v| o[:metadata]=v }
+ opts.on("-n", "--norm STR", %w[none counts size norm],
+ "Normalization strategy. Must be one of:",
+ "none: Direct read counts are reported without normalization.",
+ "counts (default): The counts are normalized (divided) by the total counts per dataset.",
+ "size: The counts are normalized (divided) by the size column in metadata (must be integer).",
+ "norm: The counts are normalized (divided) by the norm column in metadata (can be any float)."
+ ){ |v| o[:norm]=v.to_sym }
+ opts.on("--old-itol",
+ "Generate output file using the old iToL format (pre v3.0)."
+ ){ |v| o[:olditol] = v }
+ opts.on("-c", "--collapse FILE",
+ "Internal nodes to collapse (requires rooted tree)."){ |v| o[:collapse]=v }
+ opts.on("-a", "--area",
+ "If set, the area of the pies is proportional to the placements. Otherwise, the radius is."
+ ){ o[:area]=true }
+ opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
+ opts.on("-h", "--help", "Display this screen.") do
+ puts opts
+ exit
+ end
+ opts.separator ""
+ opts.separator "Quick how-to in 5 steps"
+ opts.separator " 1. Create the placement file using RAxML's EPA [1] or pplacer [2]. You can use any other software"
+ opts.separator " producing a compliant .jplace file [3]. If you're using multiple datasets, include the name of"
+ opts.separator " the dataset somewhere in the read names."
+ opts.separator " 2. If you have multiple datasets, it's convenient to create a metadata table. It's not necessary,"
+ opts.separator " but it allows you to control the colors and the normalization method (see -m)."
+ opts.separator " 3. Execute this script passing the .jplace file created in step 1 (see -i). If you have a single"
+ opts.separator " dataset, use the option -u to give it a short name. If you have multiple datasets, use the -s"
+ opts.separator " or -r options to tell the script how to find the dataset name within the read name. Note that"
+ opts.separator " some programs (like CheckM) may produce nonstandard characters that won't be correctly parsed."
+ opts.separator " To avoid this problem, install iconv support (gem install iconv) before running this script"
+ opts.separator " (currently "+(JPlace::Tree.has_iconv? ? "" : "NOT ")+"installed)."
+ opts.separator " 4. Upload the tree (.nwk file) to iToL [4]. Make sure you check 'Keep internal node IDs' in the"
+ opts.separator " advanced options. In that same page, upload the dataset (.itol file), pick a name, and select"
+ opts.separator " the data type 'Multi-value Bar Chart or Pie Chart'. If you used the -c option, upload the list"
+ opts.separator " of nodes to collapse (.collapse file) in the 'Pre-collapsed clades' field (advanced options)."
+ opts.separator " 5. Open the tree. You can now see the names of the internal nodes. If you want to collapse nodes,"
+ opts.separator " simply list the nodes to collapse and go back to step 3, this time using the -c option."
+ opts.separator ""
+ opts.separator "References"
+ opts.separator " [1] SA Berger, D Krompass and A Stamatakis, 2011, Syst Biol 60(3):291-302."
+ opts.separator " http://sysbio.oxfordjournals.org/content/60/3/291"
+ opts.separator " [2] FA Matsen, RB Kodner and EV Armbrust, 2010, BMC Bioinf 11:538."
+ opts.separator " http://www.biomedcentral.com/1471-2105/11/538/"
+ opts.separator " [3] FA Matsen, NG Hoffman, A Gallagher and A Stamatakis, 2012, PLoS ONE 7(2):e31009."
+ opts.separator " http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0031009"
+ opts.separator " [4] I Letunic and P Bork, 2011, NAR 39(suppl 2):W475-W478."
+ opts.separator " http://nar.oxfordjournals.org/content/39/suppl_2/W475.full"
+ opts.separator ""
+ end.parse!
+ abort "-o is mandatory" if o[:out].nil?
+
+ ##### CLASSES:
+
+ class Dataset
+ attr_reader :name, :data
+ def initialize(name)
+ @name = name
+ @data = {:count=>0}
+ end
+ def count
+ self.datum :count
+ end
+ def add_count(n)
+ @data[:count] += n
+ end
+ def datum(k)
+ @data[k]
+ end
+ def add_datum(k, v)
+ @data[k] = v
+ end
+ def color
+ if @data[:color].nil?
+ @data[:color] = "#" + (1 .. 3).map{ |i|
+ sprintf("%02X", rand(255)) }.join("")
+ end
+ @data[:color].sub(/^#?/, "#")
+ self.datum :color
+ end
+ def size
+ self.datum :size
+ end
+ def norm
+ self.datum :norm
+ end
+ end
+
+ class Metadata
+ attr_reader :datasets
+ def initialize
+ @datasets = {}
+ end
+ def load_table(file)
+ f = File.open(file, "r")
+ h = f.gets.chomp.split(/\t/)
+ name_idx = h.find_index "name"
+ color_idx = h.find_index "color"
+ size_idx = h.find_index "size"
+ norm_idx = h.find_index "norm"
+ abort "The metadata table must contain a 'name' column." if name_idx.nil?
+ while ln = f.gets
+ vals = ln.chomp.split(/\t/)
+ name = vals[name_idx]
+ self[name] # Create sample, in case "name" is the only column
+ self[name].add_datum(:color, vals[color_idx]) unless color_idx.nil?
+ self[name].add_datum(:size, vals[size_idx].to_i) unless size_idx.nil?
+ self[name].add_datum(:norm, vals[norm_idx].to_f) unless norm_idx.nil?
+ end
+ f.close
+ end
+ def [](name)
+ self << Dataset.new(name) unless @datasets.has_key?(name)
+ @datasets[name]
+ end
+ def <<(dataset)
+ @datasets[dataset.name] = dataset
+ end
+ def names
+ @datasets.keys
+ end
+ def colors
+ @datasets.values.map{ |d| d.color }
+ end
+ def data(k)
+ self.names.map{ |name| self[name].datum[k] }
+ end
+ def set_unique!(n)
+ u = self[n]
+ @datasets = {}
+ @datasets[n] = u
+ end
+ def size
+ self.datasets.length
+ end
+ end
+
+ ##### MAIN:
+ begin
+ $stderr.puts "Parsing metadata." unless o[:q]
+ metadata = Metadata.new
+ metadata.load_table(o[:metadata]) unless o[:metadata].nil?
+ metadata.set_unique! o[:unique] unless o[:unique].nil?
+
+
+ $stderr.puts "Loading jplace file." unless o[:q]
+ ifh = File.open(o[:in], 'r')
+ jplace = JSON.load(ifh)
+ ifh.close
+
+
+ $stderr.puts "Parsing tree." unless o[:q]
+ tree = JPlace::Tree.from_nwk(jplace["tree"])
+
+
+ $stderr.puts "Parsing placements." unless o[:q]
+ JPlace::Placement.fields = jplace["fields"]
+ placements_n = 0
+ jplace["placements"].each do |placement|
+ JPlace::Node.link_placement(JPlace::Placement.new(placement))
+ placements_n += 1
+ end
+ $stderr.puts " #{placements_n} placements." unless o[:q]
+ tree.pre_order do |n|
+ n.placements.each do |p|
+ p.nm.each do |r|
+ m = (o[:unique].nil? ? (/#{o[:regex]}/.match(r[:n]) or
+ abort "Cannot parse read name: #{r[:n]}, placed at edge #{n.index}") :
+ {:dataset=>o[:unique]})
+ metadata[ m[:dataset] ].add_count(r[:m])
+ end
+ end
+ end
+
+
+ unless o[:collapse].nil?
+ $stderr.puts "Collapsing nodes." unless o[:q]
+ collapse = File.readlines(o[:collapse]).map do |ln|
+ l = ln.chomp.split(/\t/)
+ l[1] = l[0] if l[1].nil?
+ l
+ end.inject({}) do |hash,ar|
+ hash[ar[0]] = ar[1]
+ hash
+ end
+ f = File.open(o[:out] + ".collapse", "w")
+ coll_n = 0
+ tree.pre_order do |n|
+ if collapse.keys.include? n.cannonical_name
+ n.collapse!
+ n.name = collapse[n.cannonical_name]
+ f.puts n.name
+ coll_n += 1
+ end
+ end
+ f.close
+ $stderr.puts " #{coll_n} nodes collapsed (#{collapse.length} requested)." unless o[:q]
+ end
+
+
+ $stderr.puts "Estimating normalizing factors by #{o[:norm].to_s}." unless o[:q] or o[:norm]==:none
+ case o[:norm]
+ when :none
+ metadata.datasets.values.each{ |d| d.add_datum :norm, 1.0 }
+ when :counts
+ metadata.datasets.values.each{ |d| d.add_datum :norm, d.count.to_f }
+ when :size
+ abort "Column 'size' required in metadata." if metadata.datasets.values[0].size.nil?
+ metadata.datasets.values.each{ |d| d.add_datum :norm, d.size.to_f }
+ when :norm
+ abort "Column 'norm' required in metadata." if metadata.datasets.values[0].norm.nil?
+ end
+ max_norm = metadata.datasets.values.map{ |d| d.norm }.max
+
+
+ $stderr.puts "Generating iToL dataset." unless o[:q]
+ f = File.open(o[:out] + ".itol.txt", "w")
+ if o[:olditol]
+ f.puts "LABELS\t" + metadata.names.join("\t")
+ f.puts "COLORS\t" + metadata.colors.join("\t")
+ else
+ f.puts "DATASET_PIECHART"
+ f.puts "SEPARATOR TAB"
+ f.puts "DATASET_LABEL\tReadPlacement"
+ f.puts "COLOR\t#1f2122"
+ f.puts "FIELD_LABELS\t" + metadata.names.join("\t")
+ f.puts "FIELD_COLORS\t" + metadata.colors.join("\t")
+ f.puts "DATA"
+ end
+ max_norm_sum,min_norm_sum,max_norm_n,min_norm_n = 0.0,Float::INFINITY,"",""
+ tree.pre_order do |n|
+ ds_counts = Hash.new(0.0)
+ n.placements.each do |p|
+ p.nm.each do |r|
+ m = (o[:unique].nil? ? (/#{o[:regex]}/.match(r[:n]) or
+ abort "Cannot parse read name: #{r[:n]}, placed at edge #{n.index}") :
+ {:dataset=>o[:unique]})
+ ds_counts[ m[:dataset] ] += r[:m] / metadata[ m[:dataset] ].norm
+ end
+ end
+ counts_sum = ds_counts.values.reduce(:+)
+ unless counts_sum.nil?
+ # In the area option, the radius is "twice" to make the smallest > 1 (since counts_sum is >= 1)
+ radius = (o[:area] ? 2*Math.sqrt(counts_sum/Math::PI) : counts_sum)*max_norm
+ f.puts n.cannonical_name +
+ "#{"\t0.5" unless o[:olditol]}\t#{"R" if o[:olditol]}" +
+ radius.to_i.to_s + "\t" +
+ metadata.names.map{ |n| ds_counts[n] }.join("\t")
+ if counts_sum > max_norm_sum
+ max_norm_n = n.cannonical_name
+ max_norm_sum = counts_sum
+ end
+ if counts_sum < min_norm_sum
+ min_norm_n = n.cannonical_name
+ min_norm_sum = counts_sum
+ end
+ end
+ end
+ f.close
+ units = {none: "", counts: " per million placements",
+ size: " per million reads", norm: " per normalizing unit"}
+ $stderr.puts " The pie #{o[:area] ? "areas" : "radii"} are proportional to the placements#{units[o[:norm]]}." unless o[:q]
+ $stderr.puts " The minimum radius (#{min_norm_n}) represents #{min_norm_sum*(([:none, :norm].include? o[:norm]) ? 1 : 1e6)} placements#{units[o[:norm]]}." unless o[:q]
+ $stderr.puts " The maximum radius (#{max_norm_n}) represents #{max_norm_sum*(([:none, :norm].include? o[:norm]) ? 1 : 1e6)} placements#{units[o[:norm]]}." unless o[:q]
+
+
+ $stderr.puts "Re-formatting tree for iToL." unless o[:q]
+ f = File.open(o[:out] + ".nwk", "w")
+ f.puts tree.to_s + ";"
+ f.close
+
+ rescue => err
+ $stderr.puts "Exception: #{err}\n\n"
+ err.backtrace.each { |l| $stderr.puts l + "\n" }
+ err
+ end
+
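As a worked example (not part of the package diff): the pie-size rule above scales the radius linearly with the normalized placement count by default, and with its square root when --area is given, so that the pie area rather than its radius tracks the count. A small Ruby sketch of that rule with invented numbers:

  def pie_radius(counts_sum, max_norm, area: false)
    # Same formula as above: sqrt-scaling keeps pie area proportional to counts_sum.
    (area ? 2 * Math.sqrt(counts_sum / Math::PI) : counts_sum) * max_norm
  end

  puts pie_radius(100.0, 1.0).round(2)              # => 100.0 (radius mode)
  puts pie_radius(100.0, 1.0, area: true).round(2)  # => 11.28 (area mode)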