miga-base 0.3.0.0 → 0.3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +21 -4
  3. data/actions/init.rb +258 -0
  4. data/actions/run_local.rb +1 -2
  5. data/actions/test_taxonomy.rb +4 -1
  6. data/bin/miga +8 -1
  7. data/lib/miga/dataset.rb +4 -4
  8. data/lib/miga/dataset_result.rb +7 -4
  9. data/lib/miga/version.rb +2 -2
  10. data/scripts/_distances_noref_nomulti.bash +3 -1
  11. data/scripts/clade_finding.bash +1 -1
  12. data/scripts/init.bash +1 -1
  13. data/scripts/miga.bash +1 -1
  14. data/scripts/mytaxa.bash +78 -72
  15. data/scripts/mytaxa_scan.bash +67 -62
  16. data/scripts/ogs.bash +1 -1
  17. data/scripts/trimmed_fasta.bash +4 -3
  18. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  19. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  20. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  21. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  22. data/utils/enveomics/LICENSE.txt +73 -0
  23. data/utils/enveomics/Makefile +52 -0
  24. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  25. data/utils/enveomics/Manifest/Tasks/blasttab.json +703 -0
  26. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  27. data/utils/enveomics/Manifest/Tasks/fasta.json +571 -0
  28. data/utils/enveomics/Manifest/Tasks/fastq.json +208 -0
  29. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  30. data/utils/enveomics/Manifest/Tasks/ogs.json +339 -0
  31. data/utils/enveomics/Manifest/Tasks/other.json +746 -0
  32. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  33. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +454 -0
  34. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  35. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  36. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  37. data/utils/enveomics/Manifest/categories.json +132 -0
  38. data/utils/enveomics/Manifest/examples.json +154 -0
  39. data/utils/enveomics/Manifest/tasks.json +4 -0
  40. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  41. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +56 -0
  42. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +60 -0
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +38 -0
  44. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  45. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  46. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  49. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  50. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  51. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  52. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  53. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  54. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  55. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  56. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  57. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  58. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  59. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  60. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +55 -0
  61. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  62. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  63. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  64. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  68. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  69. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  70. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  71. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  72. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  73. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  74. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  75. data/utils/enveomics/README.md +40 -0
  76. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  77. data/utils/enveomics/Scripts/Aln.cat.rb +162 -0
  78. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  79. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  80. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  81. data/utils/enveomics/Scripts/BlastTab.addlen.rb +61 -0
  82. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  83. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  84. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +106 -0
  85. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  86. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  87. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  88. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  89. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  90. data/utils/enveomics/Scripts/BlastTab.recplot2.R +40 -0
  91. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  92. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  93. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  94. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  95. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  96. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  97. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  98. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  99. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  100. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  101. data/utils/enveomics/Scripts/FastA.N50.pl +56 -0
  102. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  103. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  104. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  105. data/utils/enveomics/Scripts/FastA.fragment.rb +92 -0
  106. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  107. data/utils/enveomics/Scripts/FastA.interpose.pl +87 -0
  108. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  109. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  110. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  111. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  112. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  113. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  114. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  115. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  116. data/utils/enveomics/Scripts/FastA.tag.rb +64 -0
  117. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  118. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  119. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  120. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  121. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  122. data/utils/enveomics/Scripts/FastQ.tag.rb +63 -0
  123. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  124. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  125. data/utils/enveomics/Scripts/HMM.essential.rb +254 -0
  126. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  127. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  128. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +306 -0
  129. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  130. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  131. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  132. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  133. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  134. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  135. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  136. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  137. data/utils/enveomics/Scripts/SRA.download.bash +50 -0
  138. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  139. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  140. data/utils/enveomics/Scripts/Table.barplot.R +30 -0
  141. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  142. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  143. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  144. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  145. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  146. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  147. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  148. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  149. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  150. data/utils/enveomics/Scripts/aai.rb +373 -0
  151. data/utils/enveomics/Scripts/ani.rb +362 -0
  152. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  153. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  154. data/utils/enveomics/Scripts/lib/data/essential.hmm.gz +0 -0
  155. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +26 -0
  156. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  157. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  158. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  159. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  160. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +30 -0
  161. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  162. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  163. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  164. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  165. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  166. data/utils/enveomics/Scripts/ogs.rb +104 -0
  167. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  168. data/utils/enveomics/Scripts/rbm.rb +137 -0
  169. data/utils/enveomics/Tests/Makefile +10 -0
  170. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  171. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  172. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  173. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  174. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  175. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  176. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  177. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  178. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  179. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  180. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  181. data/utils/enveomics/Tests/alkB.nwk +1 -0
  182. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  183. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  184. data/utils/enveomics/Tests/hiv1.faa +59 -0
  185. data/utils/enveomics/Tests/hiv1.fna +134 -0
  186. data/utils/enveomics/Tests/hiv2.faa +70 -0
  187. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  188. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  189. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  190. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  191. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  192. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  193. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  194. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  195. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  196. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  197. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  198. data/utils/enveomics/build_enveomics_r.bash +44 -0
  199. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  200. data/utils/enveomics/enveomics.R/NAMESPACE +35 -0
  201. data/utils/enveomics/enveomics.R/R/autoprune.R +121 -0
  202. data/utils/enveomics/enveomics.R/R/barplot.R +165 -0
  203. data/utils/enveomics/enveomics.R/R/cliopts.R +119 -0
  204. data/utils/enveomics/enveomics.R/R/df2dist.R +117 -0
  205. data/utils/enveomics/enveomics.R/R/growthcurve.R +263 -0
  206. data/utils/enveomics/enveomics.R/R/recplot.R +320 -0
  207. data/utils/enveomics/enveomics.R/R/recplot2.R +745 -0
  208. data/utils/enveomics/enveomics.R/R/tribs.R +423 -0
  209. data/utils/enveomics/enveomics.R/R/utils.R +16 -0
  210. data/utils/enveomics/enveomics.R/README.md +52 -0
  211. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  212. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  213. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +30 -0
  214. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +43 -0
  215. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +19 -0
  216. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +37 -0
  217. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +24 -0
  218. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +24 -0
  219. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +33 -0
  220. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +64 -0
  221. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +37 -0
  222. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +19 -0
  223. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +18 -0
  224. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +26 -0
  225. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +25 -0
  226. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +26 -0
  227. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +49 -0
  228. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +28 -0
  229. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +97 -0
  230. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +40 -0
  231. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +40 -0
  232. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +24 -0
  233. data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeak.Rd +40 -0
  234. data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeaks.Rd +18 -0
  235. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +22 -0
  236. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +20 -0
  237. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +18 -0
  238. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  239. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +27 -0
  240. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +53 -0
  241. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -0
  242. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +44 -0
  243. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +21 -0
  244. data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +15 -0
  245. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  246. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  247. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +43 -0
  248. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +29 -0
  249. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +30 -0
  250. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +71 -0
  251. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +18 -0
  252. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +18 -0
  253. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +18 -0
  254. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +27 -0
  255. data/utils/enveomics/globals.mk +8 -0
  256. data/utils/enveomics/manifest.json +9 -0
  257. data/utils/index_metadata.rb +0 -0
  258. data/utils/plot-taxdist.R +0 -0
  259. data/utils/requirements.txt +19 -19
  260. metadata +242 -2
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #
4
+ # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
5
+ # @update Dec-01-2015
6
+ # @license artistic 2.0
7
+ #
8
+
9
+ require "optparse"
10
+
11
# Default options: progress is reported on STDERR and the query model is
# appended to every reported sequence ID. The four threshold keys
# (:all_evalue, :all_score, :best_evalue, :best_score) are only set when the
# corresponding switch is given, so a missing key means "do not filter".
o = {quiet:false, model:true}

OptionParser.new do |opts|
  opts.banner = "
Extracts the sequence IDs and query model from a (multiple) HMMsearch report
(for HMMer 3.0).

Usage: #{$0} [options] < input.hmmsearch > list.txt"
  opts.separator ""
  opts.separator "Options"
  opts.on("-E", "--all-evalue FLOAT",
    "Maximum e-value of sequence to report result."
    ){|v| o[:all_evalue] = v.to_f }
  opts.on("-S", "--all-score FLOAT",
    "Minimum score of sequence to report result."
    ){|v| o[:all_score] = v.to_f }
  opts.on("-e", "--best-evalue FLOAT",
    "Maximum e-value of best domain to report result."
    ){|v| o[:best_evalue] = v.to_f }
  opts.on("-s", "--best-score FLOAT",
    "Minimum score of best domain to report result."
    ){|v| o[:best_score] = v.to_f }
  opts.on("-n", "--no-model",
    "Do not include the query model in the output list."){ o[:model]=false }
  opts.on("-q", "--quiet", "Run quietly."){ o[:quiet]=true }
  opts.on("-h", "--help", "Display this screen.") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
42
+
43
# Line-oriented state machine over the report(s) read through ARGF:
#   :header  Remembers the last "Query:" name and waits for the dashed ruler
#            line that precedes the table of hits.
#   :list    Parses one hit per line until the first blank line.
#   :align   Ignores everything up to the "//" line, then starts over.
at = :header
query = ""
i = 0 # Hits reported for the current query
ARGF.each_line do |ln|
  next if ln =~ /^#/ # Comment lines are ignored in every state
  ln.chomp!
  case at
  when :header
    qm = /Query:\s+(.*?)\s+/.match(ln)
    query = qm[1] unless qm.nil?
    if ln =~ /^[\-\s]+$/
      at = :list
      i = 0
      STDERR.print "Parsing hits against #{query}: " unless o[:quiet]
    end
  when :list
    if ln =~ /^\s*$/
      # A blank line closes the table of hits
      at = :align
      STDERR.puts "#{i} results." unless o[:quiet]
    else
      next if ln =~ /^\s*-+ inclusion threshold -+$/
      ln.gsub!(/#.*/,"")
      # Rows are expected to begin with whitespace, which makes row[0] an
      # empty string and shifts the data to row[1..10]. The split is limited
      # to 11 fields so that a description made of several words stays in a
      # single trailing column instead of being rejected as malformed.
      row = ln.split(/\s+/, 11)
      row << nil if row.count==10 # No description column
      raise "Unable to parse seemingly malformed list of hits in line " +
        "#{$.}:\n#{ln}" unless row.count==11
      # Columns used below: 1/2 = e-value/score of the sequence,
      # 4/5 = e-value/score of the best domain, 9 = sequence ID.
      good = true
      good &&= ( o[:all_evalue].nil? || row[1].to_f <= o[:all_evalue] )
      good &&= ( o[:all_score].nil? || row[2].to_f >= o[:all_score] )
      good &&= ( o[:best_evalue].nil? || row[4].to_f <= o[:best_evalue] )
      good &&= ( o[:best_score].nil? || row[5].to_f >= o[:best_score] )
      if good
        puts row[9]+(o[:model]?"\t#{query}":"")
        i+=1
      end
    end
  when :align
    at = :header if ln =~ /^\/\/$/
  end
end
83
+
@@ -0,0 +1,88 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #
4
+ # @author: Luis M. Rodriguez-R
5
+ # @update: Jul-14-2015
6
+ # @license: artistic license 2.0
7
+ #
8
+
9
+ $:.push File.expand_path(File.dirname(__FILE__) + '/lib')
10
+ require 'enveomics_rb/jplace'
11
+ require 'optparse'
12
+ require 'json'
13
+
14
# Default options: progress messages are written to STDERR.
o = {:q=>false}
# Called without any argument: show the help screen.
ARGV << '-h' if ARGV.size==0
OptionParser.new do |opts|
  opts.banner = "
Extracts the distance (estimated branch length) of each placed read to a given node in a JPlace file.

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Mandatory"
  opts.on("-i", "--in FILE", ".jplace input file containing the read placement."){ |v| o[:in]=v }
  opts.on("-n", "--node STR", "Index (number in curly brackets) of the node to which distances should be measured."){ |v| o[:node]=v }
  opts.on("-o", "--out FILE", "Output file."){ |v| o[:out]=v }
  opts.separator ""
  opts.separator "Other Options"
  opts.on("-N", "--in-node STR","Report only reads placed at this node or its children."){ |v| o[:onlynode]=v }
  opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
  opts.on("-h", "--help", "Display this screen.") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
# The three mandatory switches are validated here, before any file is opened.
abort "-i is mandatory" if o[:in].nil?
abort "-o is mandatory" if o[:out].nil?
abort "-n is mandatory" if o[:node].nil?
39
+
40
+ ##### MAIN:
41
# Loads the .jplace file, links every placement to its edge, stores in each
# placement's +flag+ its distance to the target node (-n), and writes one row
# per read name to the output table (-o). Any StandardError is reported on
# STDERR with its backtrace and the script exits with a non-zero status.
begin
  $stderr.puts "Loading jplace file." unless o[:q]
  # Block form guarantees the handle is closed even if parsing fails
  jplace = File.open(o[:in], 'r'){ |ifh| JSON.load(ifh) }

  $stderr.puts "Parsing tree." unless o[:q]
  tree = JPlace::Tree.from_nwk(jplace["tree"])
  # Node indexes may be given with or without the curly brackets
  node = JPlace::Node.edges[ o[:node].gsub(/[{}]/,"").to_i ]
  from_node = o[:onlynode].nil? ? tree : JPlace::Node.edges[ o[:onlynode].gsub(/[{}]/,"").to_i ]
  raise "Cannot find node with index #{o[:node]}." if node.nil?
  raise "Cannot find node with index #{o[:onlynode]}." if from_node.nil?

  $stderr.puts "Parsing placements." unless o[:q]
  JPlace::Placement.fields = jplace["fields"]
  placements_n = 0
  jplace["placements"].each do |placement|
    JPlace::Node.link_placement(JPlace::Placement.new(placement))
    placements_n += 1
  end
  $stderr.puts " #{placements_n} placements in tree, #{node.placements.length} direct placements to {#{node.index}}." unless o[:q]

  # First, calculate distances
  from_node.pre_order do |n|
    d = n.distance(node)
    # The distal length is added for edges on the path from the target node
    # to the root, and subtracted for every other edge.
    sign = node.path_to_root.include?(n) ? 1 : -1
    n.placements.each{ |p| p.flag = d + p.pendant_length + sign*p.distal_length }
  end

  # Finally, report results
  File.open(o[:out], "w") do |ofh|
    ofh.puts %w(read distance multiplicity edge_index node_name).join("\t")
    from_node.pre_order do |n|
      n.placements.each do |p|
        p.nm.each{ |r| ofh.puts [ r[:n], p.flag, r[:m], n.index, n.name ].join("\t") }
      end
    end
  end
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l }
  # Signal the failure to the calling shell instead of exiting with status 0
  exit 1
end
87
+
88
+
@@ -0,0 +1,306 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #
4
+ # @author: Luis M. Rodriguez-R
5
+ # @update: Feb-28-2016
6
+ # @license: artistic license 2.0
7
+ #
8
+
9
+ $:.push File.expand_path("../lib", __FILE__)
10
+ require "enveomics_rb/jplace"
11
+ require "optparse"
12
+ require "json"
13
+
14
# Default options: verbose, dataset name taken as everything before the first
# colon of the read name, pie radius (not area) proportional to the
# placements, normalization by total counts, and new iToL format.
o = {:q=>false, :regex=>'^(?<dataset>.+?):.*', :area=>false, :norm=>:counts,
  :olditol=>false}
# Called without any argument: show the help screen.
ARGV << '-h' if ARGV.size==0
OptionParser.new do |opts|
  opts.banner = "
Generates iToL-compatible files from a .jplace file (produced by RAxML's EPA
or pplacer), that can be used to draw pie-charts in the nodes of the reference
tree.

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Mandatory"
  opts.on("-i", "--in FILE", ".jplace input file containing the read placement."){ |v| o[:in]=v }
  opts.on("-o", "--out FILE", "Base of the output files."){ |v| o[:out]=v }
  opts.separator ""
  opts.separator "Other Options"
  opts.on("-u", "--unique STR", "Name of the dataset (if only one is used). Conflicts with -r and -s."){ |v| o[:unique]=v }
  opts.on("-r", "--regex STR", "Regular expression capturing the sample ID (named dataset) in read names.",
    "By default: '#{o[:regex]}'. Conflicts with -s."){ |v| o[:regex]=v }
  # The separator is a literal string, so it is escaped before being embedded
  # in the regular expression (otherwise '.', '|', etc. act as metacharacters).
  opts.on("-s", "--separator STR", "String separating the dataset name and the rest of the read name.",
    "It assumes that the read name starts by the dataset name. Conflicts with -r."){ |v| o[:regex]="^(?<dataset>.+?)#{Regexp.escape(v)}" }
  opts.on("-m", "--metadata FILE", "Datasets metadata in tab-delimited format with a header row.",
    "Valid headers: name (required), color (in Hex), size (# reads), norm (any float)."){ |v| o[:metadata]=v }
  opts.on("-n", "--norm STR", %w[none counts size norm], "Normalization strategy. Must be one of:",
    "none: Direct read counts are reported without normalization.",
    "counts (default): The counts are normalized (divided) by the total counts per dataset.",
    "size: The counts are normalized (divided) by the size column in metadata (must be integer).",
    "norm: The counts are normalized (divided) by the norm column in metadata (can be any float)."){ |v| o[:norm]=v.to_sym }
  opts.on("--old-itol",
    "Generate output file using the old iToL format (pre v3.0)."
    ){ |v| o[:olditol] = v }
  opts.on("-c", "--collapse FILE", "Internal nodes to collapse (requires rooted tree)."){ |v| o[:collapse]=v }
  opts.on("-a", "--area", "If set, the area of the pies is proportional to the placements. Otherwise, the radius is."){ o[:area]=true }
  opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
  opts.on("-h", "--help", "Display this screen.") do
    puts opts
    exit
  end
  opts.separator ""
  opts.separator "Quick how-to in 5 steps"
  opts.separator " 1. Create the placement file using RAxML's EPA [1] or pplacer [2]. You can use any other software"
  opts.separator " producing a compliant .jplace file [3]. If you're using multiple datasets, include the name of"
  opts.separator " the dataset somewhere in the read names."
  opts.separator " 2. If you have multiple datasets, it's convenient to create a metadata table. It's not necessary,"
  opts.separator " but it allows you to control the colors and the normalization method (see -m)."
  opts.separator " 3. Execute this script passing the .jplace file created in step 1 (see -i). If you have a single"
  opts.separator " dataset, use the option -u to give it a short name. If you have multiple datasets, use the -s"
  opts.separator " or -r options to tell the script how to find the dataset name within the read name. Note that"
  opts.separator " some programs (like CheckM) may produce nonstandard characters that won't be correctly parsed."
  opts.separator " To avoid this problem, install iconv support (gem install iconv) before running this script"
  opts.separator " (currently "+(JPlace::Tree.has_iconv? ? "" : "NOT ")+"installed)."
  opts.separator " 4. Upload the tree (.nwk file) to iToL [4]. Make sure you check 'Keep internal node IDs' in the"
  opts.separator " advanced options. In that same page, upload the dataset (.itol file), pick a name, and select"
  opts.separator " the data type 'Multi-value Bar Chart or Pie Chart'. If you used the -c option, upload the list"
  opts.separator " of nodes to collapse (.collapse file) in the 'Pre-collapsed clades' field (advanced options)."
  opts.separator " 5. Open the tree. You can now see the names of the internal nodes. If you want to collapse nodes,"
  opts.separator " simply list the nodes to collapse and go back to step 3, this time using the -c option."
  opts.separator ""
  opts.separator "References"
  opts.separator " [1] SA Berger, D Krompass and A Stamatakis, 2011, Syst Biol 60(3):291-302."
  opts.separator " http://sysbio.oxfordjournals.org/content/60/3/291"
  opts.separator " [2] FA Matsen, RB Kodner and EV Armbrust, 2010, BMC Bioinf 11:538."
  opts.separator " http://www.biomedcentral.com/1471-2105/11/538/"
  opts.separator " [3] FA Matsen, NG Hoffman, A Gallagher and A Stamatakis, 2012, PLoS ONE 7(2):e31009."
  opts.separator " http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0031009"
  opts.separator " [4] I Letunic and P Bork, 2011, NAR 39(suppl 2):W475-W478."
  opts.separator " http://nar.oxfordjournals.org/content/39/suppl_2/W475.full"
  opts.separator ""
end.parse!
# Both switches listed under "Mandatory" are validated before any work starts
abort "-i is mandatory" if o[:in].nil?
abort "-o is mandatory" if o[:out].nil?
84
+
85
+ ##### CLASSES:
86
+
87
# A named dataset (sample) together with a free-form hash of data. The
# :count entry always exists and starts at zero; :color, :size and :norm are
# optional and exposed through dedicated readers.
class Dataset
  attr_reader :name, :data

  # name:: Identifier of the dataset.
  def initialize(name)
    @name = name
    @data = {:count=>0}
  end

  # Accumulated count of the dataset.
  def count
    self.datum :count
  end

  # Adds +n+ to the accumulated count.
  def add_count(n)
    @data[:count] += n
  end

  # Value stored under the key +k+, or nil if it was never set.
  def datum(k)
    @data[k]
  end

  # Stores +v+ under the key +k+, replacing any previous value.
  def add_datum(k, v)
    @data[k] = v
  end

  # Color of the dataset as a string starting with '#'. A random color is
  # generated (and remembered) when none was set or when it is empty. A color
  # given without the leading '#' is normalized and the normalized value is
  # stored, so subsequent calls return the same string.
  def color
    if @data[:color].to_s.empty?
      # rand(256) covers the full 00..FF range of each channel
      @data[:color] = '#' + (1 .. 3).map{ sprintf("%02X", rand(256)) }.join('')
    end
    # String#sub is not destructive: the result must be stored back
    @data[:color] = @data[:color].sub(/^#?/, '#')
  end

  # Value of the :size datum (nil if unset).
  def size
    self.datum :size
  end

  # Value of the :norm datum (nil if unset).
  def norm
    self.datum :norm
  end
end
119
+
120
# Collection of Dataset objects indexed by name. Datasets are created on
# demand the first time a name is requested with #[].
class Metadata
  attr_reader :datasets

  def initialize
    @datasets = {}
  end

  # Loads datasets from the tab-delimited +file+. The first line is a header;
  # the 'name' column is required, 'color', 'size' (stored as integer) and
  # 'norm' (stored as float) are optional. Lines without a name are skipped.
  # Aborts the script if the table is empty or lacks the 'name' column.
  def load_table(file)
    # Block form closes the handle even when the script aborts below
    File.open(file, 'r') do |f|
      header = f.gets
      abort "The metadata table is empty." if header.nil?
      h = header.chomp.split(/\t/)
      name_idx = h.find_index 'name'
      color_idx = h.find_index 'color'
      size_idx = h.find_index 'size'
      norm_idx = h.find_index 'norm'
      abort "The metadata table must contain a 'name' column." if name_idx.nil?
      f.each_line do |ln|
        vals = ln.chomp.split(/\t/)
        name = vals[name_idx]
        # Blank lines would otherwise register a dataset without a name
        next if name.nil? || name.empty?
        dataset = self[name] # Create sample, in case "name" is the only column
        dataset.add_datum(:color, vals[color_idx]) unless color_idx.nil?
        dataset.add_datum(:size, vals[size_idx].to_i) unless size_idx.nil?
        dataset.add_datum(:norm, vals[norm_idx].to_f) unless norm_idx.nil?
      end
    end
  end

  # Dataset called +name+, registering a new one if it doesn't exist yet.
  def [](name)
    self << Dataset.new(name) unless @datasets.has_key?(name)
    @datasets[name]
  end

  # Registers +dataset+ under its own name, replacing any previous one.
  def <<(dataset)
    @datasets[dataset.name] = dataset
  end

  # Names of the registered datasets, in registration order.
  def names
    @datasets.keys
  end

  # Colors of the registered datasets, in the same order as #names.
  def colors
    @datasets.values.map{ |d| d.color }
  end

  # Values of the datum +k+ for all datasets, in the same order as #names.
  def data(k)
    # Dataset#datum takes the key as argument (it is not a hash accessor)
    self.names.map{ |name| self[name].datum(k) }
  end

  # Discards all datasets except the one called +n+ (created if missing).
  def set_unique!(n)
    u = self[n]
    @datasets = {}
    @datasets[n] = u
  end

  # Number of registered datasets.
  def size
    self.datasets.length
  end
end
168
+
169
+ ##### MAIN:
170
# Main workflow: load the metadata and the .jplace file, count placements per
# dataset, optionally collapse nodes, set the normalizing factor of every
# dataset, and write the iToL dataset (<out>.itol.txt), the list of collapsed
# nodes (<out>.collapse, only with -c) and the tree (<out>.nwk). Any
# StandardError is reported on STDERR with its backtrace and the script exits
# with a non-zero status.
begin
  $stderr.puts "Parsing metadata." unless o[:q]
  metadata = Metadata.new
  metadata.load_table(o[:metadata]) unless o[:metadata].nil?
  metadata.set_unique! o[:unique] unless o[:unique].nil?


  $stderr.puts "Loading jplace file." unless o[:q]
  # Block form guarantees the handle is closed even if parsing fails
  jplace = File.open(o[:in], 'r'){ |ifh| JSON.load(ifh) }


  $stderr.puts "Parsing tree." unless o[:q]
  tree = JPlace::Tree.from_nwk(jplace["tree"])


  # The regular expression is compiled once (and only if it is needed)
  # instead of once per read.
  read_regex = o[:unique].nil? ? Regexp.new(o[:regex]) : nil
  # Name of the dataset of a read: the unique dataset if -u was given, or the
  # "dataset" named capture of the regular expression. Aborts the script if
  # the read name cannot be parsed.
  dataset_of = lambda do |read_name, edge_index|
    if o[:unique].nil?
      m = read_regex.match(read_name)
      abort "Cannot parse read name: #{read_name}, placed at edge #{edge_index}" if m.nil?
      m[:dataset]
    else
      o[:unique]
    end
  end

  $stderr.puts "Parsing placements." unless o[:q]
  JPlace::Placement.fields = jplace["fields"]
  placements_n = 0
  jplace["placements"].each do |placement|
    JPlace::Node.link_placement(JPlace::Placement.new(placement))
    placements_n += 1
  end
  $stderr.puts " #{placements_n} placements." unless o[:q]
  # First pass: total counts per dataset (registers datasets not in metadata)
  tree.pre_order do |n|
    n.placements.each do |p|
      p.nm.each do |r|
        metadata[ dataset_of.call(r[:n], n.index) ].add_count(r[:m])
      end
    end
  end


  unless o[:collapse].nil?
    $stderr.puts "Collapsing nodes." unless o[:q]
    # Each line is "node" or "node<TAB>new name"; without a new name the node
    # keeps its own name.
    collapse = {}
    File.readlines(o[:collapse]).each do |ln|
      node_name, new_name = ln.chomp.split(/\t/)
      collapse[node_name] = new_name.nil? ? node_name : new_name
    end
    coll_n = 0
    File.open(o[:out] + ".collapse", 'w') do |f|
      tree.pre_order do |n|
        if collapse.key? n.cannonical_name
          n.collapse!
          n.name = collapse[n.cannonical_name]
          f.puts n.name
          coll_n += 1
        end
      end
    end
    $stderr.puts " #{coll_n} nodes collapsed (#{collapse.length} requested)." unless o[:q]
  end


  $stderr.puts "Estimating normalizing factors by #{o[:norm].to_s}." unless o[:q] or o[:norm]==:none
  datasets = metadata.datasets.values
  case o[:norm]
  when :none
    datasets.each{ |d| d.add_datum :norm, 1.0 }
  when :counts
    datasets.each{ |d| d.add_datum :norm, d.count.to_f }
  when :size
    # Every dataset is checked (not only the first one): datasets found in
    # the read names but absent from the metadata have no size either.
    lacking = datasets.select{ |d| d.size.nil? }.map{ |d| d.name }
    abort "Column 'size' required in metadata (missing for: #{lacking.join(', ')})." unless lacking.empty?
    datasets.each{ |d| d.add_datum :norm, d.size.to_f }
  when :norm
    lacking = datasets.select{ |d| d.norm.nil? }.map{ |d| d.name }
    abort "Column 'norm' required in metadata (missing for: #{lacking.join(', ')})." unless lacking.empty?
  end
  max_norm = datasets.map{ |d| d.norm }.max


  $stderr.puts "Generating iToL dataset." unless o[:q]
  max_norm_sum,min_norm_sum,max_norm_n,min_norm_n = 0.0,Float::INFINITY,"",""
  File.open(o[:out] + ".itol.txt", "w") do |f|
    if o[:olditol]
      f.puts "LABELS\t" + metadata.names.join("\t")
      f.puts "COLORS\t" + metadata.colors.join("\t")
    else
      f.puts "DATASET_PIECHART"
      f.puts "SEPARATOR TAB"
      f.puts "DATASET_LABEL\tReadPlacement"
      f.puts "COLOR\t#1f2122"
      f.puts "FIELD_LABELS\t" + metadata.names.join("\t")
      f.puts "FIELD_COLORS\t" + metadata.colors.join("\t")
      f.puts "DATA"
    end
    # Second pass: normalized counts per dataset on each node
    tree.pre_order do |n|
      ds_counts = Hash.new(0.0)
      n.placements.each do |p|
        p.nm.each do |r|
          ds_name = dataset_of.call(r[:n], n.index)
          ds_counts[ ds_name ] += r[:m] / metadata[ ds_name ].norm
        end
      end
      counts_sum = ds_counts.values.reduce(:+)
      next if counts_sum.nil? # Node without placements: no pie
      # In the area option, the radius is "twice" to make the smallest > 1 (since counts_sum is >= 1)
      radius = (o[:area] ? 2*Math.sqrt(counts_sum/Math::PI) : counts_sum)*max_norm
      f.puts n.cannonical_name +
        "#{"\t0.5" unless o[:olditol]}\t#{"R" if o[:olditol]}" +
        radius.to_i.to_s + "\t" +
        metadata.names.map{ |ds_name| ds_counts[ds_name] }.join("\t")
      if counts_sum > max_norm_sum
        max_norm_n = n.cannonical_name
        max_norm_sum = counts_sum
      end
      if counts_sum < min_norm_sum
        min_norm_n = n.cannonical_name
        min_norm_sum = counts_sum
      end
    end
  end
  units = {:none=>'', :counts=>' per million placements', :size=>' per million reads', :norm=>' per normalizing unit'}
  $stderr.puts " The pie #{o[:area] ? 'areas' : 'radii'} are proportional to the placements#{units[o[:norm]]}." unless o[:q]
  $stderr.puts " The minimum radius (#{min_norm_n}) represents #{min_norm_sum*(([:none, :norm].include? o[:norm]) ? 1 : 1e6)} placements#{units[o[:norm]]}." unless o[:q]
  $stderr.puts " The maximum radius (#{max_norm_n}) represents #{max_norm_sum*(([:none, :norm].include? o[:norm]) ? 1 : 1e6)} placements#{units[o[:norm]]}." unless o[:q]


  $stderr.puts "Re-formatting tree for iToL." unless o[:q]
  File.open(o[:out]+'.nwk', "w"){ |f| f.puts tree.to_s+';' }

rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l }
  # Signal the failure to the calling shell instead of exiting with status 0
  exit 1
end
305
+
306
+