bio 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340) hide show
  1. data/bin/bioruby +14 -122
  2. data/bin/br_biofetch.rb +2 -2
  3. data/bin/br_bioflat.rb +2 -2
  4. data/bin/br_biogetseq.rb +2 -2
  5. data/bin/br_pmfetch.rb +3 -3
  6. data/doc/Changes-0.7.rd +77 -0
  7. data/doc/KEGG_API.rd +523 -232
  8. data/doc/KEGG_API.rd.ja +529 -207
  9. data/doc/Tutorial.rd +48 -11
  10. data/lib/bio.rb +59 -6
  11. data/lib/bio/alignment.rb +713 -103
  12. data/lib/bio/appl/bl2seq/report.rb +2 -18
  13. data/lib/bio/appl/blast.rb +108 -91
  14. data/lib/bio/appl/blast/format0.rb +33 -18
  15. data/lib/bio/appl/blast/format8.rb +6 -20
  16. data/lib/bio/appl/blast/report.rb +293 -429
  17. data/lib/bio/appl/blast/rexml.rb +8 -22
  18. data/lib/bio/appl/blast/wublast.rb +21 -12
  19. data/lib/bio/appl/blast/xmlparser.rb +180 -183
  20. data/lib/bio/appl/blat/report.rb +127 -30
  21. data/lib/bio/appl/clustalw.rb +87 -59
  22. data/lib/bio/appl/clustalw/report.rb +20 -22
  23. data/lib/bio/appl/emboss.rb +113 -20
  24. data/lib/bio/appl/fasta.rb +173 -198
  25. data/lib/bio/appl/fasta/format10.rb +244 -347
  26. data/lib/bio/appl/gcg/msf.rb +212 -0
  27. data/lib/bio/appl/gcg/seq.rb +195 -0
  28. data/lib/bio/appl/genscan/report.rb +5 -23
  29. data/lib/bio/appl/hmmer.rb +8 -45
  30. data/lib/bio/appl/hmmer/report.rb +2 -20
  31. data/lib/bio/appl/iprscan/report.rb +374 -0
  32. data/lib/bio/appl/mafft.rb +87 -50
  33. data/lib/bio/appl/mafft/report.rb +151 -44
  34. data/lib/bio/appl/muscle.rb +52 -0
  35. data/lib/bio/appl/phylip/alignment.rb +129 -0
  36. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  37. data/lib/bio/appl/probcons.rb +41 -0
  38. data/lib/bio/appl/psort.rb +89 -96
  39. data/lib/bio/appl/psort/report.rb +6 -22
  40. data/lib/bio/appl/pts1.rb +263 -0
  41. data/lib/bio/appl/sim4.rb +26 -36
  42. data/lib/bio/appl/sim4/report.rb +2 -18
  43. data/lib/bio/appl/sosui/report.rb +5 -20
  44. data/lib/bio/appl/spidey/report.rb +2 -2
  45. data/lib/bio/appl/targetp/report.rb +4 -20
  46. data/lib/bio/appl/tcoffee.rb +55 -0
  47. data/lib/bio/appl/tmhmm/report.rb +4 -20
  48. data/lib/bio/command.rb +235 -64
  49. data/lib/bio/data/aa.rb +21 -26
  50. data/lib/bio/data/codontable.rb +2 -20
  51. data/lib/bio/data/na.rb +19 -4
  52. data/lib/bio/db.rb +27 -12
  53. data/lib/bio/db/aaindex.rb +2 -20
  54. data/lib/bio/db/embl/common.rb +4 -21
  55. data/lib/bio/db/embl/embl.rb +33 -85
  56. data/lib/bio/db/embl/sptr.rb +612 -302
  57. data/lib/bio/db/embl/swissprot.rb +10 -29
  58. data/lib/bio/db/embl/trembl.rb +10 -29
  59. data/lib/bio/db/embl/uniprot.rb +10 -29
  60. data/lib/bio/db/fantom.rb +15 -20
  61. data/lib/bio/db/fasta.rb +3 -3
  62. data/lib/bio/db/genbank/common.rb +37 -46
  63. data/lib/bio/db/genbank/ddbj.rb +6 -18
  64. data/lib/bio/db/genbank/genbank.rb +47 -186
  65. data/lib/bio/db/genbank/genpept.rb +4 -17
  66. data/lib/bio/db/genbank/refseq.rb +4 -17
  67. data/lib/bio/db/gff.rb +103 -35
  68. data/lib/bio/db/go.rb +4 -20
  69. data/lib/bio/db/kegg/brite.rb +26 -36
  70. data/lib/bio/db/kegg/compound.rb +81 -85
  71. data/lib/bio/db/kegg/drug.rb +98 -0
  72. data/lib/bio/db/kegg/enzyme.rb +133 -110
  73. data/lib/bio/db/kegg/expression.rb +2 -20
  74. data/lib/bio/db/kegg/genes.rb +208 -238
  75. data/lib/bio/db/kegg/genome.rb +164 -285
  76. data/lib/bio/db/kegg/glycan.rb +114 -157
  77. data/lib/bio/db/kegg/keggtab.rb +242 -303
  78. data/lib/bio/db/kegg/kgml.rb +117 -160
  79. data/lib/bio/db/kegg/orthology.rb +112 -0
  80. data/lib/bio/db/kegg/reaction.rb +54 -69
  81. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  82. data/lib/bio/db/lasergene.rb +209 -0
  83. data/lib/bio/db/litdb.rb +3 -27
  84. data/lib/bio/db/medline.rb +228 -249
  85. data/lib/bio/db/nbrf.rb +3 -3
  86. data/lib/bio/db/newick.rb +510 -0
  87. data/lib/bio/db/nexus.rb +1854 -0
  88. data/lib/bio/db/pdb.rb +5 -17
  89. data/lib/bio/db/pdb/atom.rb +2 -18
  90. data/lib/bio/db/pdb/chain.rb +2 -18
  91. data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
  92. data/lib/bio/db/pdb/model.rb +2 -18
  93. data/lib/bio/db/pdb/pdb.rb +73 -34
  94. data/lib/bio/db/pdb/residue.rb +4 -20
  95. data/lib/bio/db/pdb/utils.rb +2 -18
  96. data/lib/bio/db/prosite.rb +403 -422
  97. data/lib/bio/db/rebase.rb +84 -40
  98. data/lib/bio/db/soft.rb +404 -0
  99. data/lib/bio/db/transfac.rb +5 -17
  100. data/lib/bio/feature.rb +106 -52
  101. data/lib/bio/io/das.rb +32 -42
  102. data/lib/bio/io/dbget.rb +2 -20
  103. data/lib/bio/io/ddbjxml.rb +77 -138
  104. data/lib/bio/io/ebisoap.rb +158 -0
  105. data/lib/bio/io/ensembl.rb +229 -0
  106. data/lib/bio/io/fastacmd.rb +89 -82
  107. data/lib/bio/io/fetch.rb +163 -96
  108. data/lib/bio/io/flatfile.rb +170 -73
  109. data/lib/bio/io/flatfile/bdb.rb +3 -16
  110. data/lib/bio/io/flatfile/index.rb +2 -2
  111. data/lib/bio/io/flatfile/indexer.rb +3 -2
  112. data/lib/bio/io/higet.rb +12 -31
  113. data/lib/bio/io/keggapi.rb +210 -269
  114. data/lib/bio/io/ncbisoap.rb +155 -0
  115. data/lib/bio/io/pubmed.rb +169 -147
  116. data/lib/bio/io/registry.rb +4 -20
  117. data/lib/bio/io/soapwsdl.rb +43 -38
  118. data/lib/bio/io/sql.rb +242 -305
  119. data/lib/bio/location.rb +407 -285
  120. data/lib/bio/map.rb +410 -0
  121. data/lib/bio/pathway.rb +558 -695
  122. data/lib/bio/reference.rb +272 -75
  123. data/lib/bio/sequence.rb +255 -13
  124. data/lib/bio/sequence/aa.rb +71 -10
  125. data/lib/bio/sequence/common.rb +187 -33
  126. data/lib/bio/sequence/compat.rb +59 -4
  127. data/lib/bio/sequence/format.rb +54 -7
  128. data/lib/bio/sequence/generic.rb +3 -3
  129. data/lib/bio/sequence/na.rb +328 -26
  130. data/lib/bio/shell.rb +11 -4
  131. data/lib/bio/shell/core.rb +221 -160
  132. data/lib/bio/shell/demo.rb +18 -15
  133. data/lib/bio/shell/interface.rb +14 -12
  134. data/lib/bio/shell/irb.rb +95 -0
  135. data/lib/bio/shell/object.rb +45 -26
  136. data/lib/bio/shell/plugin/blast.rb +42 -0
  137. data/lib/bio/shell/plugin/codon.rb +22 -14
  138. data/lib/bio/shell/plugin/das.rb +58 -0
  139. data/lib/bio/shell/plugin/emboss.rb +2 -2
  140. data/lib/bio/shell/plugin/entry.rb +22 -11
  141. data/lib/bio/shell/plugin/flatfile.rb +2 -2
  142. data/lib/bio/shell/plugin/keggapi.rb +13 -6
  143. data/lib/bio/shell/plugin/midi.rb +4 -4
  144. data/lib/bio/shell/plugin/obda.rb +2 -2
  145. data/lib/bio/shell/plugin/psort.rb +56 -0
  146. data/lib/bio/shell/plugin/seq.rb +35 -8
  147. data/lib/bio/shell/plugin/soap.rb +87 -0
  148. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
  149. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
  150. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
  151. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
  152. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
  153. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
  154. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
  156. data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
  159. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
  160. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
  161. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
  162. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
  163. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
  164. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
  165. data/lib/bio/shell/script.rb +25 -0
  166. data/lib/bio/shell/setup.rb +109 -0
  167. data/lib/bio/shell/web.rb +70 -58
  168. data/lib/bio/tree.rb +850 -0
  169. data/lib/bio/util/color_scheme.rb +84 -107
  170. data/lib/bio/util/color_scheme/buried.rb +5 -24
  171. data/lib/bio/util/color_scheme/helix.rb +5 -24
  172. data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
  173. data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
  174. data/lib/bio/util/color_scheme/strand.rb +5 -24
  175. data/lib/bio/util/color_scheme/taylor.rb +5 -24
  176. data/lib/bio/util/color_scheme/turn.rb +5 -24
  177. data/lib/bio/util/color_scheme/zappo.rb +5 -24
  178. data/lib/bio/util/contingency_table.rb +70 -43
  179. data/lib/bio/util/restriction_enzyme.rb +228 -0
  180. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  181. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  182. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  183. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  184. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  185. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  186. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  187. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  188. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  189. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  190. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  191. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  192. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  193. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  194. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  195. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  196. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  197. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  198. data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
  199. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  200. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  201. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  202. data/lib/bio/util/sirna.rb +4 -22
  203. data/sample/color_scheme_na.rb +4 -12
  204. data/sample/enzymes.rb +78 -0
  205. data/sample/goslim.rb +5 -13
  206. data/sample/psortplot_html.rb +4 -12
  207. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  208. data/test/data/embl/AB090716.embl.rel89 +63 -0
  209. data/test/data/fasta/example1.txt +75 -0
  210. data/test/data/fasta/example2.txt +21 -0
  211. data/test/data/iprscan/merged.raw +32 -0
  212. data/test/data/iprscan/merged.txt +74 -0
  213. data/test/data/soft/GDS100_partial.soft +92 -0
  214. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  215. data/test/functional/bio/io/test_ensembl.rb +103 -0
  216. data/test/functional/bio/io/test_soapwsdl.rb +5 -17
  217. data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
  218. data/test/unit/bio/appl/blast/test_report.rb +3 -16
  219. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
  220. data/test/unit/bio/appl/genscan/test_report.rb +3 -16
  221. data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
  222. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  223. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  224. data/test/unit/bio/appl/sosui/test_report.rb +3 -16
  225. data/test/unit/bio/appl/targetp/test_report.rb +3 -16
  226. data/test/unit/bio/appl/test_blast.rb +3 -16
  227. data/test/unit/bio/appl/test_fasta.rb +4 -16
  228. data/test/unit/bio/appl/test_pts1.rb +140 -0
  229. data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
  230. data/test/unit/bio/data/test_aa.rb +4 -17
  231. data/test/unit/bio/data/test_codontable.rb +3 -16
  232. data/test/unit/bio/data/test_na.rb +3 -3
  233. data/test/unit/bio/db/embl/test_common.rb +3 -16
  234. data/test/unit/bio/db/embl/test_embl.rb +3 -16
  235. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  236. data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
  237. data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
  238. data/test/unit/bio/db/kegg/test_genes.rb +3 -16
  239. data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
  240. data/test/unit/bio/db/test_aaindex.rb +2 -2
  241. data/test/unit/bio/db/test_fasta.rb +3 -16
  242. data/test/unit/bio/db/test_gff.rb +3 -16
  243. data/test/unit/bio/db/test_lasergene.rb +95 -0
  244. data/test/unit/bio/db/test_newick.rb +56 -0
  245. data/test/unit/bio/db/test_nexus.rb +360 -0
  246. data/test/unit/bio/db/test_prosite.rb +5 -18
  247. data/test/unit/bio/db/test_rebase.rb +11 -25
  248. data/test/unit/bio/db/test_soft.rb +138 -0
  249. data/test/unit/bio/io/test_ddbjxml.rb +5 -17
  250. data/test/unit/bio/io/test_ensembl.rb +109 -0
  251. data/test/unit/bio/io/test_fastacmd.rb +3 -16
  252. data/test/unit/bio/io/test_flatfile.rb +237 -0
  253. data/test/unit/bio/io/test_soapwsdl.rb +4 -17
  254. data/test/unit/bio/sequence/test_aa.rb +3 -3
  255. data/test/unit/bio/sequence/test_common.rb +3 -16
  256. data/test/unit/bio/sequence/test_compat.rb +3 -16
  257. data/test/unit/bio/sequence/test_na.rb +29 -3
  258. data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
  259. data/test/unit/bio/test_alignment.rb +16 -27
  260. data/test/unit/bio/test_command.rb +242 -25
  261. data/test/unit/bio/test_db.rb +3 -16
  262. data/test/unit/bio/test_feature.rb +4 -16
  263. data/test/unit/bio/test_location.rb +4 -16
  264. data/test/unit/bio/test_map.rb +230 -0
  265. data/test/unit/bio/test_pathway.rb +4 -16
  266. data/test/unit/bio/test_reference.rb +2 -2
  267. data/test/unit/bio/test_sequence.rb +7 -19
  268. data/test/unit/bio/test_shell.rb +3 -16
  269. data/test/unit/bio/test_tree.rb +593 -0
  270. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  271. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  272. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  273. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
  274. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  275. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  276. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  277. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  278. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  279. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  280. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  281. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  282. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  283. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  284. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  285. data/test/unit/bio/util/test_color_scheme.rb +6 -18
  286. data/test/unit/bio/util/test_contingency_table.rb +6 -18
  287. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  288. data/test/unit/bio/util/test_sirna.rb +3 -16
  289. metadata +228 -169
  290. data/doc/BioRuby.rd.ja +0 -225
  291. data/doc/Design.rd.ja +0 -341
  292. data/doc/TODO.rd.ja +0 -138
  293. data/lib/bio/appl/fasta/format6.rb +0 -37
  294. data/lib/bio/db/kegg/cell.rb +0 -88
  295. data/lib/bio/db/kegg/ko.rb +0 -178
  296. data/lib/bio/shell/rails/Rakefile +0 -10
  297. data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
  298. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
  299. data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
  300. data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
  301. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
  302. data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
  303. data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
  304. data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
  305. data/lib/bio/shell/rails/config/boot.rb +0 -19
  306. data/lib/bio/shell/rails/config/database.yml +0 -85
  307. data/lib/bio/shell/rails/config/environment.rb +0 -53
  308. data/lib/bio/shell/rails/config/environments/development.rb +0 -19
  309. data/lib/bio/shell/rails/config/environments/production.rb +0 -19
  310. data/lib/bio/shell/rails/config/environments/test.rb +0 -19
  311. data/lib/bio/shell/rails/config/routes.rb +0 -19
  312. data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
  313. data/lib/bio/shell/rails/public/404.html +0 -8
  314. data/lib/bio/shell/rails/public/500.html +0 -8
  315. data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
  316. data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
  317. data/lib/bio/shell/rails/public/dispatch.rb +0 -10
  318. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  319. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  320. data/lib/bio/shell/rails/public/index.html +0 -277
  321. data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
  322. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
  323. data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
  324. data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
  325. data/lib/bio/shell/rails/public/robots.txt +0 -1
  326. data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
  327. data/lib/bio/shell/rails/script/about +0 -3
  328. data/lib/bio/shell/rails/script/breakpointer +0 -3
  329. data/lib/bio/shell/rails/script/console +0 -3
  330. data/lib/bio/shell/rails/script/destroy +0 -3
  331. data/lib/bio/shell/rails/script/generate +0 -3
  332. data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
  333. data/lib/bio/shell/rails/script/performance/profiler +0 -3
  334. data/lib/bio/shell/rails/script/plugin +0 -3
  335. data/lib/bio/shell/rails/script/process/reaper +0 -3
  336. data/lib/bio/shell/rails/script/process/spawner +0 -3
  337. data/lib/bio/shell/rails/script/process/spinner +0 -3
  338. data/lib/bio/shell/rails/script/runner +0 -3
  339. data/lib/bio/shell/rails/script/server +0 -42
  340. data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -0,0 +1,1854 @@
1
+ #
2
+ # = bio/db/nexus.rb - Nexus Standard phylogenetic tree parser / formatter
3
+ #
4
+ # Copyright:: Copyright (C) 2006 Christian M Zmasek <cmzmasek@yahoo.com>
5
+ #
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id: nexus.rb,v 1.3 2007/04/05 23:35:40 trevor Exp $
9
+ #
10
+ # == Description
11
+ #
12
+ # This file contains classes that implement a parser for NEXUS formatted
13
+ # data as well as objects to store, access, and write the parsed data.
14
+ #
15
+ # The following five blocks:
16
+ # taxa, characters, distances, trees, data
17
+ # are recognizable and parsable.
18
+ #
19
+ # The parser can deal with (nested) comments (indicated by square brackets),
20
+ # unless the comments are inside a command or data item (e.g.
21
+ # "Dim[comment]ensions" or inside a matrix).
22
+ #
23
+ # Single or double quoted TaxLabels are processed as follows (by way
24
+ # of example): "mus musculus" -> mus_musculus
25
+ #
26
+ #
27
+ # == USAGE
28
+ #
29
+ # require 'bio/db/nexus'
30
+ #
31
+ # # Create a new parser:
32
+ # nexus = Bio::Nexus.new( nexus_data_as_string )
33
+ #
34
+ # # Get first taxa block:
35
+ # taxa_block = nexus.get_taxa_blocks[ 0 ]
36
+ # # Get number of taxa:
37
+ # number_of_taxa = taxa_block.get_number_of_taxa.to_i
38
+ # # Get name of first taxon:
39
+ # first_taxon = taxa_block.get_taxa[ 0 ]
40
+ #
41
+ # # Get first data block:
42
+ # data_block = nexus.get_data_blocks[ 0 ]
43
+ # # Get first characters name:
44
+ # seq_name = data_block.get_row_name( 0 )
45
+ # # Get first characters row named "taxon_2" as Bio::Sequence sequence:
46
+ # seq_tax_2 = data_block.get_sequences_by_name( "taxon_2" )[ 0 ]
47
+ # # Get third characters row as Bio::Sequence sequence:
48
+ # seq_2 = data_block.get_sequence( 2 )
49
+ # # Get first characters row named "taxon_3" as String:
50
+ # string_tax_3 = data_block.get_characters_strings_by_name( "taxon_3" )
51
+ # # Get name of first taxon:
52
+ # taxon_0 = data_block.get_taxa[ 0 ]
53
+ # # Get characters matrix as Bio::Nexus::NexusMatrix (names are in column 0)
54
+ # characters_matrix = data_block.get_matrix
55
+ #
56
+ # # Get first characters block (same methods as Nexus::DataBlock except
57
+ # # it lacks get_taxa method):
58
+ # characters_block = nexus.get_characters_blocks[ 0 ]
59
+ #
60
+ # # Get trees block(s):
61
+ # trees_block = nexus.get_trees_blocks[ 0 ]
62
+ # # Get first tree named "best" as String:
63
+ # string_fish = trees_block.get_tree_strings_by_name( "best" )[ 0 ]
64
+ # # Get first tree named "best" as Bio::Db::Newick object:
65
+ # tree_fish = trees_block.get_trees_by_name( "best" )[ 0 ]
66
+ # # Get first tree as Bio::Db::Newick object:
67
+ # tree_first = trees_block.get_tree( 0 )
68
+ #
69
+ # # Get distances block(s):
70
+ # distances_blocks = nexus.get_distances_blocks
71
+ # # Get matrix as Bio::Nexus::NexusMatrix object:
72
+ # matrix = distances_blocks[ 0 ].get_matrix
73
+ # # Get value (column 0 are names):
74
+ # val = matrix.get_value( 1, 5 )
75
+ #
76
+ # # Get blocks for which no class exists (private blocks):
77
+ # private_blocks = nexus.get_blocks_by_name( "my_block" )
78
+ # Get first block named "my_block":
79
+ # my_block_0 = private_blocks[ 0 ]
80
+ # Get first token in first block named "my_block":
81
+ # first_token = my_block_0.get_tokens[ 0 ]
82
+ #
83
+ #
84
+ # == References
85
+ #
86
+ # * Maddison DR, Swofford DL, Maddison WP (1997). NEXUS: an extensible file
87
+ # format for systematic information.
88
+ # Syst Biol. 1997 46(4):590-621.
89
+ #
90
+
91
+ require 'bio/sequence'
92
+ require 'bio/tree'
93
+ require 'bio/db/newick'
94
+
95
+ module Bio
96
+
97
+ # == DESCRIPTION
98
+ # Bio::Nexus is a parser for nexus formatted data.
99
+ # It contains classes and constants enabling the representation and
100
+ # processing of nexus data.
101
+ #
102
+ # == USAGE
103
+ #
104
+ # # Parsing a nexus formatted string str:
105
+ # nexus = Bio::Nexus.new( nexus_str )
106
+ #
107
+ # # Obtaining of the nexus blocks as array of GenericBlock or
108
+ # # any of its subclasses (such as DistancesBlock):
109
+ # blocks = nexus.get_blocks
110
+ #
111
+ # # Getting a block by name:
112
+ # my_blocks = nexus.get_blocks_by_name( "my_block" )
113
+ #
114
+ # # Getting distance blocks:
115
+ # distances_blocks = nexus.get_distances_blocks
116
+ #
117
+ # # Getting trees blocks:
118
+ # trees_blocks = nexus.get_trees_blocks
119
+ #
120
+ # # Getting data blocks:
121
+ # data_blocks = nexus.get_data_blocks
122
+ #
123
+ # # Getting characters blocks:
124
+ # character_blocks = nexus.get_characters_blocks
125
+ #
126
+ # # Getting taxa blocks:
127
+ # taxa_blocks = nexus.get_taxa_blocks
128
+ #
129
+ class Nexus
130
+
131
+
132
+ END_OF_LINE = "\n"
133
+ INDENTENTION = " "
134
+ DOUBLE_QUOTE = "\""
135
+ SINGLE_QUOTE = "'"
136
+
137
+
138
+ BEGIN_NEXUS = "#NEXUS"
139
+ DELIMITER = ";"
140
+ BEGIN_BLOCK = "Begin"
141
+ END_BLOCK = "End" + DELIMITER
142
+ BEGIN_COMMENT = "["
143
+ END_COMMENT = "]"
144
+
145
+
146
+ TAXA = "Taxa"
147
+ CHARACTERS = "Characters"
148
+ DATA = "Data"
149
+ DISTANCES = "Distances"
150
+ TREES = "Trees"
151
+ TAXA_BLOCK = TAXA + DELIMITER
152
+ CHARACTERS_BLOCK = CHARACTERS + DELIMITER
153
+ DATA_BLOCK = DATA + DELIMITER
154
+ DISTANCES_BLOCK = DISTANCES + DELIMITER
155
+ TREES_BLOCK = TREES + DELIMITER
156
+
157
+
158
+ DIMENSIONS = "Dimensions"
159
+ FORMAT = "Format"
160
+ NTAX = "NTax"
161
+ NCHAR = "NChar"
162
+ DATATYPE = "DataType"
163
+ TAXLABELS = "TaxLabels"
164
+ MATRIX = "Matrix"
165
+ # End of constants.
166
+
167
+
168
+ # Nexus parse error class,
169
+ # indicates error during parsing of nexus formatted data.
170
+ class NexusParseError < RuntimeError; end
171
+
172
+ # Creates a new nexus parser for 'nexus_str'.
173
+ #
174
+ # ---
175
+ # *Arguments*:
176
+ # * (required) _nexus_str_: String - nexus formatted data
177
def initialize( nexus_str )
  # Parser state: the block currently being read and the command /
  # sub-command seen last inside it.
  @current_block      = nil
  @current_block_name = nil
  @current_subcmd     = nil
  @current_cmd        = nil
  # Blocks are collected here by #parse, in order of appearance.
  @blocks = []
  parse( nexus_str )
end
185
+
186
+
187
+ # Returns an Array of all blocks found in the String 'nexus_str'
188
+ # set via Bio::Nexus.new( nexus_str ).
189
+ #
190
+ # ---
191
+ # *Returns*:: Array of GenericBlocks or any of its subclasses
192
def get_blocks
  # Plain reader: hands back the internal array itself, not a copy.
  return @blocks
end
195
+
196
+ # A convenience method which returns an array of
197
+ # all nexus blocks for which the name equals 'name' found
198
+ # in the String 'nexus_str' set via Bio::Nexus.new( nexus_str ).
199
+ #
200
+ # ---
201
+ # *Arguments*:
202
+ # * (required) _name_: String
203
+ # *Returns*:: Array of GenericBlocks or any of its subclasses
204
def get_blocks_by_name( name )
  # Keep every block whose name is exactly equal to 'name'
  # (the comparison is case sensitive); order of appearance is preserved.
  @blocks.select { | block | name == block.get_name }
end
213
+
214
+ # A convenience method which returns an array of
215
+ # all data blocks.
216
+ #
217
+ # ---
218
+ # *Returns*:: Array of DataBlocks
219
def get_data_blocks
  # Look up by the lower-cased block keyword with the trailing ";" removed.
  block_name = DATA_BLOCK.chomp( ";" ).downcase
  get_blocks_by_name( block_name )
end
222
+
223
+ # A convenience method which returns an array of
224
+ # all characters blocks.
225
+ #
226
+ # ---
227
+ # *Returns*:: Array of CharactersBlocks
228
def get_characters_blocks
  # Look up by the lower-cased block keyword with the trailing ";" removed.
  block_name = CHARACTERS_BLOCK.chomp( ";" ).downcase
  get_blocks_by_name( block_name )
end
231
+
232
+ # A convenience method which returns an array of
233
+ # all trees blocks.
234
+ #
235
+ # ---
236
+ # *Returns*:: Array of TreesBlocks
237
def get_trees_blocks
  # Look up by the lower-cased block keyword with the trailing ";" removed.
  block_name = TREES_BLOCK.chomp( ";" ).downcase
  get_blocks_by_name( block_name )
end
240
+
241
+ # A convenience method which returns an array of
242
+ # all distances blocks.
243
+ #
244
+ # ---
245
+ # *Returns*:: Array of DistancesBlock
246
def get_distances_blocks
  # Look up by the lower-cased block keyword with the trailing ";" removed.
  block_name = DISTANCES_BLOCK.chomp( ";" ).downcase
  get_blocks_by_name( block_name )
end
249
+
250
+ # A convenience method which returns an array of
251
+ # all taxa blocks.
252
+ #
253
+ # ---
254
+ # *Returns*:: Array of TaxaBlocks
255
def get_taxa_blocks
  # Look up by the lower-cased block keyword with the trailing ";" removed.
  block_name = TAXA_BLOCK.chomp( ";" ).downcase
  get_blocks_by_name( block_name )
end
258
+
259
+ # Returns a String listing how many of each blocks it parsed.
260
+ #
261
+ # ---
262
+ # *Returns*:: String
263
def to_s
  summary = String.new
  total = get_blocks.length
  if total < 1
    summary << "empty"
    return summary
  end

  summary << "number of blocks: " << total.to_s
  # One "[<kind> blocks: <count>]" entry per block kind that occurs at
  # least once, always listed in this fixed order.
  per_kind = [
    [ "characters", get_characters_blocks ],
    [ "data",       get_data_blocks ],
    [ "distances",  get_distances_blocks ],
    [ "taxa",       get_taxa_blocks ],
    [ "trees",      get_trees_blocks ]
  ]
  per_kind.each do | label, found |
    next unless found.length > 0
    summary << " [" << label << " blocks: " << found.length.to_s << "] "
  end
  summary
end
287
+ alias to_str to_s
288
+
289
+ private
290
+
291
+ # The master method for parsing.
292
+ # Stores the resulting block in array @blocks.
293
+ #
294
+ # ---
295
+ # *Arguments*:
296
+ # * (required) _str_: String - the String to be parsed
297
def parse( str )
  # NOTE(review): a final ";" is dropped before tokenizing, so an input that
  # ends exactly with "End;" (no trailing newline) may not have its last
  # token recognized as END_BLOCK below -- confirm this is intended.
  str = str.chop if str[-1..-1] == ';'
  # Tokens are separated by whitespace, "+" or "="; blank pieces are dropped.
  ary = str.split(/[\s+=]/).collect { |piece| piece.strip }.reject { |piece| piece.empty? }
  comment_level = 0

  # Main loop
  while token = ary.shift
    # Quotes: a quoted name is collapsed into one token. Text inside a
    # comment is skipped anyway, so quotes are only interpreted outside of
    # comments (otherwise a closing "]" could be swallowed by the merge).
    starts_quoted = token.index( SINGLE_QUOTE ) == 0 ||
                    token.index( DOUBLE_QUOTE ) == 0
    if starts_quoted && !( comment_level > 0 )
      token = merge_quoted_token( token, ary )
    end

    # Comments: track the nesting depth of "[" ... "]".
    opened = token.count( BEGIN_COMMENT )
    closed = token.count( END_COMMENT )
    was_in_comment = comment_level > 0
    comment_level = comment_level + opened - closed
    if ( opened > 0 && opened == closed ) || comment_level > 0 || was_in_comment
      # Token is (part of) a comment, or a self-contained "[...]" token.
      next
    elsif equal?( token, END_BLOCK )
      end_block()
    elsif equal?( token, BEGIN_BLOCK )
      begin_block()
      block_name = ary.shift
      if block_name.nil?
        raise NexusParseError, "Block name expected after \"#{BEGIN_BLOCK}\""
      end
      @current_block_name = block_name.downcase
      @current_block = create_block()
      @blocks.push( @current_block )
    elsif ( @current_block_name != nil )
      process_token( token.chomp( DELIMITER ), ary )
    end
  end # main loop
  @blocks.compact!
end # parse

# Rebuilds a quoted name that was split on whitespace, e.g. the tokens
# "'african" and "frog'" become "african_frog". Following tokens are taken
# from 'ary' (which is modified) up to and including the first one that ends
# with the opening quote character; a trailing ";" is ignored for that check.
# A token that is already closed ('fish') consumes nothing. If no closing
# quote exists at all, at most one following token is glued on, as before.
#
# ---
# *Arguments*:
# * (required) _token_: String - token starting with a quote character
# * (required) _ary_: Array - the remaining tokens
# *Returns*:: String - the name without quotes, words joined by "_"
def merge_quoted_token( token, ary )
  quote = token[ 0, 1 ]
  bare = token.chomp( DELIMITER )
  unless bare.length > 1 && bare[ -1, 1 ] == quote
    closing = ary.index { |piece| piece.chomp( DELIMITER )[ -1, 1 ] == quote }
    wanted = closing ? closing + 1 : [ ary.length, 1 ].min
    token = ( [ token ] + ary.shift( wanted ) ).join( "_" )
  end
  token = token.chop if token[-1..-1] == ';'
  # slice returns nil for a lone quote character; treat that as empty.
  token.slice( 1, token.length - 2 ) || ""
end
337
+
338
+ # Operations required when the beginning of a block is encountered.
339
+ #
340
+ # ---
341
def begin_block()
  # A block name that is still set means the previous block was never closed.
  unless @current_block_name.nil?
    raise NexusParseError, "Cannot have nested nexus blocks (\"end;\" might be missing)"
  end
  reset_command_state()
end
347
+
348
+ # Operations required when ending of block encountered.
349
+ #
350
+ # ---
351
def end_block()
  # No block name set means there is no open block this "end;" could close.
  raise NexusParseError, "Cannot have two or more \"end;\" tokens in sequence" if @current_block_name.nil?
  @current_block_name = nil
end
357
+
358
+ # This calls various process_token_for_<name>_block methods
359
+ # depending on the state of @current_block_name.
360
+ #
361
+ # ---
362
+ # *Arguments*:
363
+ # * (required) _token_: String
364
+ # * (required) _ary_: Array
365
def process_token( token, ary )
  # Dispatch on the lower-cased name of the block being read; any block
  # without a dedicated handler is treated as a generic block.
  block_name = @current_block_name
  if TAXA_BLOCK.downcase == block_name
    process_token_for_taxa_block( token )
  elsif CHARACTERS_BLOCK.downcase == block_name
    process_token_for_character_block( token, ary )
  elsif DATA_BLOCK.downcase == block_name
    process_token_for_data_block( token, ary )
  elsif DISTANCES_BLOCK.downcase == block_name
    process_token_for_distances_block( token, ary )
  elsif TREES_BLOCK.downcase == block_name
    process_token_for_trees_block( token, ary )
  else
    process_token_for_generic_block( token )
  end
end
381
+
382
+ # Resets @current_cmd and @current_subcmd to nil.
383
+ #
384
+ # ---
385
def reset_command_state()
  # Forget both the command and the sub-command seen last.
  @current_cmd = @current_subcmd = nil
end
389
+
390
+ # Creates GenericBlock (or any of its subclasses) the type of
391
+ # which is determined by the state of @current_block_name.
392
+ #
393
+ # ---
394
+ # *Returns*:: GenericBlock (or any of its subclasses) object
395
def create_block()
  # Pick the block class matching the lower-cased block name; names without
  # a dedicated class fall back to GenericBlock.
  block_class =
    case @current_block_name
    when TAXA_BLOCK.downcase       then Bio::Nexus::TaxaBlock
    when CHARACTERS_BLOCK.downcase then Bio::Nexus::CharactersBlock
    when DATA_BLOCK.downcase       then Bio::Nexus::DataBlock
    when DISTANCES_BLOCK.downcase  then Bio::Nexus::DistancesBlock
    when TREES_BLOCK.downcase      then Bio::Nexus::TreesBlock
    else                                Bio::Nexus::GenericBlock
    end
  block_class.new( @current_block_name )
end
411
+
412
+ # This processes the tokens (between Begin Taxa; and End;) for a taxa block
413
+ # Example of a currently parseable taxa block:
414
+ # Begin Taxa;
415
+ # Dimensions NTax=4;
416
+ # TaxLabels fish [comment] 'african frog' "rat snake" 'red mouse';
417
+ # End;
418
+ #
419
+ # ---
420
+ # *Arguments*:
421
+ # * (required) _token_: String
422
def process_token_for_taxa_block( token )
  # Command keywords are checked first: they start a new command and
  # clear any pending subcommand.
  [ DIMENSIONS, TAXLABELS ].each do |command|
    next unless equal?( token, command )
    @current_cmd = command
    @current_subcmd = nil
    return nil
  end

  if @current_cmd == DIMENSIONS && equal?( token, NTAX )
    # "NTax" inside Dimensions: the next token is handed to
    # set_number_of_taxa.
    @current_subcmd = NTAX
  elsif cmds_equal_to?( DIMENSIONS, NTAX )
    @current_block.set_number_of_taxa( token )
  elsif cmds_equal_to?( TAXLABELS, nil )
    @current_block.add_taxon( token )
  end
end
437
+
438
+ # This processes the tokens (between Begin Characters; and End;) for a character
439
+ # block
440
+ # Example of a currently parseable character block:
441
+ # Begin Characters;
442
+ # Dimensions NChar=20
443
+ # NTax=4;
444
+ # Format DataType=DNA
445
+ # Missing=x
446
+ # Gap=- MatchChar=.;
447
+ # Matrix
448
+ # fish ACATA GAGGG TACCT CTAAG
449
+ # frog ACTTA GAGGC TACCT CTAGC
450
+ # snake ACTCA CTGGG TACCT TTGCG
451
+ # mouse ACTCA GACGG TACCT TTGCG;
452
+ # End;
453
+ #
454
+ # ---
455
+ # *Arguments*:
456
+ # * (required) _token_: String
457
+ # * (required) _ary_: Array
458
def process_token_for_character_block( token, ary )
  # 1. A command keyword starts a new command and clears the subcommand.
  [ DIMENSIONS, FORMAT, MATRIX ].each do |command|
    next unless equal?( token, command )
    @current_cmd = command
    @current_subcmd = nil
    return nil
  end

  # 2. A subcommand keyword that belongs to the active command selects
  #    that subcommand; the value follows in the next token.
  subcommands = {
    DIMENSIONS => [ NTAX, NCHAR ],
    FORMAT     => [ DATATYPE, CharactersBlock::MISSING,
                    CharactersBlock::GAP, CharactersBlock::MATCHCHAR ]
  }
  ( subcommands[ @current_cmd ] || [] ).each do |subcommand|
    return @current_subcmd = subcommand if equal?( token, subcommand )
  end

  # 3. Otherwise the token is the value for the active command/subcommand.
  setters = [
    [ DIMENSIONS, NTAX,                       :set_number_of_taxa ],
    [ DIMENSIONS, NCHAR,                      :set_number_of_characters ],
    [ FORMAT,     DATATYPE,                   :set_datatype ],
    [ FORMAT,     CharactersBlock::MISSING,   :set_missing ],
    [ FORMAT,     CharactersBlock::GAP,       :set_gap_character ],
    [ FORMAT,     CharactersBlock::MATCHCHAR, :set_match_character ]
  ]
  setters.each do |command, subcommand, setter|
    return @current_block.send( setter, token ) if cmds_equal_to?( command, subcommand )
  end

  # 4. Inside Matrix: token is the first row name, make_matrix consumes
  #    the rest of the matrix from ary.
  if cmds_equal_to?( MATRIX, nil )
    @current_block.set_matrix(
      make_matrix( token, ary, @current_block.get_number_of_characters, true ) )
  end
end
497
+
498
+ # This processes the tokens (between Begin Trees; and End;) for a trees block
499
+ # Example of a currently parseable trees block:
500
+ # Begin Trees;
501
+ # Tree best=(fish,(frog,(snake, mouse)));
502
+ # Tree other=(snake,(frog,( fish, mouse)));
503
+ # End;
504
+ #
505
+ # ---
506
+ # *Arguments*:
507
+ # * (required) _token_: String
508
+ # * (required) _ary_: Array
509
# Handles one token of a trees block.
#
# "Tree" starts a tree command. The token after it is taken as the tree
# name, and the tree description is then read from +ary+: tokens are
# shifted off and concatenated until one containing ";" has been
# appended.
#
# *Arguments*:
# * (required) _token_: String
# * (required) _ary_: Array - remaining tokens; consumed while reading
#   the tree description
# *Raises*:: NexusParseError if +ary+ runs out before a ";" is found
def process_token_for_trees_block( token, ary )
  if equal?( token, TreesBlock::TREE )
    @current_cmd = TreesBlock::TREE
    @current_subcmd = nil
  elsif cmds_equal_to?( TreesBlock::TREE, nil )
    # Collect the pieces of the description; the piece containing ";"
    # is the last one. Always reads at least one token.
    pieces = []
    loop do
      piece = ary.shift
      if piece.nil?
        # Previously this surfaced as a NoMethodError/TypeError on nil.
        raise NexusParseError, "Tree \"#{token}\" is not terminated by \";\""
      end
      pieces << piece
      break if piece.index( ";" )
    end
    @current_block.add_tree_name( token )
    @current_block.add_tree( pieces.join )
    # The tree command is complete; wait for the next "Tree" keyword.
    @current_cmd = nil
  end
end
523
+
524
+ # This processes the tokens (between Begin Distances; and End;) for a distances
525
+ # block.
526
+ # Example of a currently parseable distances block:
527
+ # Begin Distances;
528
+ # Dimensions nchar=20 ntax=5;
529
+ # Format Triangle=Upper;
530
+ # Matrix
531
+ # taxon_1 0.0 1.0 2.0 4.0 7.0
532
+ # taxon_2 1.0 0.0 3.0 5.0 8.0
533
+ # taxon_3 3.0 4.0 0.0 6.0 9.0
534
+ # taxon_4 7.0 3.0 1.0 0.0 9.5
535
+ # taxon_5 1.2 1.3 1.4 1.5 0.0;
536
+ # End;
537
+ #
538
+ # ---
539
+ # *Arguments*:
540
+ # * (required) _token_: String
541
+ # * (required) _ary_: Array
542
def process_token_for_distances_block( token, ary )
  # 1. A command keyword starts a new command and clears the subcommand.
  [ DIMENSIONS, FORMAT, MATRIX ].each do |command|
    next unless equal?( token, command )
    @current_cmd = command
    @current_subcmd = nil
    return nil
  end

  # 2. A subcommand keyword that belongs to the active command selects
  #    that subcommand; the value follows in the next token.
  subcommands = {
    DIMENSIONS => [ NTAX, NCHAR ],
    FORMAT     => [ DATATYPE, DistancesBlock::TRIANGLE ]
  }
  ( subcommands[ @current_cmd ] || [] ).each do |subcommand|
    return @current_subcmd = subcommand if equal?( token, subcommand )
  end

  # 3. Otherwise the token is the value for the active command/subcommand.
  #    DataType is recognised above but has no setter here, so its value
  #    falls through and is dropped.
  setters = [
    [ DIMENSIONS, NTAX,                     :set_number_of_taxa ],
    [ DIMENSIONS, NCHAR,                    :set_number_of_characters ],
    [ FORMAT,     DistancesBlock::TRIANGLE, :set_triangle ]
  ]
  setters.each do |command, subcommand, setter|
    return @current_block.send( setter, token ) if cmds_equal_to?( command, subcommand )
  end

  # 4. Inside Matrix: token is the first row name, make_matrix consumes
  #    the rest of the matrix from ary (one whole token per cell).
  if cmds_equal_to?( MATRIX, nil )
    @current_block.set_matrix(
      make_matrix( token, ary, @current_block.get_number_of_taxa, false ) )
  end
end
571
+
572
+ # This processes the tokens (between Begin Data; and End;) for a data
573
+ # block.
574
+ # Example of a currently parseable data block:
575
+ # Begin Data;
576
+ # Dimensions ntax=5 nchar=14;
577
+ # Format Datatype=RNA gap=# MISSING=x MatchChar=^;
578
+ # TaxLabels ciona cow [comment] ape 'purple urchin' "green lizard";
579
+ # Matrix
580
+ # taxon_1 A- CCGTCGA-GTTA
581
+ # taxon_2 T- CCG-CGA-GATA
582
+ # taxon_3 A- C-GTCGA-GATA
583
+ # taxon_4 A- CCTCGA--GTTA
584
+ # taxon_5 T- CGGTCGT-CTTA;
585
+ # End;
586
+ #
587
+ # ---
588
+ # *Arguments*:
589
+ # * (required) _token_: String
590
+ # * (required) _ary_: Array
591
def process_token_for_data_block( token, ary )
  # 1. A command keyword starts a new command and clears the subcommand.
  [ DIMENSIONS, FORMAT, TAXLABELS, MATRIX ].each do |command|
    next unless equal?( token, command )
    @current_cmd = command
    @current_subcmd = nil
    return nil
  end

  # 2. A subcommand keyword that belongs to the active command selects
  #    that subcommand; the value follows in the next token.
  subcommands = {
    DIMENSIONS => [ NTAX, NCHAR ],
    FORMAT     => [ DATATYPE, CharactersBlock::MISSING,
                    CharactersBlock::GAP, CharactersBlock::MATCHCHAR ]
  }
  ( subcommands[ @current_cmd ] || [] ).each do |subcommand|
    return @current_subcmd = subcommand if equal?( token, subcommand )
  end

  # 3. Otherwise the token is the value for the active command/subcommand
  #    (or, for TaxLabels, one more taxon name).
  setters = [
    [ DIMENSIONS, NTAX,                       :set_number_of_taxa ],
    [ DIMENSIONS, NCHAR,                      :set_number_of_characters ],
    [ FORMAT,     DATATYPE,                   :set_datatype ],
    [ FORMAT,     CharactersBlock::MISSING,   :set_missing ],
    [ FORMAT,     CharactersBlock::GAP,       :set_gap_character ],
    [ FORMAT,     CharactersBlock::MATCHCHAR, :set_match_character ],
    [ TAXLABELS,  nil,                        :add_taxon ]
  ]
  setters.each do |command, subcommand, setter|
    return @current_block.send( setter, token ) if cmds_equal_to?( command, subcommand )
  end

  # 4. Inside Matrix: token is the first row name, make_matrix consumes
  #    the rest of the matrix from ary.
  if cmds_equal_to?( MATRIX, nil )
    @current_block.set_matrix(
      make_matrix( token, ary, @current_block.get_number_of_characters, true ) )
  end
end
635
+
636
+ # Makes a NexusMatrix out of token from token Array ary
637
+ # Used by process_token_for_X_block methods which contain
638
+ # data in a matrix form. Column 0 contains names.
639
+ # This will shift tokens from ary.
640
+ # ---
641
+ # *Arguments*:
642
+ # * (required) _token_: String
643
+ # * (required) _ary_: Array
644
+ # * (required) _size_: Integer
645
+ # * (optional) _scan_token_: true or false
646
+ # *Returns*:: NexusMatrix
647
# Builds a NexusMatrix from +token+ (the first row name) and the tokens
# that follow it in +ary+, shifting tokens off +ary+ up to and including
# the first one that contains DELIMITER.
#
# Column 0 of every row holds the row name. A row is complete once
# +size+ value columns have been filled; the token after that is taken
# as the name of the next row.
#
# *Arguments*:
# * (required) _token_: String - name of the first row
# * (required) _ary_: Array - remaining tokens (consumed)
# * (required) _size_: Integer or numeric String - value columns per row
# * (optional) _scan_token_: true to store every character of a token in
#   its own column, false to store each token as a single value
# *Returns*:: NexusMatrix
# *Raises*:: NexusParseError if +ary+ runs out before a token containing
#            DELIMITER is found
def make_matrix( token, ary, size, scan_token = false )
  matrix = NexusMatrix.new
  width = size.to_i
  col = -1 # -1 means "the next token is a row name"
  row = 0
  loop do
    if col == -1
      col = 0
      matrix.set_value( row, col, token ) # name is in col 0
    else
      col = add_token_to_matrix( token, scan_token, matrix, row, col )
      if col == width
        # Row is full: the next token starts a new row.
        col = -1
        row += 1
      end
    end

    token = ary.shift
    if token.nil?
      # Previously this surfaced as a NoMethodError on nil.
      raise NexusParseError, "Matrix is not terminated by \"#{DELIMITER}\""
    end
    next if token.index( DELIMITER ).nil?

    # Final token. A free-standing ";" carries no data and must not be
    # stored as an (empty) cell or as the name of an extra row.
    last = token.chomp( ";" )
    add_token_to_matrix( last, scan_token, matrix, row, col ) unless last.empty?
    break
  end
  matrix
end
673
+
674
+ # Helper method for make_matrix.
675
+ #
676
+ # ---
677
+ # *Arguments*:
678
+ # * (required) _token_: String
679
+ # * (required) _scan_token_: true or false - add whole token
680
+ # or
681
+ # scan into chars
682
+ # * (required) _matrix_: NexusMatrix - the matrix to which to add token
683
+ # * (required) _row_: Integer - the row for matrix
684
+ # * (required) _col_: Integer - the starting column
685
+ # *Returns*:: Integer - ending column
686
def add_token_to_matrix( token, scan_token, matrix, row, col )
  # Either every character of the token becomes a cell of its own, or
  # the token is stored whole as a single cell.
  values = scan_token ? token.scan( /./ ) : [ token ]
  values.each do |value|
    col += 1
    matrix.set_value( row, col, value )
  end
  # Column of the last cell written (unchanged if nothing was written).
  col
end
698
+
699
+ # This processes the tokens (between Begin <name>; and End;) for a block
700
+ # for which a specific parser is not available.
701
+ # Example of a currently parseable generic block:
702
+ # Begin my_block;
703
+ # token1 token2 token3 ...
704
+ # End;
705
+ #
706
+ # ---
707
+ # *Arguments*:
708
+ # * (required) _token_: String
709
def process_token_for_generic_block( token )
  # No interpretation is attempted: the token is simply handed to the
  # current block's add_token.
  block = @current_block
  block.add_token( token )
end
712
+
713
+ # Returns true if Strings str1 and str2 are
714
+ # equal - ignoring case.
715
+ #
716
+ # ---
717
+ # *Arguments*:
718
+ # * (required) _str1_: String
719
+ # * (required) _str2_: String
720
+ # *Returns*:: true or false
721
def equal?( str1, str2 )
  # nil never matches anything, not even another nil.
  return false if str1.nil? || str2.nil?

  str1.downcase == str2.downcase
end
728
+
729
+ # Returns true if @current_cmd == command
730
+ # and @current_subcmd == subcommand, false otherwise
731
+ # ---
732
+ # *Arguments*:
733
+ # * (required) _command_: String
734
+ # * (required) _subcommand_: String
735
+ # *Returns*:: true or false
736
def cmds_equal_to?( command, subcommand )
  # Both the active command and the active subcommand have to match.
  return false unless @current_cmd == command

  @current_subcmd == subcommand
end
739
+
740
+ # Classes to represent nexus data follow.
741
+
742
+ # == DESCRIPTION
743
+ # Bio::Nexus::GenericBlock represents a generic nexus block.
744
+ # It is mainly intended to be extended into more specific classes,
745
+ # although it is used for blocks not represented by more specific
746
+ # block classes.
747
+ # It has a name and a array for the tokenized content of a
748
+ # nexus block.
749
+ #
750
+ # == USAGE
751
+ #
752
+ # require 'bio/db/nexus'
753
+ #
754
+ # # Create a new parser:
755
+ # nexus = Bio::Nexus.new( nexus_data_as_string )
756
+ #
757
+ # # Get blocks for which no class exists (private blocks)
758
+ # # as Nexus::GenericBlock:
759
+ # private_blocks = nexus.get_blocks_by_name( "my_block" )
760
+ # # Get first block named "my_block":
761
+ # my_block_0 = private_blocks[ 0 ]
762
+ # # Get first token in first block named "my_block":
763
+ # first_token = my_block_0.get_tokens[ 0 ]
764
+ # # Get name of block (would return "my_block" in this case):
765
+ # name = my_block_0.get_name
766
+ # # Return data of block as nexus formatted String:
767
+ # name = my_block_0.to_nexus
768
+ #
769
class GenericBlock

  # Builds an empty block called 'name'. A trailing ";" on the name
  # is dropped.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    @tokens = []
    @name = name.chomp( ";" )
  end

  # The name of this block.
  #
  # ---
  # *Returns*:: String
  def get_name
    @name
  end

  # The tokens added so far, in the order they were added.
  #
  # ---
  # *Returns*:: Array of Strings
  def get_tokens
    @tokens
  end

  # Same as to_nexus.
  #
  # ---
  # *Returns*:: String
  def to_s
    to_nexus
  end

  # Same as to_nexus (allows implicit conversion to String).
  #
  # ---
  # *Returns*:: String
  def to_str
    to_nexus
  end

  # Describes this block. A generic block cannot be written as nexus
  # formatted data, so only a placeholder naming the block is returned.
  # ---
  # *Returns*:: String
  def to_nexus
    "generic block \"#{get_name}\" [do not know how to write in nexus format]"
  end

  # Appends a token to this block.
  #
  # ---
  # *Arguments*:
  # * (required) _token_: String
  def add_token( token )
    @tokens << token
  end

end # class GenericBlock
822
+
823
+
824
+ # == DESCRIPTION
825
+ # Bio::Nexus::TaxaBlock represents a taxa nexus block.
826
+ #
827
+ # = Example of Taxa block:
828
+ # Begin Taxa;
829
+ # Dimensions NTax=4;
830
+ # TaxLabels fish [comment] 'african frog' "rat snake" 'red mouse';
831
+ # End;
832
+ #
833
+ # == USAGE
834
+ #
835
+ # require 'bio/db/nexus'
836
+ #
837
+ # # Create a new parser:
838
+ # nexus = Bio::Nexus.new( nexus_data_as_string )
839
+ #
840
+ # # Get first taxa block:
841
+ # taxa_block = nexus.get_taxa_blocks[ 0 ]
842
+ # # Get number of taxa:
843
+ # number_of_taxa = taxa_block.get_number_of_taxa.to_i
844
+ # # Get name of first taxon:
845
+ # first_taxon = taxa_block.get_taxa[ 0 ]
846
+ #
847
class TaxaBlock < GenericBlock

  # Creates a new TaxaBlock object named 'name', with no taxa and a
  # "number of taxa" of 0.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @number_of_taxa = 0
    @taxa = Array.new
  end

  # Returns a String describing this block as nexus formatted data.
  # ---
  # *Returns*:: String
  def to_nexus
    line_1 = String.new
    line_1 << DIMENSIONS
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      # to_s is required: String#<< appends an Integer as the character
      # with that code point, not as its decimal representation.
      line_1 << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    line_1 << DELIMITER
    line_2 = String.new
    line_2 << TAXLABELS << " " << Nexus::Util::array_to_string( get_taxa ) << DELIMITER
    Nexus::Util::to_nexus_helper( TAXA_BLOCK, [ line_1, line_2 ] )
  end

  # Gets the "number of taxa" property.
  #
  # ---
  # *Returns*:: whatever was passed to set_number_of_taxa (Integer 0
  #             if it was never set)
  def get_number_of_taxa
    @number_of_taxa
  end

  # Gets the taxa of this block.
  #
  # ---
  # *Returns*:: Array
  def get_taxa
    @taxa
  end

  # Sets the "number of taxa" property. The value is stored as given.
  #
  # ---
  # *Arguments*:
  # * (required) _number_of_taxa_: Integer (or numeric String)
  def set_number_of_taxa( number_of_taxa )
    @number_of_taxa = number_of_taxa
  end

  # Adds a taxon name to this block.
  #
  # ---
  # *Arguments*:
  # * (required) _taxon_: String
  def add_taxon( taxon )
    @taxa.push( taxon )
  end

end # class TaxaBlock
909
+
910
+
911
+ # == DESCRIPTION
912
+ # Bio::Nexus::CharactersBlock represents a characters nexus block.
913
+ #
914
+ # = Example of Characters block:
915
+ # Begin Characters;
916
+ # Dimensions NChar=20
917
+ # NTax=4;
918
+ # Format DataType=DNA
919
+ # Missing=x
920
+ # Gap=- MatchChar=.;
921
+ # Matrix
922
+ # fish ACATA GAGGG TACCT CTAAG
923
+ # frog ACTTA GAGGC TACCT CTAGC
924
+ # snake ACTCA CTGGG TACCT TTGCG
925
+ # mouse ACTCA GACGG TACCT TTGCG;
926
+ # End;
927
+ #
928
+ #
929
+ # == USAGE
930
+ #
931
+ # require 'bio/db/nexus'
932
+ #
933
+ # # Create a new parser:
934
+ # nexus = Bio::Nexus.new( nexus_data_as_string )
935
+ #
936
+ #
937
+ # # Get first characters block (same methods as Nexus::DataBlock except
938
+ # # it lacks get_taxa method):
939
+ # characters_block = nexus.get_characters_blocks[ 0 ]
940
+ #
941
class CharactersBlock < GenericBlock

  # Subcommand keywords of the Format command.
  MISSING = "Missing"
  GAP = "Gap"
  MATCHCHAR = "MatchChar"

  # Creates a new CharactersBlock object named 'name' with zero counts,
  # empty format properties and an empty matrix.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @number_of_taxa = 0
    @number_of_characters = 0
    @data_type = String.new
    @gap_character = String.new
    @missing = String.new
    @match_character = String.new
    @matrix = NexusMatrix.new
  end

  # Returns a String describing this block as nexus formatted data.
  #
  # ---
  # *Returns*:: String
  def to_nexus
    line_1 = String.new
    line_1 << DIMENSIONS
    # to_s is required below: String#<< appends an Integer as the
    # character with that code point, not as its decimal representation.
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      line_1 << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    if ( Nexus::Util::larger_than_zero( get_number_of_characters ) )
      line_1 << " " << NCHAR << "=" << get_number_of_characters.to_s
    end
    line_1 << DELIMITER

    line_2 = String.new
    line_2 << FORMAT
    if ( Nexus::Util::longer_than_zero( get_datatype ) )
      line_2 << " " << DATATYPE << "=" << get_datatype
    end
    if ( Nexus::Util::longer_than_zero( get_missing ) )
      line_2 << " " << MISSING << "=" << get_missing
    end
    if ( Nexus::Util::longer_than_zero( get_gap_character ) )
      line_2 << " " << GAP << "=" << get_gap_character
    end
    if ( Nexus::Util::longer_than_zero( get_match_character ) )
      line_2 << " " << MATCHCHAR << "=" << get_match_character
    end
    line_2 << DELIMITER

    line_3 = String.new
    line_3 << MATRIX
    Nexus::Util::to_nexus_helper( CHARACTERS_BLOCK, [ line_1, line_2, line_3 ] +
                                  get_matrix.to_nexus_row_array )
  end

  # Gets the "number of taxa" property.
  #
  # ---
  # *Returns*:: whatever was passed to set_number_of_taxa (Integer 0
  #             if it was never set)
  def get_number_of_taxa
    @number_of_taxa
  end

  # Gets the "number of characters" property.
  #
  # ---
  # *Returns*:: whatever was passed to set_number_of_characters
  #             (Integer 0 if it was never set)
  def get_number_of_characters
    @number_of_characters
  end

  # Gets the "datatype" property.
  # ---
  # *Returns*:: String
  def get_datatype
    @data_type
  end

  # Gets the "gap character" property.
  # ---
  # *Returns*:: String
  def get_gap_character
    @gap_character
  end

  # Gets the "missing" property.
  # ---
  # *Returns*:: String
  def get_missing
    @missing
  end

  # Gets the "match character" property.
  # ---
  # *Returns*:: String
  def get_match_character
    @match_character
  end

  # Gets the matrix.
  # ---
  # *Returns*:: Bio::Nexus::NexusMatrix
  def get_matrix
    @matrix
  end

  # Returns character data as an Array of Bio::Sequence objects,
  # one for each matrix row named 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array of Bio::Sequence
  def get_sequences_by_name( name )
    get_characters_strings_by_name( name ).map do | seq_str |
      create_sequence( seq_str, name )
    end
  end

  # Returns the characters in the matrix at row 'row' as
  # Bio::Sequence object. Column 0 of the matrix is set as
  # the definition of the Bio::Sequence object.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: Bio::Sequence
  def get_sequence( row )
    create_sequence( get_characters_string( row ), get_row_name( row ) )
  end

  # Returns the String in the matrix at row 'row' and column 0,
  # which usually is interpreted as a sequence name (if the matrix
  # contains molecular sequence characters).
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: String
  def get_row_name( row )
    get_matrix.get_name( row )
  end

  # Returns character data as String Array
  # for matrix rows named 'name'.
  #
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array of Strings
  def get_characters_strings_by_name( name )
    get_matrix.get_row_strings_by_name( name, "" )
  end

  # Returns character data as String
  # for matrix row 'row'.
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: String
  def get_characters_string( row )
    get_matrix.get_row_string( row, "" )
  end

  # Sets the "number of taxa" property. The value is stored as given.
  # ---
  # *Arguments*:
  # * (required) _number_of_taxa_: Integer (or numeric String)
  def set_number_of_taxa( number_of_taxa )
    @number_of_taxa = number_of_taxa
  end

  # Sets the "number of characters" property. The value is stored
  # as given.
  # ---
  # *Arguments*:
  # * (required) _number_of_characters_: Integer (or numeric String)
  def set_number_of_characters( number_of_characters )
    @number_of_characters = number_of_characters
  end

  # Sets the "data type" property.
  # ---
  # *Arguments*:
  # * (required) _data_type_: String
  def set_datatype( data_type )
    @data_type = data_type
  end

  # Sets the "gap character" property.
  # ---
  # *Arguments*:
  # * (required) _gap_character_: String
  def set_gap_character( gap_character )
    @gap_character = gap_character
  end

  # Sets the "missing" property.
  # ---
  # *Arguments*:
  # * (required) _missing_: String
  def set_missing( missing )
    @missing = missing
  end

  # Sets the "match character" property.
  # ---
  # *Arguments*:
  # * (required) _match_character_: String
  def set_match_character( match_character )
    @match_character = match_character
  end

  # Sets the matrix.
  # ---
  # *Arguments*:
  # * (required) _matrix_: Bio::Nexus::NexusMatrix
  def set_matrix( matrix )
    @matrix = matrix
  end

  private

  # Creates a Bio::Sequence object with sequence 'seq_str'
  # and definition 'definition'.
  # ---
  # *Arguments*:
  # * (required) _seq_str_: String
  # * (optional) _definition_: String
  # *Returns*:: Bio::Sequence
  def create_sequence( seq_str, definition = "" )
    seq = Bio::Sequence.auto( seq_str )
    seq.definition = definition
    seq
  end

end # class CharactersBlock
1182
+
1183
+
1184
+ # == DESCRIPTION
1185
+ # Bio::Nexus::DataBlock represents a data nexus block.
1186
+ # A data block is a Bio::Nexus::CharactersBlock with the added
1187
+ # capability to store taxa names.
1188
+ #
1189
+ # = Example of Data block:
1190
+ # Begin Data;
1191
+ # Dimensions ntax=5 nchar=14;
1192
+ # Format Datatype=RNA gap=# MISSING=x MatchChar=^;
1193
+ # TaxLabels ciona cow [comment] ape 'purple urchin' "green lizard";
1194
+ # Matrix
1195
+ # taxon_1 A- CCGTCGA-GTTA
1196
+ # taxon_2 T- CCG-CGA-GATA
1197
+ # taxon_3 A- C-GTCGA-GATA
1198
+ # taxon_4 A- CCTCGA--GTTA
1199
+ # taxon_5 T- CGGTCGT-CTTA;
1200
+ # End;
1201
+ #
1202
+ #
1203
+ # == USAGE
1204
+ #
1205
+ # require 'bio/db/nexus'
1206
+ #
1207
+ # # Create a new parser:
1208
+ # nexus = Bio::Nexus.new( nexus_data_as_string )
1209
+ #
1210
+ #
1211
+ # # Get first data block:
1212
+ # data_block = nexus.get_data_blocks[ 0 ]
1213
+ # # Get first characters name:
1214
+ # seq_name = data_block.get_row_name( 0 )
1215
+ # # Get first characters row named "taxon_2" as Bio::Sequence sequence:
1216
+ # seq_tax_2 = data_block.get_sequences_by_name( "taxon_2" )[ 0 ]
1217
+ # # Get third characters row as Bio::Sequence sequence:
1218
+ # seq_2 = data_block.get_sequence( 2 )
1219
+ # # Get first characters row named "taxon_3" as String:
1220
+ # string_tax_3 = data_block.get_characters_strings_by_name( "taxon_3" )
1221
+ # # Get name of first taxon:
1222
+ # taxon_0 = data_block.get_taxa[ 0 ]
1223
+ # # Get characters matrix as Bio::Nexus::NexusMatrix (names are in column 0)
1224
+ # characters_matrix = data_block.get_matrix
1225
+ #
1226
class DataBlock < CharactersBlock

  # Creates a new DataBlock object named 'name' with an empty
  # list of taxa.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @taxa = Array.new
  end

  # Returns a String describing this block as nexus formatted data.
  # ---
  # *Returns*:: String
  def to_nexus
    line_1 = String.new
    line_1 << DIMENSIONS
    # to_s is required below: String#<< appends an Integer as the
    # character with that code point, not as its decimal representation.
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      line_1 << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    if ( Nexus::Util::larger_than_zero( get_number_of_characters ) )
      line_1 << " " << NCHAR << "=" << get_number_of_characters.to_s
    end
    line_1 << DELIMITER

    line_2 = String.new
    line_2 << FORMAT
    if ( Nexus::Util::longer_than_zero( get_datatype ) )
      line_2 << " " << DATATYPE << "=" << get_datatype
    end
    if ( Nexus::Util::longer_than_zero( get_missing ) )
      line_2 << " " << MISSING << "=" << get_missing
    end
    if ( Nexus::Util::longer_than_zero( get_gap_character ) )
      line_2 << " " << GAP << "=" << get_gap_character
    end
    if ( Nexus::Util::longer_than_zero( get_match_character ) )
      line_2 << " " << MATCHCHAR << "=" << get_match_character
    end
    line_2 << DELIMITER

    line_3 = String.new
    line_3 << TAXLABELS << " " << Nexus::Util::array_to_string( get_taxa )
    line_3 << DELIMITER

    line_4 = String.new
    line_4 << MATRIX
    Nexus::Util::to_nexus_helper( DATA_BLOCK, [ line_1, line_2, line_3, line_4 ] +
                                  get_matrix.to_nexus_row_array )
  end

  # Gets the taxa of this block.
  # ---
  # *Returns*:: Array
  def get_taxa
    @taxa
  end

  # Adds a taxon name to this block.
  # ---
  # *Arguments*:
  # * (required) _taxon_: String
  def add_taxon( taxon )
    @taxa.push( taxon )
  end

end # class DataBlock
1293
+
1294
+
1295
+ # == DESCRIPTION
1296
+ # Bio::Nexus::DistancesBlock represents a distances nexus block.
1297
+ #
1298
+ # = Example of Distances block:
1299
+ # Begin Distances;
1300
+ # Dimensions nchar=20 ntax=5;
1301
+ # Format Triangle=Upper;
1302
+ # Matrix
1303
+ # taxon_1 0.0 1.0 2.0 4.0 7.0
1304
+ # taxon_2 1.0 0.0 3.0 5.0 8.0
1305
+ # taxon_3 3.0 4.0 0.0 6.0 9.0
1306
+ # taxon_4 7.0 3.0 1.0 0.0 9.5
1307
+ # taxon_5 1.2 1.3 1.4 1.5 0.0;
1308
+ # End;
1309
+ #
1310
+ #
1311
+ # == USAGE
1312
+ #
1313
+ # require 'bio/db/nexus'
1314
+ #
1315
+ # # Create a new parser:
1316
+ # nexus = Bio::Nexus.new( nexus_data_as_string )
1317
+ #
1318
+ # # Get distances block(s):
1319
+ # distances_blocks = nexus.get_distances_blocks
1320
+ # # Get matrix as Bio::Nexus::NexusMatrix object:
1321
+ # matrix = distances_blocks[ 0 ].get_matrix
1322
+ # # Get value (column 0 are names):
1323
+ # val = matrix.get_value( 1, 5 )
1324
+ #
1325
class DistancesBlock < GenericBlock

  # Subcommand keyword of the Format command.
  TRIANGLE = "Triangle"

  # Creates a new DistancesBlock object named 'name' with zero counts,
  # an empty "triangle" property and an empty matrix.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @number_of_taxa = 0
    @number_of_characters = 0
    @triangle = String.new
    @matrix = NexusMatrix.new
  end

  # Returns a String describing this block as nexus formatted data.
  # ---
  # *Returns*:: String
  def to_nexus
    line_1 = String.new
    line_1 << DIMENSIONS
    # to_s is required below: String#<< appends an Integer as the
    # character with that code point, not as its decimal representation.
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      line_1 << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    if ( Nexus::Util::larger_than_zero( get_number_of_characters ) )
      line_1 << " " << NCHAR << "=" << get_number_of_characters.to_s
    end
    line_1 << DELIMITER

    line_2 = String.new
    line_2 << FORMAT
    if ( Nexus::Util::longer_than_zero( get_triangle ) )
      line_2 << " " << TRIANGLE << "=" << get_triangle
    end
    line_2 << DELIMITER

    line_3 = String.new
    line_3 << MATRIX
    Nexus::Util::to_nexus_helper( DISTANCES_BLOCK, [ line_1, line_2, line_3 ] +
                                  get_matrix.to_nexus_row_array( " " ) )
  end

  # Gets the "number of taxa" property.
  # ---
  # *Returns*:: whatever was passed to set_number_of_taxa (Integer 0
  #             if it was never set)
  def get_number_of_taxa
    @number_of_taxa
  end

  # Gets the "number of characters" property.
  # ---
  # *Returns*:: whatever was passed to set_number_of_characters
  #             (Integer 0 if it was never set)
  def get_number_of_characters
    @number_of_characters
  end

  # Gets the "triangle" property.
  # ---
  # *Returns*:: String
  def get_triangle
    @triangle
  end

  # Gets the matrix.
  # ---
  # *Returns*:: Bio::Nexus::NexusMatrix
  def get_matrix
    @matrix
  end

  # Sets the "number of taxa" property. The value is stored as given.
  # ---
  # *Arguments*:
  # * (required) _number_of_taxa_: Integer (or numeric String)
  def set_number_of_taxa( number_of_taxa )
    @number_of_taxa = number_of_taxa
  end

  # Sets the "number of characters" property. The value is stored
  # as given.
  # ---
  # *Arguments*:
  # * (required) _number_of_characters_: Integer (or numeric String)
  def set_number_of_characters( number_of_characters )
    @number_of_characters = number_of_characters
  end

  # Sets the "triangle" property.
  # ---
  # *Arguments*:
  # * (required) _triangle_: String
  def set_triangle( triangle )
    @triangle = triangle
  end

  # Sets the matrix.
  # ---
  # *Arguments*:
  # * (required) _matrix_: Bio::Nexus::NexusMatrix
  def set_matrix( matrix )
    @matrix = matrix
  end

end # class DistancesBlock
1428
+
1429
+
1430
+ # == DESCRIPTION
1431
+ # Bio::Nexus::TreesBlock represents a trees nexus block.
1432
+ #
1433
+ # = Example of Trees block:
1434
+ # Begin Trees;
1435
+ # Tree best=(fish,(frog,(snake, mouse)));
1436
+ # Tree other=(snake,(frog,( fish, mouse)));
1437
+ # End;
1438
+ #
1439
+ #
1440
+ # == USAGE
1441
+ #
1442
+ # require 'bio/db/nexus'
1443
+ #
1444
+ # # Create a new parser:
1445
+ # nexus = Bio::Nexus.new( nexus_data_as_string )
1446
+ #
1447
+ # # Get trees block(s):
1448
+ # trees_block = nexus.get_trees_blocks[ 0 ]
1449
+ # # Get first tree named "best" as String:
1450
+ # string_fish = trees_block.get_tree_strings_by_name( "best" )[ 0 ]
1451
+ # # Get first tree named "best" as Bio::Db::Newick object:
1452
+ # tree_fish = trees_block.get_trees_by_name( "best" )[ 0 ]
1453
+ # # Get first tree as Bio::Db::Newick object:
1454
+ # tree_first = trees_block.get_tree( 0 )
1455
+ #
1456
class TreesBlock < GenericBlock
  TREE = "Tree"

  # Creates a trees block without any trees.
  # ---
  # *Arguments*:
  # * (required) _name_: handed unchanged to the GenericBlock constructor
  def initialize( name )
    super( name )
    @tree_names = Array.new
    @trees      = Array.new
  end

  # Renders this block as nexus formatted text, one
  # "Tree <name>=<tree string>" line per stored tree.
  # ---
  # *Returns*:: String
  def to_nexus
    tree_lines = ( 0 ... @trees.length ).map do | idx |
      TREE + " " + @tree_names[ idx ] + "=" + @trees[ idx ]
    end
    Nexus::Util::to_nexus_helper( TREES_BLOCK, tree_lines )
  end

  # Returns the stored tree strings (the internal array itself, not a copy).
  # ---
  # *Returns*:: Array
  def get_tree_strings
    return @trees
  end

  # Returns the stored tree names (the internal array itself, not a copy).
  # ---
  # *Returns*:: Array
  def get_tree_names
    return @tree_names
  end

  # Collects the tree strings whose name (same position in the
  # name list) equals 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array
  def get_tree_strings_by_name( name )
    matches = Array.new
    @tree_names.each_with_index do | candidate, idx |
      next unless candidate == name
      matches << @trees[ idx ]
    end
    matches
  end

  # Parses tree string number i (same order as in the nexus data)
  # with Bio::Newick and returns the result of its 'tree' method.
  # ---
  # *Arguments*:
  # * (required) _i_: Integer
  # *Returns*:: value of Bio::Newick#tree
  def get_tree( i )
    Bio::Newick.new( @trees[ i ] ).tree
  end

  # Like get_tree_strings_by_name, but every match is parsed
  # through get_tree first.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array of parsed trees
  def get_trees_by_name( name )
    matches = Array.new
    @tree_names.each_with_index do | candidate, idx |
      next unless candidate == name
      matches << get_tree( idx )
    end
    matches
  end

  # Appends a tree name to this block.
  # ---
  # *Arguments*:
  # * (required) _tree_name_: String
  def add_tree_name( tree_name )
    @tree_names << tree_name
  end

  # Appends a tree (as string) to this block.
  # ---
  # *Arguments*:
  # * (required) _tree_as_string_: String
  def add_tree( tree_as_string )
    @trees << tree_as_string
  end

end # class TreesBlock
1554
+
1555
+
1556
+ # == DESCRIPTION
1557
+ # Bio::Nexus::NexusMatrix represents a characters or distance matrix,
1558
+ # where the names are stored in column zero.
1559
+ #
1560
+ #
1561
+ # == USAGE
1562
+ #
1563
+ # require 'bio/db/nexus'
1564
+ #
1565
+ # # Create a new parser:
1566
+ # nexus = Bio::Nexus.new( nexus_data_as_string )
1567
+ # # Get distances block(s):
1568
+ # distances_block = nexus.get_distances_blocks[ 0 ]
1569
+ # # Get matrix as Bio::Nexus::NexusMatrix object:
1570
+ # matrix = distances_block.get_matrix
1571
+ # # Get value (column 0 are names):
1572
+ # val = matrix.get_value( 1, 5 )
1573
+ # # Return first row as String (all columns except column 0),
1574
+ # # values are separated by "_":
1575
+ # row_str_0 = matrix.get_row_string( 0, "_" )
1576
+ # # Return all rows named "ciona" as String (all columns except column 0),
1577
+ # # values are separated by "+":
1578
+ # ciona_rows = matrix.get_row_strings_by_name( "ciona", "+" )
1579
class NexusMatrix

  # Nexus matrix error class.
  class NexusMatrixError < RuntimeError; end

  # Creates new, empty NexusMatrix.
  # Rows are kept sparsely: a Hash of row index => Hash of column index => value.
  def initialize()
    @rows = Hash.new
    @max_row = -1
    @max_col = -1
  end

  # Sets the value at row 'row' and column 'col' to 'value'.
  # Grows the recorded maximal row/column when needed.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # * (required) _col_: Integer
  # * (required) _value_: Object
  # *Raises*:: NexusMatrixError if row or col is negative
  def set_value( row, col, value )
    if ( ( row < 0 ) || ( col < 0 ) )
      # Must be NexusMatrixError: no other error class is defined in this class.
      raise( NexusMatrixError, "attempt to use negative values for row or column" )
    end
    set_max_row( row ) if row > get_max_row()
    set_max_col( col ) if col > get_max_col()
    row_map = ( @rows[ row ] ||= Hash.new )
    row_map[ col ] = value
  end

  # Returns the value at row 'row' and column 'col',
  # or nil if nothing was stored at that position.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # * (required) _col_: Integer
  # *Returns*:: Object
  # *Raises*:: NexusMatrixError if row or col is outside [0, max]
  def get_value( row, col )
    if ( ( row > get_max_row() ) || ( row < 0 ) )
      raise( NexusMatrixError, "value for row (" + row.to_s +
        ") is out of range [max row: " + get_max_row().to_s + "]" )
    elsif ( ( col > get_max_col() ) || ( col < 0 ) )
      # The lower bound has to be checked on col here, not on row.
      raise( NexusMatrixError, "value for column (" + col.to_s +
        ") is out of range [max column: " + get_max_col().to_s + "]" )
    end
    r = @rows[ row ]
    return nil if r.nil? || r.empty?
    r[ col ]
  end

  # Returns the maximal columns number.
  # ---
  # *Returns*:: Integer
  def get_max_col
    return @max_col
  end

  # Returns the maximal row number.
  # ---
  # *Returns*:: Integer
  def get_max_row
    return @max_row
  end

  # Returns true if matrix is empty (no value has been set yet).
  #
  # ---
  # *Returns*:: true or false
  def is_empty?
    return get_max_col < 0 || get_max_row < 0
  end

  # Convenience method which return the value of
  # column 0 and row 'row' which is usually the name.
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: String
  def get_name( row )
    get_value( row, 0 ).to_s
  end

  # Returns the values of columns 1 to maximal column length
  # in row 'row' concatenated as string. Each value is followed
  # by 'spacer' (so the result ends with a spacer).
  # Values are converted with to_s, so unset cells contribute
  # an empty string instead of raising.
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # * (optional) _spacer_: String
  # *Returns*:: String
  def get_row_string( row, spacer = "" )
    row_str = String.new
    return row_str if is_empty?
    ( 1 .. get_max_col ).each do | col |
      row_str << get_value( row, col ).to_s << spacer.to_s
    end
    row_str
  end

  # Returns all rows as Array of Strings separated by 'spacer'
  # for which column 0 is 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # * (optional) _spacer_: String
  # *Returns*:: Array
  def get_row_strings_by_name( name, spacer = "" )
    row_strs = Array.new
    return row_strs if is_empty?
    ( 0 .. get_max_row ).each do | row |
      if ( get_value( row, 0 ) == name )
        row_strs.push( get_row_string( row, spacer ) )
      end
    end
    row_strs
  end

  # Returns matrix as String, returns "empty" if empty.
  # Rows are rendered by to_nexus_row_array with " " as spacer
  # and without the trailing delimiter.
  # ---
  # *Returns*:: String
  def to_s
    return "empty" if is_empty?
    str = String.new
    to_nexus_row_array( " ", false ).each do | row |
      str << row << END_OF_LINE
    end
    str
  end
  alias to_str to_s

  # Helper method to produce nexus formatted data.
  # Each row is rendered as the left-justified name (padded to the
  # longest name, at least 10 characters, plus one), a space and the
  # row values; one trailing spacer is removed. The delimiter is
  # appended to the last row only.
  # ---
  # *Arguments*:
  # * (optional) _spacer_: String
  # * (optional) _append_delimiter_: true or false
  # *Returns*:: Array
  def to_nexus_row_array( spacer = "", append_delimiter = true )
    ary = Array.new
    return ary if is_empty?
    # get_name applies to_s, so rows without a name do not raise here.
    names = ( 0 .. get_max_row ).map { | row | get_name( row ) }
    max_length = ( [ 10 ] + names.map { | n | n.length } ).max
    names.each_with_index do | name, row |
      row_str = String.new
      row_str << name.ljust( max_length + 1 ) << " " << get_row_string( row, spacer )
      if ( spacer != nil && spacer.length > 0 )
        row_str.chomp!( spacer )
      end
      if ( append_delimiter && row == get_max_row )
        row_str << DELIMITER
      end
      ary.push( row_str )
    end
    ary
  end


  private

  # Returns the stored data of row 'row' (a Hash of column => value),
  # or nil if the row holds no values.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: Hash or nil
  def get_row( row )
    return @rows[ row ]
  end

  # Sets maximal column number.
  # ---
  # *Arguments*:
  # * (required) _max_col_: Integer
  def set_max_col( max_col )
    @max_col = max_col
  end

  # Sets maximal row number.
  # ---
  # *Arguments*:
  # * (required) _max_row_: Integer
  def set_max_row( max_row )
    @max_row = max_row
  end

end # NexusMatrix
1790
+
1791
+ # End of classes to represent nexus data.
1792
+
1793
+ # = DESCRIPTION
1794
+ # Bio::Nexus::Util is a class containing static helper methods
1795
+ #
1796
class Util

  # Wraps 'lines' into a nexus block: a begin line naming the block,
  # one indented line per non-nil entry, and an end line.
  # ---
  # *Arguments*:
  # * (required) _block_: appended after the begin keyword (callers in
  #   this file pass block-name constants such as TREES_BLOCK)
  # * (required) _lines_: Array of lines, nil entries are skipped
  # *Returns*:: String
  def self.to_nexus_helper( block, lines )
    out = String.new
    out << BEGIN_BLOCK << " " << block << END_OF_LINE
    lines.each do | line |
      next if line.nil?
      out << INDENTENTION << line << END_OF_LINE
    end
    out << END_BLOCK << END_OF_LINE
    out
  end

  # Concatenates the elements of 'ary', each followed by " ",
  # then strips one trailing " ".
  # ---
  # *Arguments*:
  # * (required) _ary_: Array
  # *Returns*:: String
  def self.array_to_string( ary )
    joined = ary.inject( String.new ) { | acc, e | acc << e << " " }
    joined.chomp!( " " )
    joined
  end

  # Returns true if i is not nil and i.to_i is larger than 0.
  # ---
  # *Arguments*:
  # * (required) _i_: Integer
  # *Returns*:: true or false
  def self.larger_than_zero( i )
    return false if i.nil?
    i.to_i > 0
  end

  # Returns true if str is not nil and its length is larger than 0.
  # ---
  # *Arguments*:
  # * (required) _str_: String
  # *Returns*:: true or false
  def self.longer_than_zero( str )
    return false if str.nil?
    str.length > 0
  end

end # class Util
1849
+
1850
+ end # class Nexus
1851
+
1852
+ end #module Bio
1853
+
1854
+