bio 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (340) hide show
  1. data/bin/bioruby +14 -122
  2. data/bin/br_biofetch.rb +2 -2
  3. data/bin/br_bioflat.rb +2 -2
  4. data/bin/br_biogetseq.rb +2 -2
  5. data/bin/br_pmfetch.rb +3 -3
  6. data/doc/Changes-0.7.rd +77 -0
  7. data/doc/KEGG_API.rd +523 -232
  8. data/doc/KEGG_API.rd.ja +529 -207
  9. data/doc/Tutorial.rd +48 -11
  10. data/lib/bio.rb +59 -6
  11. data/lib/bio/alignment.rb +713 -103
  12. data/lib/bio/appl/bl2seq/report.rb +2 -18
  13. data/lib/bio/appl/blast.rb +108 -91
  14. data/lib/bio/appl/blast/format0.rb +33 -18
  15. data/lib/bio/appl/blast/format8.rb +6 -20
  16. data/lib/bio/appl/blast/report.rb +293 -429
  17. data/lib/bio/appl/blast/rexml.rb +8 -22
  18. data/lib/bio/appl/blast/wublast.rb +21 -12
  19. data/lib/bio/appl/blast/xmlparser.rb +180 -183
  20. data/lib/bio/appl/blat/report.rb +127 -30
  21. data/lib/bio/appl/clustalw.rb +87 -59
  22. data/lib/bio/appl/clustalw/report.rb +20 -22
  23. data/lib/bio/appl/emboss.rb +113 -20
  24. data/lib/bio/appl/fasta.rb +173 -198
  25. data/lib/bio/appl/fasta/format10.rb +244 -347
  26. data/lib/bio/appl/gcg/msf.rb +212 -0
  27. data/lib/bio/appl/gcg/seq.rb +195 -0
  28. data/lib/bio/appl/genscan/report.rb +5 -23
  29. data/lib/bio/appl/hmmer.rb +8 -45
  30. data/lib/bio/appl/hmmer/report.rb +2 -20
  31. data/lib/bio/appl/iprscan/report.rb +374 -0
  32. data/lib/bio/appl/mafft.rb +87 -50
  33. data/lib/bio/appl/mafft/report.rb +151 -44
  34. data/lib/bio/appl/muscle.rb +52 -0
  35. data/lib/bio/appl/phylip/alignment.rb +129 -0
  36. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  37. data/lib/bio/appl/probcons.rb +41 -0
  38. data/lib/bio/appl/psort.rb +89 -96
  39. data/lib/bio/appl/psort/report.rb +6 -22
  40. data/lib/bio/appl/pts1.rb +263 -0
  41. data/lib/bio/appl/sim4.rb +26 -36
  42. data/lib/bio/appl/sim4/report.rb +2 -18
  43. data/lib/bio/appl/sosui/report.rb +5 -20
  44. data/lib/bio/appl/spidey/report.rb +2 -2
  45. data/lib/bio/appl/targetp/report.rb +4 -20
  46. data/lib/bio/appl/tcoffee.rb +55 -0
  47. data/lib/bio/appl/tmhmm/report.rb +4 -20
  48. data/lib/bio/command.rb +235 -64
  49. data/lib/bio/data/aa.rb +21 -26
  50. data/lib/bio/data/codontable.rb +2 -20
  51. data/lib/bio/data/na.rb +19 -4
  52. data/lib/bio/db.rb +27 -12
  53. data/lib/bio/db/aaindex.rb +2 -20
  54. data/lib/bio/db/embl/common.rb +4 -21
  55. data/lib/bio/db/embl/embl.rb +33 -85
  56. data/lib/bio/db/embl/sptr.rb +612 -302
  57. data/lib/bio/db/embl/swissprot.rb +10 -29
  58. data/lib/bio/db/embl/trembl.rb +10 -29
  59. data/lib/bio/db/embl/uniprot.rb +10 -29
  60. data/lib/bio/db/fantom.rb +15 -20
  61. data/lib/bio/db/fasta.rb +3 -3
  62. data/lib/bio/db/genbank/common.rb +37 -46
  63. data/lib/bio/db/genbank/ddbj.rb +6 -18
  64. data/lib/bio/db/genbank/genbank.rb +47 -186
  65. data/lib/bio/db/genbank/genpept.rb +4 -17
  66. data/lib/bio/db/genbank/refseq.rb +4 -17
  67. data/lib/bio/db/gff.rb +103 -35
  68. data/lib/bio/db/go.rb +4 -20
  69. data/lib/bio/db/kegg/brite.rb +26 -36
  70. data/lib/bio/db/kegg/compound.rb +81 -85
  71. data/lib/bio/db/kegg/drug.rb +98 -0
  72. data/lib/bio/db/kegg/enzyme.rb +133 -110
  73. data/lib/bio/db/kegg/expression.rb +2 -20
  74. data/lib/bio/db/kegg/genes.rb +208 -238
  75. data/lib/bio/db/kegg/genome.rb +164 -285
  76. data/lib/bio/db/kegg/glycan.rb +114 -157
  77. data/lib/bio/db/kegg/keggtab.rb +242 -303
  78. data/lib/bio/db/kegg/kgml.rb +117 -160
  79. data/lib/bio/db/kegg/orthology.rb +112 -0
  80. data/lib/bio/db/kegg/reaction.rb +54 -69
  81. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  82. data/lib/bio/db/lasergene.rb +209 -0
  83. data/lib/bio/db/litdb.rb +3 -27
  84. data/lib/bio/db/medline.rb +228 -249
  85. data/lib/bio/db/nbrf.rb +3 -3
  86. data/lib/bio/db/newick.rb +510 -0
  87. data/lib/bio/db/nexus.rb +1854 -0
  88. data/lib/bio/db/pdb.rb +5 -17
  89. data/lib/bio/db/pdb/atom.rb +2 -18
  90. data/lib/bio/db/pdb/chain.rb +2 -18
  91. data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
  92. data/lib/bio/db/pdb/model.rb +2 -18
  93. data/lib/bio/db/pdb/pdb.rb +73 -34
  94. data/lib/bio/db/pdb/residue.rb +4 -20
  95. data/lib/bio/db/pdb/utils.rb +2 -18
  96. data/lib/bio/db/prosite.rb +403 -422
  97. data/lib/bio/db/rebase.rb +84 -40
  98. data/lib/bio/db/soft.rb +404 -0
  99. data/lib/bio/db/transfac.rb +5 -17
  100. data/lib/bio/feature.rb +106 -52
  101. data/lib/bio/io/das.rb +32 -42
  102. data/lib/bio/io/dbget.rb +2 -20
  103. data/lib/bio/io/ddbjxml.rb +77 -138
  104. data/lib/bio/io/ebisoap.rb +158 -0
  105. data/lib/bio/io/ensembl.rb +229 -0
  106. data/lib/bio/io/fastacmd.rb +89 -82
  107. data/lib/bio/io/fetch.rb +163 -96
  108. data/lib/bio/io/flatfile.rb +170 -73
  109. data/lib/bio/io/flatfile/bdb.rb +3 -16
  110. data/lib/bio/io/flatfile/index.rb +2 -2
  111. data/lib/bio/io/flatfile/indexer.rb +3 -2
  112. data/lib/bio/io/higet.rb +12 -31
  113. data/lib/bio/io/keggapi.rb +210 -269
  114. data/lib/bio/io/ncbisoap.rb +155 -0
  115. data/lib/bio/io/pubmed.rb +169 -147
  116. data/lib/bio/io/registry.rb +4 -20
  117. data/lib/bio/io/soapwsdl.rb +43 -38
  118. data/lib/bio/io/sql.rb +242 -305
  119. data/lib/bio/location.rb +407 -285
  120. data/lib/bio/map.rb +410 -0
  121. data/lib/bio/pathway.rb +558 -695
  122. data/lib/bio/reference.rb +272 -75
  123. data/lib/bio/sequence.rb +255 -13
  124. data/lib/bio/sequence/aa.rb +71 -10
  125. data/lib/bio/sequence/common.rb +187 -33
  126. data/lib/bio/sequence/compat.rb +59 -4
  127. data/lib/bio/sequence/format.rb +54 -7
  128. data/lib/bio/sequence/generic.rb +3 -3
  129. data/lib/bio/sequence/na.rb +328 -26
  130. data/lib/bio/shell.rb +11 -4
  131. data/lib/bio/shell/core.rb +221 -160
  132. data/lib/bio/shell/demo.rb +18 -15
  133. data/lib/bio/shell/interface.rb +14 -12
  134. data/lib/bio/shell/irb.rb +95 -0
  135. data/lib/bio/shell/object.rb +45 -26
  136. data/lib/bio/shell/plugin/blast.rb +42 -0
  137. data/lib/bio/shell/plugin/codon.rb +22 -14
  138. data/lib/bio/shell/plugin/das.rb +58 -0
  139. data/lib/bio/shell/plugin/emboss.rb +2 -2
  140. data/lib/bio/shell/plugin/entry.rb +22 -11
  141. data/lib/bio/shell/plugin/flatfile.rb +2 -2
  142. data/lib/bio/shell/plugin/keggapi.rb +13 -6
  143. data/lib/bio/shell/plugin/midi.rb +4 -4
  144. data/lib/bio/shell/plugin/obda.rb +2 -2
  145. data/lib/bio/shell/plugin/psort.rb +56 -0
  146. data/lib/bio/shell/plugin/seq.rb +35 -8
  147. data/lib/bio/shell/plugin/soap.rb +87 -0
  148. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
  149. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
  150. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
  151. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
  152. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
  153. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
  154. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
  156. data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
  159. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
  160. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
  161. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
  162. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
  163. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
  164. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
  165. data/lib/bio/shell/script.rb +25 -0
  166. data/lib/bio/shell/setup.rb +109 -0
  167. data/lib/bio/shell/web.rb +70 -58
  168. data/lib/bio/tree.rb +850 -0
  169. data/lib/bio/util/color_scheme.rb +84 -107
  170. data/lib/bio/util/color_scheme/buried.rb +5 -24
  171. data/lib/bio/util/color_scheme/helix.rb +5 -24
  172. data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
  173. data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
  174. data/lib/bio/util/color_scheme/strand.rb +5 -24
  175. data/lib/bio/util/color_scheme/taylor.rb +5 -24
  176. data/lib/bio/util/color_scheme/turn.rb +5 -24
  177. data/lib/bio/util/color_scheme/zappo.rb +5 -24
  178. data/lib/bio/util/contingency_table.rb +70 -43
  179. data/lib/bio/util/restriction_enzyme.rb +228 -0
  180. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  181. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  182. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  183. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  184. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  185. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  186. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  187. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  188. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  189. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  190. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  191. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  192. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  193. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  194. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  195. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  196. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  197. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  198. data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
  199. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  200. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  201. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  202. data/lib/bio/util/sirna.rb +4 -22
  203. data/sample/color_scheme_na.rb +4 -12
  204. data/sample/enzymes.rb +78 -0
  205. data/sample/goslim.rb +5 -13
  206. data/sample/psortplot_html.rb +4 -12
  207. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  208. data/test/data/embl/AB090716.embl.rel89 +63 -0
  209. data/test/data/fasta/example1.txt +75 -0
  210. data/test/data/fasta/example2.txt +21 -0
  211. data/test/data/iprscan/merged.raw +32 -0
  212. data/test/data/iprscan/merged.txt +74 -0
  213. data/test/data/soft/GDS100_partial.soft +92 -0
  214. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  215. data/test/functional/bio/io/test_ensembl.rb +103 -0
  216. data/test/functional/bio/io/test_soapwsdl.rb +5 -17
  217. data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
  218. data/test/unit/bio/appl/blast/test_report.rb +3 -16
  219. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
  220. data/test/unit/bio/appl/genscan/test_report.rb +3 -16
  221. data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
  222. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  223. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  224. data/test/unit/bio/appl/sosui/test_report.rb +3 -16
  225. data/test/unit/bio/appl/targetp/test_report.rb +3 -16
  226. data/test/unit/bio/appl/test_blast.rb +3 -16
  227. data/test/unit/bio/appl/test_fasta.rb +4 -16
  228. data/test/unit/bio/appl/test_pts1.rb +140 -0
  229. data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
  230. data/test/unit/bio/data/test_aa.rb +4 -17
  231. data/test/unit/bio/data/test_codontable.rb +3 -16
  232. data/test/unit/bio/data/test_na.rb +3 -3
  233. data/test/unit/bio/db/embl/test_common.rb +3 -16
  234. data/test/unit/bio/db/embl/test_embl.rb +3 -16
  235. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  236. data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
  237. data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
  238. data/test/unit/bio/db/kegg/test_genes.rb +3 -16
  239. data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
  240. data/test/unit/bio/db/test_aaindex.rb +2 -2
  241. data/test/unit/bio/db/test_fasta.rb +3 -16
  242. data/test/unit/bio/db/test_gff.rb +3 -16
  243. data/test/unit/bio/db/test_lasergene.rb +95 -0
  244. data/test/unit/bio/db/test_newick.rb +56 -0
  245. data/test/unit/bio/db/test_nexus.rb +360 -0
  246. data/test/unit/bio/db/test_prosite.rb +5 -18
  247. data/test/unit/bio/db/test_rebase.rb +11 -25
  248. data/test/unit/bio/db/test_soft.rb +138 -0
  249. data/test/unit/bio/io/test_ddbjxml.rb +5 -17
  250. data/test/unit/bio/io/test_ensembl.rb +109 -0
  251. data/test/unit/bio/io/test_fastacmd.rb +3 -16
  252. data/test/unit/bio/io/test_flatfile.rb +237 -0
  253. data/test/unit/bio/io/test_soapwsdl.rb +4 -17
  254. data/test/unit/bio/sequence/test_aa.rb +3 -3
  255. data/test/unit/bio/sequence/test_common.rb +3 -16
  256. data/test/unit/bio/sequence/test_compat.rb +3 -16
  257. data/test/unit/bio/sequence/test_na.rb +29 -3
  258. data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
  259. data/test/unit/bio/test_alignment.rb +16 -27
  260. data/test/unit/bio/test_command.rb +242 -25
  261. data/test/unit/bio/test_db.rb +3 -16
  262. data/test/unit/bio/test_feature.rb +4 -16
  263. data/test/unit/bio/test_location.rb +4 -16
  264. data/test/unit/bio/test_map.rb +230 -0
  265. data/test/unit/bio/test_pathway.rb +4 -16
  266. data/test/unit/bio/test_reference.rb +2 -2
  267. data/test/unit/bio/test_sequence.rb +7 -19
  268. data/test/unit/bio/test_shell.rb +3 -16
  269. data/test/unit/bio/test_tree.rb +593 -0
  270. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  271. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  272. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  273. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
  274. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  275. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  276. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  277. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  278. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  279. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  280. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  281. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  282. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  283. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  284. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  285. data/test/unit/bio/util/test_color_scheme.rb +6 -18
  286. data/test/unit/bio/util/test_contingency_table.rb +6 -18
  287. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  288. data/test/unit/bio/util/test_sirna.rb +3 -16
  289. metadata +228 -169
  290. data/doc/BioRuby.rd.ja +0 -225
  291. data/doc/Design.rd.ja +0 -341
  292. data/doc/TODO.rd.ja +0 -138
  293. data/lib/bio/appl/fasta/format6.rb +0 -37
  294. data/lib/bio/db/kegg/cell.rb +0 -88
  295. data/lib/bio/db/kegg/ko.rb +0 -178
  296. data/lib/bio/shell/rails/Rakefile +0 -10
  297. data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
  298. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
  299. data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
  300. data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
  301. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
  302. data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
  303. data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
  304. data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
  305. data/lib/bio/shell/rails/config/boot.rb +0 -19
  306. data/lib/bio/shell/rails/config/database.yml +0 -85
  307. data/lib/bio/shell/rails/config/environment.rb +0 -53
  308. data/lib/bio/shell/rails/config/environments/development.rb +0 -19
  309. data/lib/bio/shell/rails/config/environments/production.rb +0 -19
  310. data/lib/bio/shell/rails/config/environments/test.rb +0 -19
  311. data/lib/bio/shell/rails/config/routes.rb +0 -19
  312. data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
  313. data/lib/bio/shell/rails/public/404.html +0 -8
  314. data/lib/bio/shell/rails/public/500.html +0 -8
  315. data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
  316. data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
  317. data/lib/bio/shell/rails/public/dispatch.rb +0 -10
  318. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  319. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  320. data/lib/bio/shell/rails/public/index.html +0 -277
  321. data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
  322. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
  323. data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
  324. data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
  325. data/lib/bio/shell/rails/public/robots.txt +0 -1
  326. data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
  327. data/lib/bio/shell/rails/script/about +0 -3
  328. data/lib/bio/shell/rails/script/breakpointer +0 -3
  329. data/lib/bio/shell/rails/script/console +0 -3
  330. data/lib/bio/shell/rails/script/destroy +0 -3
  331. data/lib/bio/shell/rails/script/generate +0 -3
  332. data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
  333. data/lib/bio/shell/rails/script/performance/profiler +0 -3
  334. data/lib/bio/shell/rails/script/plugin +0 -3
  335. data/lib/bio/shell/rails/script/process/reaper +0 -3
  336. data/lib/bio/shell/rails/script/process/spawner +0 -3
  337. data/lib/bio/shell/rails/script/process/spinner +0 -3
  338. data/lib/bio/shell/rails/script/runner +0 -3
  339. data/lib/bio/shell/rails/script/server +0 -42
  340. data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -0,0 +1,1854 @@
1
+ #
2
+ # = bio/db/nexus.rb - Nexus Standard phylogenetic tree parser / formatter
3
+ #
4
+ # Copyright:: Copyright (C) 2006 Christian M Zmasek <cmzmasek@yahoo.com>
5
+ #
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id: nexus.rb,v 1.3 2007/04/05 23:35:40 trevor Exp $
9
+ #
10
+ # == Description
11
+ #
12
+ # This file contains classes that implement a parser for NEXUS formatted
13
+ # data as well as objects to store, access, and write the parsed data.
14
+ #
15
+ # The following five blocks:
16
+ # taxa, characters, distances, trees, data
17
+ # are recognizable and parsable.
18
+ #
19
+ # The parser can deal with (nested) comments (indicated by square brackets),
20
+ # unless the comments are inside a command or data item (e.g.
21
+ # "Dim[comment]ensions" or inside a matrix).
22
+ #
23
+ # Single or double quoted TaxLabels are processed as follows (by way
24
+ # of example): "mus musculus" -> mus_musculus
25
+ #
26
+ #
27
+ # == USAGE
28
+ #
29
+ # require 'bio/db/nexus'
30
+ #
31
+ # # Create a new parser:
32
+ # nexus = Bio::Nexus.new( nexus_data_as_string )
33
+ #
34
+ # # Get first taxa block:
35
+ # taxa_block = nexus.get_taxa_blocks[ 0 ]
36
+ # # Get number of taxa:
37
+ # number_of_taxa = taxa_block.get_number_of_taxa.to_i
38
+ # # Get name of first taxon:
39
+ # first_taxon = taxa_block.get_taxa[ 0 ]
40
+ #
41
+ # # Get first data block:
42
+ # data_block = nexus.get_data_blocks[ 0 ]
43
+ # # Get first characters name:
44
+ # seq_name = data_block.get_row_name( 0 )
45
+ # # Get first characters row named "taxon_2" as Bio::Sequence sequence:
46
+ # seq_tax_2 = data_block.get_sequences_by_name( "taxon_2" )[ 0 ]
47
+ # # Get third characters row as Bio::Sequence sequence:
48
+ # seq_2 = data_block.get_sequence( 2 )
49
+ # # Get first characters row named "taxon_3" as String:
50
+ # string_tax_3 = data_block.get_characters_strings_by_name( "taxon_3" )
51
+ # # Get name of first taxon:
52
+ # taxon_0 = data_block.get_taxa[ 0 ]
53
+ # # Get characters matrix as Bio::Nexus::NexusMatrix (names are in column 0)
54
+ # characters_matrix = data_block.get_matrix
55
+ #
56
+ # # Get first characters block (same methods as Nexus::DataBlock except
57
+ # # it lacks get_taxa method):
58
+ # characters_block = nexus.get_characters_blocks[ 0 ]
59
+ #
60
+ # # Get trees block(s):
61
+ # trees_block = nexus.get_trees_blocks[ 0 ]
62
+ # # Get first tree named "best" as String:
63
+ # string_fish = trees_block.get_tree_strings_by_name( "best" )[ 0 ]
64
+ # # Get first tree named "best" as Bio::Db::Newick object:
65
+ # tree_fish = trees_block.get_trees_by_name( "best" )[ 0 ]
66
+ # # Get first tree as Bio::Db::Newick object:
67
+ # tree_first = trees_block.get_tree( 0 )
68
+ #
69
+ # # Get distances block(s):
70
+ # distances_blocks = nexus.get_distances_blocks
71
+ # # Get matrix as Bio::Nexus::NexusMatrix object:
72
+ # matrix = distances_blocks[ 0 ].get_matrix
73
+ # # Get value (column 0 are names):
74
+ # val = matrix.get_value( 1, 5 )
75
+ #
76
+ # # Get blocks for which no class exists (private blocks):
77
+ # private_blocks = nexus.get_blocks_by_name( "my_block" )
78
+ # # Get first block named "my_block":
79
+ # my_block_0 = private_blocks[ 0 ]
80
+ # # Get first token in first block named "my_block":
81
+ # first_token = my_block_0.get_tokens[ 0 ]
82
+ #
83
+ #
84
+ # == References
85
+ #
86
+ # * Maddison DR, Swofford DL, Maddison WP (1997). NEXUS: an extensible file
87
+ # format for systematic information.
88
+ # Syst Biol. 1997 46(4):590-621.
89
+ #
90
+
91
+ require 'bio/sequence'
92
+ require 'bio/tree'
93
+ require 'bio/db/newick'
94
+
95
+ module Bio
96
+
97
+ # == DESCRIPTION
98
+ # Bio::Nexus is a parser for nexus formatted data.
99
+ # It contains classes and constants enabling the representation and
100
+ # processing of nexus data.
101
+ #
102
+ # == USAGE
103
+ #
104
+ # # Parsing a nexus formatted string nexus_str:
105
+ # nexus = Bio::Nexus.new( nexus_str )
106
+ #
107
+ # # Obtaining the nexus blocks as an array of GenericBlock or
108
+ # # any of its subclasses (such as DistancesBlock):
109
+ # blocks = nexus.get_blocks
110
+ #
111
+ # # Getting a block by name:
112
+ # my_blocks = nexus.get_blocks_by_name( "my_block" )
113
+ #
114
+ # # Getting distance blocks:
115
+ # distances_blocks = nexus.get_distances_blocks
116
+ #
117
+ # # Getting trees blocks:
118
+ # trees_blocks = nexus.get_trees_blocks
119
+ #
120
+ # # Getting data blocks:
121
+ # data_blocks = nexus.get_data_blocks
122
+ #
123
+ # # Getting characters blocks:
124
+ # character_blocks = nexus.get_characters_blocks
125
+ #
126
+ # # Getting taxa blocks:
127
+ # taxa_blocks = nexus.get_taxa_blocks
128
+ #
129
+ class Nexus
130
+
131
+
132
+ END_OF_LINE = "\n"
133
+ INDENTENTION = " "
134
+ DOUBLE_QUOTE = "\""
135
+ SINGLE_QUOTE = "'"
136
+
137
+
138
+ BEGIN_NEXUS = "#NEXUS"
139
+ DELIMITER = ";"
140
+ BEGIN_BLOCK = "Begin"
141
+ END_BLOCK = "End" + DELIMITER
142
+ BEGIN_COMMENT = "["
143
+ END_COMMENT = "]"
144
+
145
+
146
+ TAXA = "Taxa"
147
+ CHARACTERS = "Characters"
148
+ DATA = "Data"
149
+ DISTANCES = "Distances"
150
+ TREES = "Trees"
151
+ TAXA_BLOCK = TAXA + DELIMITER
152
+ CHARACTERS_BLOCK = CHARACTERS + DELIMITER
153
+ DATA_BLOCK = DATA + DELIMITER
154
+ DISTANCES_BLOCK = DISTANCES + DELIMITER
155
+ TREES_BLOCK = TREES + DELIMITER
156
+
157
+
158
+ DIMENSIONS = "Dimensions"
159
+ FORMAT = "Format"
160
+ NTAX = "NTax"
161
+ NCHAR = "NChar"
162
+ DATATYPE = "DataType"
163
+ TAXLABELS = "TaxLabels"
164
+ MATRIX = "Matrix"
165
+ # End of constants.
166
+
167
+
168
+ # Nexus parse error class,
169
+ # indicates error during parsing of nexus formatted data.
170
+ class NexusParseError < RuntimeError; end
171
+
172
+ # Creates a new nexus parser for 'nexus_str'.
173
+ #
174
+ # ---
175
+ # *Arguments*:
176
+ # * (required) _nexus_str_: String - nexus formatted data
177
+ def initialize( nexus_str )
178
+ @blocks = Array.new
179
+ @current_cmd = nil
180
+ @current_subcmd = nil
181
+ @current_block_name = nil
182
+ @current_block = nil
183
+ parse( nexus_str )
184
+ end
185
+
186
+
187
+ # Returns an Array of all blocks found in the String 'nexus_str'
188
+ # set via Bio::Nexus.new( nexus_str ).
189
+ #
190
+ # ---
191
+ # *Returns*:: Array of GenericBlocks or any of its subclasses
192
+ def get_blocks
193
+ @blocks
194
+ end
195
+
196
+ # A convenience method which returns an array of
197
+ # all nexus blocks for which the name equals 'name' found
198
+ # in the String 'nexus_str' set via Bio::Nexus.new( nexus_str ).
199
+ #
200
+ # ---
201
+ # *Arguments*:
202
+ # * (required) _name_: String
203
+ # *Returns*:: Array of GenericBlocks or any of its subclasses
204
+ def get_blocks_by_name( name )
205
+ found_blocks = Array.new
206
+ @blocks.each do | block |
207
+ if ( name == block.get_name )
208
+ found_blocks.push( block )
209
+ end
210
+ end
211
+ found_blocks
212
+ end
213
+
214
+ # A convenience method which returns an array of
215
+ # all data blocks.
216
+ #
217
+ # ---
218
+ # *Returns*:: Array of DataBlocks
219
+ def get_data_blocks
220
+ get_blocks_by_name( DATA_BLOCK.chomp( ";").downcase )
221
+ end
222
+
223
+ # A convenience method which returns an array of
224
+ # all characters blocks.
225
+ #
226
+ # ---
227
+ # *Returns*:: Array of CharactersBlocks
228
+ def get_characters_blocks
229
+ get_blocks_by_name( CHARACTERS_BLOCK.chomp( ";").downcase )
230
+ end
231
+
232
+ # A convenience method which returns an array of
233
+ # all trees blocks.
234
+ #
235
+ # ---
236
+ # *Returns*:: Array of TreesBlocks
237
+ def get_trees_blocks
238
+ get_blocks_by_name( TREES_BLOCK.chomp( ";").downcase )
239
+ end
240
+
241
+ # A convenience method which returns an array of
242
+ # all distances blocks.
243
+ #
244
+ # ---
245
+ # *Returns*:: Array of DistancesBlock
246
+ def get_distances_blocks
247
+ get_blocks_by_name( DISTANCES_BLOCK.chomp( ";").downcase )
248
+ end
249
+
250
+ # A convenience method which returns an array of
251
+ # all taxa blocks.
252
+ #
253
+ # ---
254
+ # *Returns*:: Array of TaxaBlocks
255
+ def get_taxa_blocks
256
+ get_blocks_by_name( TAXA_BLOCK.chomp( ";").downcase )
257
+ end
258
+
259
+ # Returns a String listing how many blocks of each type were parsed.
260
+ #
261
+ # ---
262
+ # *Returns*:: String
263
+ def to_s
264
+ str = String.new
265
+ if get_blocks.length < 1
266
+ str << "empty"
267
+ else
268
+ str << "number of blocks: " << get_blocks.length.to_s
269
+ if get_characters_blocks.length > 0
270
+ str << " [characters blocks: " << get_characters_blocks.length.to_s << "] "
271
+ end
272
+ if get_data_blocks.length > 0
273
+ str << " [data blocks: " << get_data_blocks.length.to_s << "] "
274
+ end
275
+ if get_distances_blocks.length > 0
276
+ str << " [distances blocks: " << get_distances_blocks.length.to_s << "] "
277
+ end
278
+ if get_taxa_blocks.length > 0
279
+ str << " [taxa blocks: " << get_taxa_blocks.length.to_s << "] "
280
+ end
281
+ if get_trees_blocks.length > 0
282
+ str << " [trees blocks: " << get_trees_blocks.length.to_s << "] "
283
+ end
284
+ end
285
+ str
286
+ end
287
+ alias to_str to_s
288
+
289
+ private
290
+
291
+ # The master method for parsing.
292
+ # Stores the resulting blocks in the array @blocks.
293
+ #
294
+ # ---
295
+ # *Arguments*:
296
+ # * (required) _str_: String - the String to be parsed
297
+ def parse( str )
298
+ str = str.chop if str[-1..-1] == ';'
299
+ ary = str.split(/[\s+=]/)
300
+ ary.collect! { |x| x.strip!; x.empty? ? nil : x }
301
+ ary.compact!
302
+ in_comment = false
303
+ comment_level = 0
304
+
305
+ # Main loop
306
+ while token = ary.shift
307
+ # Quotes:
308
+ if ( token.index( SINGLE_QUOTE ) == 0 ||
309
+ token.index( DOUBLE_QUOTE ) == 0 )
310
+ token << "_" << ary.shift
311
+ token = token.chop if token[-1..-1] == ';'
312
+ token = token.slice( 1, token.length - 2 )
313
+ end
314
+ # Comments:
315
+ open = token.count( BEGIN_COMMENT )
316
+ close = token.count( END_COMMENT )
317
+ comment = comment_level > 0
318
+ comment_level = comment_level + open - close
319
+ if ( open > 0 && open == close )
320
+ next
321
+ elsif comment_level > 0 || comment
322
+ next
323
+ elsif equal?( token, END_BLOCK )
324
+ end_block()
325
+ elsif equal?( token, BEGIN_BLOCK )
326
+ begin_block()
327
+ @current_block_name = token = ary.shift
328
+ @current_block_name.downcase!
329
+ @current_block = create_block()
330
+ @blocks.push( @current_block )
331
+ elsif ( @current_block_name != nil )
332
+ process_token( token.chomp( DELIMITER ), ary )
333
+ end
334
+ end # main loop
335
+ @blocks.compact!
336
+ end # parse
337
+
338
+ # Operations required when the beginning of a block is encountered.
339
+ #
340
+ # ---
341
+ def begin_block()
342
+ if @current_block_name != nil
343
+ raise NexusParseError, "Cannot have nested nexus blocks (\"end;\" might be missing)"
344
+ end
345
+ reset_command_state()
346
+ end
347
+
348
+ # Operations required when the end of a block is encountered.
349
+ #
350
+ # ---
351
+ def end_block()
352
+ if @current_block_name == nil
353
+ raise NexusParseError, "Cannot have two or more \"end;\" tokens in sequence"
354
+ end
355
+ @current_block_name = nil
356
+ end
357
+
358
+ # This calls various process_token_for_<name>_block methods
359
+ # depending on the state of @current_block_name.
360
+ #
361
+ # ---
362
+ # *Arguments*:
363
+ # * (required) _token_: String
364
+ # * (required) _ary_: Array
365
+ def process_token( token, ary )
366
+ case @current_block_name
367
+ when TAXA_BLOCK.downcase
368
+ process_token_for_taxa_block( token )
369
+ when CHARACTERS_BLOCK.downcase
370
+ process_token_for_character_block( token, ary )
371
+ when DATA_BLOCK.downcase
372
+ process_token_for_data_block( token, ary )
373
+ when DISTANCES_BLOCK.downcase
374
+ process_token_for_distances_block( token, ary )
375
+ when TREES_BLOCK.downcase
376
+ process_token_for_trees_block( token, ary )
377
+ else
378
+ process_token_for_generic_block( token )
379
+ end
380
+ end
381
+
382
+ # Resets @current_cmd and @current_subcmd to nil.
383
+ #
384
+ # ---
385
+ def reset_command_state()
386
+ @current_cmd = nil
387
+ @current_subcmd = nil
388
+ end
389
+
390
+ # Creates GenericBlock (or any of its subclasses) the type of
391
+ # which is determined by the state of @current_block_name.
392
+ #
393
+ # ---
394
+ # *Returns*:: GenericBlock (or any of its subclasses) object
395
def create_block()
  block_name = @current_block_name
  # Pick the block class matching the (lower-cased) block keyword;
  # anything unrecognised becomes a GenericBlock.
  block_class =
    if TAXA_BLOCK.downcase == block_name
      Bio::Nexus::TaxaBlock
    elsif CHARACTERS_BLOCK.downcase == block_name
      Bio::Nexus::CharactersBlock
    elsif DATA_BLOCK.downcase == block_name
      Bio::Nexus::DataBlock
    elsif DISTANCES_BLOCK.downcase == block_name
      Bio::Nexus::DistancesBlock
    elsif TREES_BLOCK.downcase == block_name
      Bio::Nexus::TreesBlock
    else
      Bio::Nexus::GenericBlock
    end
  return block_class.new( block_name )
end
411
+
412
+ # This processes the tokens (between Begin Taxa; and End;) for a taxa block
413
+ # Example of a currently parseable taxa block:
414
+ # Begin Taxa;
415
+ # Dimensions NTax=4;
416
+ # TaxLabels fish [comment] 'african frog' "rat snake" 'red mouse';
417
+ # End;
418
+ #
419
+ # ---
420
+ # *Arguments*:
421
+ # * (required) _token_: String
422
def process_token_for_taxa_block( token )
  # A command keyword always starts a new command, whatever the state.
  command = [ DIMENSIONS, TAXLABELS ].find { |keyword| equal?( token, keyword ) }

  if command
    @current_cmd    = command
    @current_subcmd = nil
  elsif @current_cmd == DIMENSIONS && equal?( token, NTAX )
    @current_subcmd = NTAX
  elsif cmds_equal_to?( DIMENSIONS, NTAX )
    # Value following "NTax".
    @current_block.set_number_of_taxa( token )
  elsif cmds_equal_to?( TAXLABELS, nil )
    # Every token after "TaxLabels" is a taxon name.
    @current_block.add_taxon( token )
  end
end
437
+
438
+ # This processes the tokens (between Begin Characters; and End;) for a character
439
+ # block
440
+ # Example of a currently parseable character block:
441
+ # Begin Characters;
442
+ # Dimensions NChar=20
443
+ # NTax=4;
444
+ # Format DataType=DNA
445
+ # Missing=x
446
+ # Gap=- MatchChar=.;
447
+ # Matrix
448
+ # fish ACATA GAGGG TACCT CTAAG
449
+ # frog ACTTA GAGGC TACCT CTAGC
450
+ # snake ACTCA CTGGG TACCT TTGCG
451
+ # mouse ACTCA GACGG TACCT TTGCG;
452
+ # End;
453
+ #
454
+ # ---
455
+ # *Arguments*:
456
+ # * (required) _token_: String
457
+ # * (required) _ary_: Array
458
def process_token_for_character_block( token, ary )
  # A command keyword always starts a new command, whatever the state.
  command = [ DIMENSIONS, FORMAT, MATRIX ].find { |keyword| equal?( token, keyword ) }

  # Sub-command keywords are only recognised inside their parent command.
  candidates =
    case @current_cmd
    when DIMENSIONS
      [ NTAX, NCHAR ]
    when FORMAT
      [ DATATYPE, CharactersBlock::MISSING, CharactersBlock::GAP, CharactersBlock::MATCHCHAR ]
    else
      []
    end
  subcommand = candidates.find { |keyword| equal?( token, keyword ) }

  if command
    @current_cmd    = command
    @current_subcmd = nil
  elsif subcommand
    @current_subcmd = subcommand
  else
    # Any other token is the value for the current command/sub-command pair.
    case [ @current_cmd, @current_subcmd ]
    when [ DIMENSIONS, NTAX ]
      @current_block.set_number_of_taxa( token )
    when [ DIMENSIONS, NCHAR ]
      @current_block.set_number_of_characters( token )
    when [ FORMAT, DATATYPE ]
      @current_block.set_datatype( token )
    when [ FORMAT, CharactersBlock::MISSING ]
      @current_block.set_missing( token )
    when [ FORMAT, CharactersBlock::GAP ]
      @current_block.set_gap_character( token )
    when [ FORMAT, CharactersBlock::MATCHCHAR ]
      @current_block.set_match_character( token )
    when [ MATRIX, nil ]
      # make_matrix consumes the remaining matrix tokens from ary.
      @current_block.set_matrix( make_matrix( token, ary,
                                 @current_block.get_number_of_characters, true ) )
    end
  end
end
497
+
498
+ # This processes the tokens (between Begin Trees; and End;) for a trees block
499
+ # Example of a currently parseable trees block:
500
+ # Begin Trees;
501
+ # Tree best=(fish,(frog,(snake, mouse)));
502
+ # Tree other=(snake,(frog,( fish, mouse)));
503
+ # End;
504
+ #
505
+ # ---
506
+ # *Arguments*:
507
+ # * (required) _token_: String
508
+ # * (required) _ary_: Array
509
def process_token_for_trees_block( token, ary )
  if equal?( token, TreesBlock::TREE )
    @current_cmd    = TreesBlock::TREE
    @current_subcmd = nil
  elsif cmds_equal_to?( TreesBlock::TREE, nil )
    # token is the tree name; the tree description follows in ary and may
    # be split over several tokens, so glue them together until one of
    # them contains the terminating ";".
    tree_string = ary.shift
    if tree_string.nil?
      raise NexusParseError, "Missing tree description for tree \"#{token}\""
    end
    while tree_string.index( ";" ).nil?
      next_part = ary.shift
      # Running out of tokens means the tree was never terminated; report
      # it as a parse error instead of failing on nil.
      if next_part.nil?
        raise NexusParseError, "Tree \"#{token}\" is not terminated by \";\""
      end
      tree_string << next_part
    end
    # Name and tree are registered together so both lists stay in step.
    @current_block.add_tree_name( token )
    @current_block.add_tree( tree_string )
    # Require a new "Tree" keyword before the next tree is read.
    @current_cmd = nil
  end
end
523
+
524
+ # This processes the tokens (between Begin Distances; and End;) for a distances
525
+ # block.
526
+ # Example of a currently parseable distances block:
527
+ # Begin Distances;
528
+ # Dimensions nchar=20 ntax=5;
529
+ # Format Triangle=Upper;
530
+ # Matrix
531
+ # taxon_1 0.0 1.0 2.0 4.0 7.0
532
+ # taxon_2 1.0 0.0 3.0 5.0 8.0
533
+ # taxon_3 3.0 4.0 0.0 6.0 9.0
534
+ # taxon_4 7.0 3.0 1.0 0.0 9.5
535
+ # taxon_5 1.2 1.3 1.4 1.5 0.0;
536
+ # End;
537
+ #
538
+ # ---
539
+ # *Arguments*:
540
+ # * (required) _token_: String
541
+ # * (required) _ary_: Array
542
def process_token_for_distances_block( token, ary )
  # A command keyword always starts a new command, whatever the state.
  command = [ DIMENSIONS, FORMAT, MATRIX ].find { |keyword| equal?( token, keyword ) }

  # Sub-command keywords are only recognised inside their parent command.
  candidates =
    case @current_cmd
    when DIMENSIONS
      [ NTAX, NCHAR ]
    when FORMAT
      [ DATATYPE, DistancesBlock::TRIANGLE ]
    else
      []
    end
  subcommand = candidates.find { |keyword| equal?( token, keyword ) }

  if command
    @current_cmd    = command
    @current_subcmd = nil
  elsif subcommand
    @current_subcmd = subcommand
  else
    # Any other token is the value for the current command/sub-command pair.
    # Note: a value following Format/DataType is recognised but not stored.
    case [ @current_cmd, @current_subcmd ]
    when [ DIMENSIONS, NTAX ]
      @current_block.set_number_of_taxa( token )
    when [ DIMENSIONS, NCHAR ]
      @current_block.set_number_of_characters( token )
    when [ FORMAT, DistancesBlock::TRIANGLE ]
      @current_block.set_triangle( token )
    when [ MATRIX, nil ]
      # Each row holds one whole-token value per taxon.
      @current_block.set_matrix( make_matrix( token, ary,
                                 @current_block.get_number_of_taxa, false ) )
    end
  end
end
571
+
572
+ # This processes the tokens (between Begin Data; and End;) for a data
573
+ # block.
574
+ # Example of a currently parseable data block:
575
+ # Begin Data;
576
+ # Dimensions ntax=5 nchar=14;
577
+ # Format Datatype=RNA gap=# MISSING=x MatchChar=^;
578
+ # TaxLabels ciona cow [comment] ape 'purple urchin' "green lizard";
579
+ # Matrix
580
+ # taxon_1 A- CCGTCGA-GTTA
581
+ # taxon_2 T- CCG-CGA-GATA
582
+ # taxon_3 A- C-GTCGA-GATA
583
+ # taxon_4 A- CCTCGA--GTTA
584
+ # taxon_5 T- CGGTCGT-CTTA;
585
+ # End;
586
+ #
587
+ # ---
588
+ # *Arguments*:
589
+ # * (required) _token_: String
590
+ # * (required) _ary_: Array
591
def process_token_for_data_block( token, ary )
  # A command keyword always starts a new command, whatever the state.
  command = [ DIMENSIONS, FORMAT, TAXLABELS, MATRIX ].find { |keyword| equal?( token, keyword ) }

  # Sub-command keywords are only recognised inside their parent command.
  candidates =
    case @current_cmd
    when DIMENSIONS
      [ NTAX, NCHAR ]
    when FORMAT
      [ DATATYPE, CharactersBlock::MISSING, CharactersBlock::GAP, CharactersBlock::MATCHCHAR ]
    else
      []
    end
  subcommand = candidates.find { |keyword| equal?( token, keyword ) }

  if command
    @current_cmd    = command
    @current_subcmd = nil
  elsif subcommand
    @current_subcmd = subcommand
  else
    # Any other token is the value for the current command/sub-command pair.
    case [ @current_cmd, @current_subcmd ]
    when [ DIMENSIONS, NTAX ]
      @current_block.set_number_of_taxa( token )
    when [ DIMENSIONS, NCHAR ]
      @current_block.set_number_of_characters( token )
    when [ FORMAT, DATATYPE ]
      @current_block.set_datatype( token )
    when [ FORMAT, CharactersBlock::MISSING ]
      @current_block.set_missing( token )
    when [ FORMAT, CharactersBlock::GAP ]
      @current_block.set_gap_character( token )
    when [ FORMAT, CharactersBlock::MATCHCHAR ]
      @current_block.set_match_character( token )
    when [ TAXLABELS, nil ]
      # Every token after "TaxLabels" is a taxon name.
      @current_block.add_taxon( token )
    when [ MATRIX, nil ]
      # make_matrix consumes the remaining matrix tokens from ary.
      @current_block.set_matrix( make_matrix( token, ary,
                                 @current_block.get_number_of_characters, true ) )
    end
  end
end
635
+
636
+ # Makes a NexusMatrix out of token from token Array ary
637
+ # Used by process_token_for_X_block methods which contain
638
+ # data in a matrix form. Column 0 contains names.
639
+ # This will shift tokens from ary.
640
+ # ---
641
+ # *Arguments*:
642
+ # * (required) _token_: String
643
+ # * (required) _ary_: Array
644
+ # * (required) _size_: Integer
645
+ # * (optional) _scan_token_: true or false
646
+ # *Returns*:: NexusMatrix
647
def make_matrix( token, ary, size, scan_token = false )
  matrix = NexusMatrix.new
  width  = size.to_i # size may arrive as a String token
  col = -1           # -1 means "the next token is a row name"
  row = 0
  loop do
    if col == -1
      # Row name goes into column 0.
      col = 0
      matrix.set_value( row, col, token )
    else
      # Row values; a full row switches back to "expect a name".
      col = add_token_to_matrix( token, scan_token, matrix, row, col )
      if col == width
        col = -1
        row += 1
      end
    end

    token = ary.shift
    # Running out of tokens means the matrix was never terminated; report
    # it as a parse error instead of failing on nil.
    if token.nil?
      raise NexusParseError, "Matrix is not terminated by \"#{DELIMITER}\""
    end
    next if token.index( DELIMITER ).nil?

    # The token holding the delimiter ends the matrix. Whatever precedes
    # the trailing ";" is the last value; a stand-alone ";" carries no
    # value and must not create an empty cell.
    last_value = token.chomp( ";" )
    add_token_to_matrix( last_value, scan_token, matrix, row, col ) unless last_value.empty?
    break
  end
  matrix
end
673
+
674
+ # Helper method for make_matrix.
675
+ #
676
+ # ---
677
+ # *Arguments*:
678
+ # * (required) _token_: String
679
+ # * (required) _scan_token_: true or false - add whole token
680
+ # or
681
+ # scan into chars
682
+ # * (required) _matrix_: NexusMatrix - the matrix to which to add token
683
+ # * (required) _row_: Integer - the row for matrix
684
+ # * (required) _col_: Integer - the starting column
685
+ # *Returns*:: Integer - ending column
686
def add_token_to_matrix( token, scan_token, matrix, row, col )
  # Either every single character becomes a cell, or the token as a whole.
  pieces = scan_token ? token.scan( /./ ) : [ token ]
  pieces.each do |piece|
    col += 1
    matrix.set_value( row, col, piece )
  end
  col
end
698
+
699
+ # This processes the tokens (between Begin Taxa; and End;) for a block
700
+ # for which a specific parser is not available.
701
+ # Example of a currently parseable generic block:
702
+ # Begin Taxa;
703
+ # token1 token2 token3 ...
704
+ # End;
705
+ #
706
+ # ---
707
+ # *Arguments*:
708
+ # * (required) _token_: String
709
def process_token_for_generic_block( token )
  # No interpretation here: the token is handed to the current block as is.
  block = @current_block
  block.add_token( token )
end
712
+
713
+ # Returns true if Strings str1 and str2 are
714
+ # equal - ignoring case.
715
+ #
716
+ # ---
717
+ # *Arguments*:
718
+ # * (required) _str1_: String
719
+ # * (required) _str2_: String
720
+ # *Returns*:: true or false
721
def equal?( str1, str2 )
  # nil never matches anything, not even another nil.
  return false if str1.nil? || str2.nil?

  str1.downcase == str2.downcase
end
728
+
729
+ # Returns true if @current_cmd == command
730
+ # and @current_subcmd == subcommand, false otherwise
731
+ # ---
732
+ # *Arguments*:
733
+ # * (required) _command_: String
734
+ # * (required) _subcommand_: String
735
+ # *Returns*:: true or false
736
def cmds_equal_to?( command, subcommand )
  # Both parts of the parser state have to match.
  return false unless @current_cmd == command

  @current_subcmd == subcommand
end
739
+
740
+ # Classes to represent nexus data follow.
741
+
742
+ # == DESCRIPTION
743
+ # Bio::Nexus::GenericBlock represents a generic nexus block.
744
+ # It is mainly intended to be extended into more specific classes,
745
+ # although it is used for blocks not represented by more specific
746
+ # block classes.
747
+ # It has a name and an array for the tokenized content of a
748
+ # nexus block.
749
+ #
750
+ # == USAGE
751
+ #
752
+ # require 'bio/db/nexus'
753
+ #
754
+ # # Create a new parser:
755
+ # nexus = Bio::Nexus.new( nexus_data_as_string )
756
+ #
757
+ # # Get blocks for which no class exists (private blocks)
758
+ # as Nexus::GenericBlock:
759
+ # private_blocks = nexus.get_blocks_by_name( "my_block" )
760
+ # # Get first block names "my_block":
761
+ # my_block_0 = private_blocks[ 0 ]
762
+ # # Get first token in first block names "my_block":
763
+ # first_token = my_block_0.get_tokens[ 0 ]
764
+ # # Get name of block (would return "my_block" in this case):
765
+ # name = my_block_0.get_name
766
+ # # Return data of block as nexus formatted String:
767
+ # name = my_block_0.to_nexus
768
+ #
769
class GenericBlock

  # Creates a new GenericBlock object named 'name'.
  # A trailing ";" is removed from the name.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    @name   = name.chomp( ";" )
    @tokens = []
  end

  # Gets the name of this block.
  #
  # ---
  # *Returns*:: String
  def get_name
    @name
  end

  # Returns the tokens added so far as Array of Strings.
  #
  # ---
  # *Returns*:: Array
  def get_tokens
    @tokens
  end

  # Should return a String describing this block as nexus formatted data.
  # A generic block has no known layout, so only a placeholder text
  # containing the block name is produced.
  # ---
  # *Returns*:: String
  def to_nexus
    "generic block \"#{get_name}\" [do not know how to write in nexus format]"
  end

  # Same as to_nexus.
  #
  # ---
  # *Returns*:: String
  def to_s
    to_nexus
  end
  alias to_str to_s

  # Appends a token to the content of this block.
  #
  # ---
  # *Arguments*:
  # * (required) _token_: String
  def add_token( token )
    @tokens << token
  end

end # class GenericBlock
822
+
823
+
824
+ # == DESCRIPTION
825
+ # Bio::Nexus::TaxaBlock represents a taxa nexus block.
826
+ #
827
+ # = Example of Taxa block:
828
+ # Begin Taxa;
829
+ # Dimensions NTax=4;
830
+ # TaxLabels fish [comment] 'african frog' "rat snake" 'red mouse';
831
+ # End;
832
+ #
833
+ # == USAGE
834
+ #
835
+ # require 'bio/db/nexus'
836
+ #
837
+ # # Create a new parser:
838
+ # nexus = Bio::Nexus.new( nexus_data_as_string )
839
+ #
840
+ # # Get first taxa block:
841
+ # taxa_block = nexus.get_taxa_blocks[ 0 ]
842
+ # # Get number of taxa:
843
+ # number_of_taxa = taxa_block.get_number_of_taxa.to_i
844
+ # # Get name of first taxon:
845
+ # first_taxon = taxa_block.get_taxa[ 0 ]
846
+ #
847
class TaxaBlock < GenericBlock

  # Creates a new TaxaBlock object named 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @number_of_taxa = 0
    @taxa = Array.new
  end

  # Returns a String describing this block as nexus formatted data.
  # ---
  # *Returns*:: String
  def to_nexus
    line_1 = String.new
    line_1 << DIMENSIONS
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      # to_s is required: String#<< with an Integer would append the
      # character with that code point instead of the decimal number.
      line_1 << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    line_1 << DELIMITER
    line_2 = String.new
    line_2 << TAXLABELS << " " << Nexus::Util::array_to_string( get_taxa ) << DELIMITER
    Nexus::Util::to_nexus_helper( TAXA_BLOCK, [ line_1, line_2 ] )
  end

  # Gets the "number of taxa" property.
  # The value is returned as it was set (the parser passes the
  # unconverted String token).
  #
  # ---
  # *Returns*:: Integer (or String holding an integer)
  def get_number_of_taxa
    @number_of_taxa
  end

  # Gets the taxa of this block.
  #
  # ---
  # *Returns*:: Array
  def get_taxa
    @taxa
  end

  # Sets the "number of taxa" property.
  #
  # ---
  # *Arguments*:
  # * (required) _number_of_taxa_: Integer (or String holding an integer)
  def set_number_of_taxa( number_of_taxa )
    @number_of_taxa = number_of_taxa
  end

  # Adds a taxon name to this block.
  #
  # ---
  # *Arguments*:
  # * (required) _taxon_: String
  def add_taxon( taxon )
    @taxa.push( taxon )
  end

end # class TaxaBlock
909
+
910
+
911
+ # == DESCRIPTION
912
+ # Bio::Nexus::CharactersBlock represents a characters nexus block.
913
+ #
914
+ # = Example of Characters block:
915
+ # Begin Characters;
916
+ # Dimensions NChar=20
917
+ # NTax=4;
918
+ # Format DataType=DNA
919
+ # Missing=x
920
+ # Gap=- MatchChar=.;
921
+ # Matrix
922
+ # fish ACATA GAGGG TACCT CTAAG
923
+ # frog ACTTA GAGGC TACCT CTAGC
924
+ # snake ACTCA CTGGG TACCT TTGCG
925
+ # mouse ACTCA GACGG TACCT TTGCG;
926
+ # End;
927
+ #
928
+ #
929
+ # == USAGE
930
+ #
931
+ # require 'bio/db/nexus'
932
+ #
933
+ # # Create a new parser:
934
+ # nexus = Bio::Nexus.new( nexus_data_as_string )
935
+ #
936
+ #
937
+ # # Get first characters block (same methods as Nexus::DataBlock except
938
+ # # it lacks get_taxa method):
939
+ # characters_block = nexus.get_characters_blocks[ 0 ]
940
+ #
941
class CharactersBlock < GenericBlock

  MISSING   = "Missing"
  GAP       = "Gap"
  MATCHCHAR = "MatchChar"

  # Creates a new CharactersBlock object named 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @number_of_taxa       = 0
    @number_of_characters = 0
    @data_type            = String.new
    @gap_character        = String.new
    @missing              = String.new
    @match_character      = String.new
    @matrix               = NexusMatrix.new
  end

  # Returns a String describing this block as nexus formatted data.
  #
  # ---
  # *Returns*:: String
  def to_nexus
    line_1 = String.new
    line_1 << DIMENSIONS
    # to_s is required below: String#<< with an Integer would append the
    # character with that code point instead of the decimal number.
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      line_1 << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    if ( Nexus::Util::larger_than_zero( get_number_of_characters ) )
      line_1 << " " << NCHAR << "=" << get_number_of_characters.to_s
    end
    line_1 << DELIMITER

    line_2 = String.new
    line_2 << FORMAT
    if ( Nexus::Util::longer_than_zero( get_datatype ) )
      line_2 << " " << DATATYPE << "=" << get_datatype
    end
    if ( Nexus::Util::longer_than_zero( get_missing ) )
      line_2 << " " << MISSING << "=" << get_missing
    end
    if ( Nexus::Util::longer_than_zero( get_gap_character ) )
      line_2 << " " << GAP << "=" << get_gap_character
    end
    if ( Nexus::Util::longer_than_zero( get_match_character ) )
      line_2 << " " << MATCHCHAR << "=" << get_match_character
    end
    line_2 << DELIMITER

    line_3 = String.new
    line_3 << MATRIX
    Nexus::Util::to_nexus_helper( CHARACTERS_BLOCK, [ line_1, line_2, line_3 ] +
                                  get_matrix.to_nexus_row_array )
  end

  # Gets the "number of taxa" property.
  # The value is returned as it was set (the parser passes the
  # unconverted String token).
  #
  # ---
  # *Returns*:: Integer (or String holding an integer)
  def get_number_of_taxa
    @number_of_taxa
  end

  # Gets the "number of characters" property.
  # The value is returned as it was set (the parser passes the
  # unconverted String token).
  #
  # ---
  # *Returns*:: Integer (or String holding an integer)
  def get_number_of_characters
    @number_of_characters
  end

  # Gets the "datatype" property.
  # ---
  # *Returns*:: String
  def get_datatype
    @data_type
  end

  # Gets the "gap character" property.
  # ---
  # *Returns*:: String
  def get_gap_character
    @gap_character
  end

  # Gets the "missing" property.
  # ---
  # *Returns*:: String
  def get_missing
    @missing
  end

  # Gets the "match character" property.
  # ---
  # *Returns*:: String
  def get_match_character
    @match_character
  end

  # Gets the matrix.
  # ---
  # *Returns*:: Bio::Nexus::NexusMatrix
  def get_matrix
    @matrix
  end

  # Returns character data as Array of Bio::Sequence objects,
  # one for each matrix row named 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array of Bio::Sequence
  def get_sequences_by_name( name )
    seq_strs = get_characters_strings_by_name( name )
    seqs = Array.new
    seq_strs.each do | seq_str |
      seqs.push( create_sequence( seq_str, name ) )
    end
    seqs
  end

  # Returns the characters in the matrix at row 'row' as
  # Bio::Sequence object. Column 0 of the matrix is set as
  # the definition of the Bio::Sequence object.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: Bio::Sequence
  def get_sequence( row )
    create_sequence( get_characters_string( row ), get_row_name( row ) )
  end

  # Returns the String in the matrix at row 'row' and column 0,
  # which usually is interpreted as a sequence name (if the matrix
  # contains molecular sequence characters).
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: String
  def get_row_name( row )
    get_matrix.get_name( row )
  end

  # Returns character data as String Array
  # for matrix rows named 'name'.
  #
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array of Strings
  def get_characters_strings_by_name( name )
    get_matrix.get_row_strings_by_name( name, "" )
  end

  # Returns character data as String
  # for matrix row 'row'.
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: String
  def get_characters_string( row )
    get_matrix.get_row_string( row, "" )
  end

  # Sets the "number of taxa" property.
  # ---
  # *Arguments*:
  # * (required) _number_of_taxa_: Integer (or String holding an integer)
  def set_number_of_taxa( number_of_taxa )
    @number_of_taxa = number_of_taxa
  end

  # Sets the "number of characters" property.
  # ---
  # *Arguments*:
  # * (required) _number_of_characters_: Integer (or String holding an integer)
  def set_number_of_characters( number_of_characters )
    @number_of_characters = number_of_characters
  end

  # Sets the "data type" property.
  # ---
  # *Arguments*:
  # * (required) _data_type_: String
  def set_datatype( data_type )
    @data_type = data_type
  end

  # Sets the "gap character" property.
  # ---
  # *Arguments*:
  # * (required) _gap_character_: String
  def set_gap_character( gap_character )
    @gap_character = gap_character
  end

  # Sets the "missing" property.
  # ---
  # *Arguments*:
  # * (required) _missing_: String
  def set_missing( missing )
    @missing = missing
  end

  # Sets the "match character" property.
  # ---
  # *Arguments*:
  # * (required) _match_character_: String
  def set_match_character( match_character )
    @match_character = match_character
  end

  # Sets the matrix.
  # ---
  # *Arguments*:
  # * (required) _matrix_: Bio::Nexus::NexusMatrix
  def set_matrix( matrix )
    @matrix = matrix
  end

  private

  # Creates a Bio::Sequence object with sequence 'seq_str'
  # and definition 'definition'.
  # ---
  # *Arguments*:
  # * (required) _seq_str_: String
  # * (optional) _definition_: String
  # *Returns*:: Bio::Sequence
  def create_sequence( seq_str, definition = "" )
    seq = Bio::Sequence.auto( seq_str )
    seq.definition = definition
    seq
  end

end # class CharactersBlock
1182
+
1183
+
1184
+ # == DESCRIPTION
1185
+ # Bio::Nexus::DataBlock represents a data nexus block.
1186
+ # A data block is a Bio::Nexus::CharactersBlock with the added
1187
+ # capability to store taxa names.
1188
+ #
1189
+ # = Example of Data block:
1190
+ # Begin Data;
1191
+ # Dimensions ntax=5 nchar=14;
1192
+ # Format Datatype=RNA gap=# MISSING=x MatchChar=^;
1193
+ # TaxLabels ciona cow [comment] ape 'purple urchin' "green lizard";
1194
+ # Matrix
1195
+ # taxon_1 A- CCGTCGA-GTTA
1196
+ # taxon_2 T- CCG-CGA-GATA
1197
+ # taxon_3 A- C-GTCGA-GATA
1198
+ # taxon_4 A- CCTCGA--GTTA
1199
+ # taxon_5 T- CGGTCGT-CTTA;
1200
+ # End;
1201
+ #
1202
+ #
1203
+ # == USAGE
1204
+ #
1205
+ # require 'bio/db/nexus'
1206
+ #
1207
+ # # Create a new parser:
1208
+ # nexus = Bio::Nexus.new( nexus_data_as_string )
1209
+ #
1210
+ #
1211
+ # # Get first data block:
1212
+ # data_block = nexus.get_data_blocks[ 0 ]
1213
+ # # Get first characters name:
1214
+ # seq_name = data_block.get_row_name( 0 )
1215
+ # # Get first characters row named "taxon_2" as Bio::Sequence sequence:
1216
+ # seq_tax_2 = data_block.get_sequences_by_name( "taxon_2" )[ 0 ]
1217
+ # # Get third characters row as Bio::Sequence sequence:
1218
+ # seq_2 = data_block.get_sequence( 2 )
1219
+ # # Get first characters row named "taxon_3" as String:
1220
+ # string_tax_3 = data_block.get_characters_strings_by_name( "taxon_3" )
1221
+ # # Get name of first taxon:
1222
+ # taxon_0 = data_block.get_taxa[ 0 ]
1223
+ # # Get characters matrix as Bio::Nexus::NexusMatrix (names are in column 0)
1224
+ # characters_matrix = data_block.get_matrix
1225
+ #
1226
class DataBlock < CharactersBlock

  # Creates a new DataBlock object named 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @taxa = Array.new
  end

  # Returns a String describing this block as nexus formatted data.
  # ---
  # *Returns*:: String
  def to_nexus
    line_1 = String.new
    line_1 << DIMENSIONS
    # to_s is required below: String#<< with an Integer would append the
    # character with that code point instead of the decimal number.
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      line_1 << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    if ( Nexus::Util::larger_than_zero( get_number_of_characters ) )
      line_1 << " " << NCHAR << "=" << get_number_of_characters.to_s
    end
    line_1 << DELIMITER

    line_2 = String.new
    line_2 << FORMAT
    if ( Nexus::Util::longer_than_zero( get_datatype ) )
      line_2 << " " << DATATYPE << "=" << get_datatype
    end
    if ( Nexus::Util::longer_than_zero( get_missing ) )
      line_2 << " " << MISSING << "=" << get_missing
    end
    if ( Nexus::Util::longer_than_zero( get_gap_character ) )
      line_2 << " " << GAP << "=" << get_gap_character
    end
    if ( Nexus::Util::longer_than_zero( get_match_character ) )
      line_2 << " " << MATCHCHAR << "=" << get_match_character
    end
    line_2 << DELIMITER

    # Unlike a characters block, a data block also lists its taxa.
    line_3 = String.new
    line_3 << TAXLABELS << " " << Nexus::Util::array_to_string( get_taxa )
    line_3 << DELIMITER

    line_4 = String.new
    line_4 << MATRIX
    Nexus::Util::to_nexus_helper( DATA_BLOCK, [ line_1, line_2, line_3, line_4 ] +
                                  get_matrix.to_nexus_row_array )
  end

  # Gets the taxa of this block.
  # ---
  # *Returns*:: Array
  def get_taxa
    @taxa
  end

  # Adds a taxon name to this block.
  # ---
  # *Arguments*:
  # * (required) _taxon_: String
  def add_taxon( taxon )
    @taxa.push( taxon )
  end

end # class DataBlock
1293
+
1294
+
1295
+ # == DESCRIPTION
1296
+ # Bio::Nexus::DistancesBlock represents a distances nexus block.
1297
+ #
1298
+ # = Example of Distances block:
1299
+ # Begin Distances;
1300
+ # Dimensions nchar=20 ntax=5;
1301
+ # Format Triangle=Upper;
1302
+ # Matrix
1303
+ # taxon_1 0.0 1.0 2.0 4.0 7.0
1304
+ # taxon_2 1.0 0.0 3.0 5.0 8.0
1305
+ # taxon_3 3.0 4.0 0.0 6.0 9.0
1306
+ # taxon_4 7.0 3.0 1.0 0.0 9.5
1307
+ # taxon_5 1.2 1.3 1.4 1.5 0.0;
1308
+ # End;
1309
+ #
1310
+ #
1311
+ # == USAGE
1312
+ #
1313
+ # require 'bio/db/nexus'
1314
+ #
1315
+ # # Create a new parser:
1316
+ # nexus = Bio::Nexus.new( nexus_data_as_string )
1317
+ #
1318
+ # # Get distances block(s):
1319
+ # distances_blocks = nexus.get_distances_blocks
1320
+ # # Get matrix as Bio::Nexus::NexusMatrix object:
1321
+ # matrix = distances_blocks[ 0 ].get_matrix
1322
+ # # Get value (column 0 are names):
1323
+ # val = matrix.get_value( 1, 5 )
1324
+ #
1325
class DistancesBlock < GenericBlock
  TRIANGLE = "Triangle"

  # Creates a new DistancesBlock object named 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  def initialize( name )
    super( name )
    @number_of_taxa       = 0
    @number_of_characters = 0
    @triangle             = String.new
    @matrix               = NexusMatrix.new
  end

  # Returns a String describing this block as nexus formatted data.
  # ---
  # *Returns*:: String
  def to_nexus
    line_1 = String.new
    line_1 << DIMENSIONS
    # to_s is required below: String#<< with an Integer would append the
    # character with that code point instead of the decimal number.
    if ( Nexus::Util::larger_than_zero( get_number_of_taxa ) )
      line_1 << " " << NTAX << "=" << get_number_of_taxa.to_s
    end
    if ( Nexus::Util::larger_than_zero( get_number_of_characters ) )
      line_1 << " " << NCHAR << "=" << get_number_of_characters.to_s
    end
    line_1 << DELIMITER

    line_2 = String.new
    line_2 << FORMAT
    if ( Nexus::Util::longer_than_zero( get_triangle ) )
      line_2 << " " << TRIANGLE << "=" << get_triangle
    end
    line_2 << DELIMITER

    line_3 = String.new
    line_3 << MATRIX
    # The " " argument is passed through to NexusMatrix#to_nexus_row_array.
    Nexus::Util::to_nexus_helper( DISTANCES_BLOCK, [ line_1, line_2, line_3 ] +
                                  get_matrix.to_nexus_row_array( " " ) )
  end

  # Gets the "number of taxa" property.
  # The value is returned as it was set (the parser passes the
  # unconverted String token).
  # ---
  # *Returns*:: Integer (or String holding an integer)
  def get_number_of_taxa
    @number_of_taxa
  end

  # Gets the "number of characters" property.
  # The value is returned as it was set (the parser passes the
  # unconverted String token).
  # ---
  # *Returns*:: Integer (or String holding an integer)
  def get_number_of_characters
    @number_of_characters
  end

  # Gets the "triangle" property.
  # ---
  # *Returns*:: String
  def get_triangle
    @triangle
  end

  # Gets the matrix.
  # ---
  # *Returns*:: Bio::Nexus::NexusMatrix
  def get_matrix
    @matrix
  end

  # Sets the "number of taxa" property.
  # ---
  # *Arguments*:
  # * (required) _number_of_taxa_: Integer (or String holding an integer)
  def set_number_of_taxa( number_of_taxa )
    @number_of_taxa = number_of_taxa
  end

  # Sets the "number of characters" property.
  # ---
  # *Arguments*:
  # * (required) _number_of_characters_: Integer (or String holding an integer)
  def set_number_of_characters( number_of_characters )
    @number_of_characters = number_of_characters
  end

  # Sets the "triangle" property.
  # ---
  # *Arguments*:
  # * (required) _triangle_: String
  def set_triangle( triangle )
    @triangle = triangle
  end

  # Sets the matrix.
  # ---
  # *Arguments*:
  # * (required) _matrix_: Bio::Nexus::NexusMatrix
  def set_matrix( matrix )
    @matrix = matrix
  end

end # class DistancesBlock
1428
+
1429
+
1430
+ # == DESCRIPTION
1431
+ # Bio::Nexus::TreesBlock represents a trees nexus block.
1432
+ #
1433
+ # = Example of Trees block:
1434
+ # Begin Trees;
1435
+ # Tree best=(fish,(frog,(snake, mouse)));
1436
+ # Tree other=(snake,(frog,( fish, mouse)));
1437
+ # End;
1438
+ #
1439
+ #
1440
+ # == USAGE
1441
+ #
1442
+ # require 'bio/db/nexus'
1443
+ #
1444
+ # # Create a new parser:
1445
+ # nexus = Bio::Nexus.new( nexus_data_as_string )
1446
+ #
1447
+ # # Get first trees block:
1448
+ # trees_block = nexus.get_trees_blocks[ 0 ]
1449
+ # # Get first tree named "best" as String:
1450
+ # string_fish = trees_block.get_tree_strings_by_name( "best" )[ 0 ]
1451
+ # # Get first tree named "best" as parsed tree object (result of Bio::Newick#tree):
1452
+ # tree_fish = trees_block.get_trees_by_name( "best" )[ 0 ]
1453
+ # # Get first tree as parsed tree object (result of Bio::Newick#tree):
1454
+ # tree_first = trees_block.get_tree( 0 )
1455
+ #
1456
class TreesBlock < GenericBlock
  TREE = "Tree"

  # Creates a new trees block holding no trees and no tree names.
  # ---
  # *Arguments*:
  # * (required) _name_: handed on unchanged to GenericBlock#initialize
  def initialize( name )
    super( name )
    @trees      = Array.new
    @tree_names = Array.new
  end

  # Renders this block as nexus formatted data: one
  # "Tree <name>=<tree string>" line per stored tree, wrapped by
  # Nexus::Util::to_nexus_helper. Names and tree strings are paired by
  # their position in the two internal arrays.
  # ---
  # *Returns*:: String
  def to_nexus
    tree_lines = ( 0 ... @trees.length ).map do | idx |
      TREE + " " + @tree_names[ idx ] + "=" + @trees[ idx ]
    end
    Nexus::Util::to_nexus_helper( TREES_BLOCK, tree_lines )
  end

  # Returns the stored tree strings (the internal array itself, not a copy).
  # ---
  # *Returns*:: Array
  def get_tree_strings
    @trees
  end

  # Returns the stored tree names (the internal array itself, not a copy).
  # ---
  # *Returns*:: Array
  def get_tree_names
    @tree_names
  end

  # Returns the tree strings whose tree name (at the same position)
  # equals 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array
  def get_tree_strings_by_name( name )
    hits = ( 0 ... @tree_names.length ).select { | idx | @tree_names[ idx ] == name }
    hits.map { | idx | @trees[ idx ] }
  end

  # Parses tree string i (same order as in nexus data) with Bio::Newick
  # and returns the parsed tree.
  # ---
  # *Arguments*:
  # * (required) _i_: Integer
  # *Returns*:: the object returned by Bio::Newick#tree
  def get_tree( i )
    Bio::Newick.new( @trees[ i ] ).tree
  end

  # Returns the parsed trees (see get_tree) whose tree name (at the same
  # position) equals 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # *Returns*:: Array of objects as returned by get_tree
  def get_trees_by_name( name )
    hits = ( 0 ... @tree_names.length ).select { | idx | @tree_names[ idx ] == name }
    hits.map { | idx | get_tree( idx ) }
  end

  # Appends a tree name to this block.
  # ---
  # *Arguments*:
  # * (required) _tree_name_: String
  def add_tree_name( tree_name )
    @tree_names.push( tree_name )
  end

  # Appends a tree (as string) to this block.
  # ---
  # *Arguments*:
  # * (required) _tree_as_string_: String
  def add_tree( tree_as_string )
    @trees.push( tree_as_string )
  end

end # class TreesBlock
1554
+
1555
+
1556
+ # == DESCRIPTION
1557
+ # Bio::Nexus::NexusMatrix represents a characters or distance matrix,
1558
+ # where the names are stored in column zero.
1559
+ #
1560
+ #
1561
+ # == USAGE
1562
+ #
1563
+ # require 'bio/db/nexus'
1564
+ #
1565
+ # # Create a new parser:
1566
+ # nexus = Bio::Nexus.new( nexus_data_as_string )
1567
+ # # Get distances block(s):
1568
+ # distances_block = nexus.get_distances_blocks[ 0 ]
1569
+ # # Get matrix as Bio::Nexus::NexusMatrix object:
1570
+ # matrix = distances_block.get_matrix
1571
+ # # Get value (column 0 are names):
1572
+ # val = matrix.get_value( 1, 5 )
1573
+ # # Return first row as String (all columns except column 0),
1574
+ # # values are separated by "_":
1575
+ # row_str_0 = matrix.get_row_string( 0, "_" )
1576
+ # # Return all rows named "ciona" as String (all columns except column 0),
1577
+ # # values are separated by "+":
1578
+ # ciona_rows = matrix.get_row_strings_by_name( "ciona", "+" )
1579
class NexusMatrix

  # Nexus matrix error class.
  class NexusMatrixError < RuntimeError; end

  # Creates new, empty NexusMatrix.
  def initialize
    @rows = Hash.new   # row index => Hash of ( column index => value )
    @max_row = -1      # stays negative until a value has been stored
    @max_col = -1
  end

  # Sets the value at row 'row' and column 'col' to 'value'.
  # The maximal row/column numbers grow as needed.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # * (required) _col_: Integer
  # * (required) _value_: Object
  # *Raises*:: NexusMatrixError if 'row' or 'col' is negative
  def set_value( row, col, value )
    if row < 0 || col < 0
      raise( NexusMatrixError, "attempt to use negative values for row or column" )
    end
    set_max_row( row ) if row > get_max_row
    set_max_col( col ) if col > get_max_col
    # Rows are created lazily the first time one of their cells is written.
    ( @rows[ row ] ||= Hash.new )[ col ] = value
  end

  # Returns the value at row 'row' and column 'col', or nil if nothing
  # was stored in that cell.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # * (required) _col_: Integer
  # *Returns*:: Object
  # *Raises*:: NexusMatrixError if 'row' or 'col' is negative or larger
  #            than the current maximal row/column
  def get_value( row, col )
    if row < 0 || row > get_max_row
      raise( NexusMatrixError, "value for row (" + row.to_s +
             ") is out of range [max row: " + get_max_row.to_s + "]" )
    elsif col < 0 || col > get_max_col
      raise( NexusMatrixError, "value for column (" + col.to_s +
             ") is out of range [max column: " + get_max_col.to_s + "]" )
    end
    cells = @rows[ row ]
    return nil if cells.nil? || cells.empty?
    cells[ col ]
  end

  # Returns the maximal columns number (-1 if nothing was stored yet).
  # ---
  # *Returns*:: Integer
  def get_max_col
    @max_col
  end

  # Returns the maximal row number (-1 if nothing was stored yet).
  # ---
  # *Returns*:: Integer
  def get_max_row
    @max_row
  end

  # Returns true if matrix is empty.
  #
  # ---
  # *Returns*:: true or false
  def is_empty?
    get_max_col < 0 || get_max_row < 0
  end

  # Convenience method which returns the value of
  # column 0 and row 'row' (usually the name), converted with to_s.
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: String
  def get_name( row )
    get_value( row, 0 ).to_s
  end

  # Returns the values of columns 1 to maximal column number
  # in row 'row' concatenated as string. Each value is converted with
  # to_s and followed by 'spacer' (so the result ends with 'spacer').
  # A nil spacer is treated like an empty one.
  #
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # * (optional) _spacer_: String
  # *Returns*:: String
  def get_row_string( row, spacer = "" )
    row_str = String.new
    return row_str if is_empty?
    ( 1 .. get_max_col ).each do | col |
      # to_s: String#<< would treat an Integer as a code point and
      # reject nil/Float outright.
      row_str << get_value( row, col ).to_s << spacer.to_s
    end
    row_str
  end

  # Returns all rows as Array of Strings (see get_row_string)
  # for which column 0 equals 'name'.
  # ---
  # *Arguments*:
  # * (required) _name_: String
  # * (optional) _spacer_: String
  # *Returns*:: Array
  def get_row_strings_by_name( name, spacer = "" )
    return Array.new if is_empty?
    named_rows = ( 0 .. get_max_row ).select { | row | get_value( row, 0 ) == name }
    named_rows.map { | row | get_row_string( row, spacer ) }
  end

  # Returns matrix as String (one line per row, values separated by " "),
  # returns "empty" if empty.
  # ---
  # *Returns*:: String
  def to_s
    return "empty" if is_empty?
    str = String.new
    to_nexus_row_array( " ", false ).each do | row |
      str << row << END_OF_LINE
    end
    str
  end
  alias to_str to_s

  # Helper method to produce nexus formatted data: one String per row,
  # consisting of the left-justified name (column 0), a blank, and the
  # remaining columns joined by 'spacer'. If 'append_delimiter' is true,
  # DELIMITER is appended to the last row.
  # ---
  # *Arguments*:
  # * (optional) _spacer_: String
  # * (optional) _append_delimiter_: true or false
  # *Returns*:: Array
  def to_nexus_row_array( spacer = "", append_delimiter = true )
    ary = Array.new
    return ary if is_empty?
    # get_name applies to_s, so missing or non-String names cannot break
    # the length/ljust calls below.
    names = ( 0 .. get_max_row ).map { | row | get_name( row ) }
    # Name column is at least 10 wide, wider if a name is longer.
    max_length = ( names.map { | n | n.length } + [ 10 ] ).max
    names.each_with_index do | name, row |
      row_str = String.new
      row_str << name.ljust( max_length + 1 ) << " " << get_row_string( row, spacer )
      # get_row_string leaves a trailing spacer behind; strip it.
      row_str.chomp!( spacer ) if spacer != nil && spacer.length > 0
      row_str << DELIMITER if append_delimiter && row == get_max_row
      ary.push( row_str )
    end
    ary
  end


  private

  # Returns the stored cells of row 'row'.
  # ---
  # *Arguments*:
  # * (required) _row_: Integer
  # *Returns*:: Hash (column number => value), or nil if the row has no cells
  def get_row( row )
    @rows[ row ]
  end

  # Sets maximal column number.
  # ---
  # *Arguments*:
  # * (required) _max_col_: Integer
  def set_max_col( max_col )
    @max_col = max_col
  end

  # Sets maximal row number.
  # ---
  # *Arguments*:
  # * (required) _max_row_: Integer
  def set_max_row( max_row )
    @max_row = max_row
  end

end # NexusMatrix
1790
+
1791
+ # End of classes to represent nexus data.
1792
+
1793
+ # = DESCRIPTION
1794
+ # Bio::Nexus::Util is a class containing static helper methods
1795
+ #
1796
class Util

  # Helper method to produce nexus formatted data: a begin line built from
  # BEGIN_BLOCK and 'block', one indented line per non-nil entry of
  # 'lines', and a closing END_BLOCK line.
  # ---
  # *Arguments*:
  # * (required) _block_: String appended after BEGIN_BLOCK (it is added
  #   with String#<<, so it must be a String or implicitly convertible)
  # * (required) _lines_: Array of Strings; nil entries are skipped
  # *Returns*:: String
  def Util::to_nexus_helper( block, lines )
    str = String.new
    str << BEGIN_BLOCK << " " << block << END_OF_LINE
    lines.each do | line |
      next if line.nil?
      str << INDENTENTION << line << END_OF_LINE
    end
    str << END_BLOCK << END_OF_LINE
    str
  end

  # Returns the elements of array 'ary' as one String, separated by " ".
  # Elements are converted as by Array#join, so non-String elements
  # (numbers, nil) are rendered via to_s rather than raising or being
  # interpreted as character codes.
  # ---
  # *Arguments*:
  # * (required) _ary_: Array
  # *Returns*:: String
  def Util::array_to_string( ary )
    ary.join( " " )
  end

  # Returns true if i is not nil and i.to_i is larger than 0.
  # ---
  # *Arguments*:
  # * (required) _i_: Integer (or anything responding to to_i)
  # *Returns*:: true or false
  def Util::larger_than_zero( i )
    !i.nil? && i.to_i > 0
  end

  # Returns true if String str is not nil and longer than 0.
  # ---
  # *Arguments*:
  # * (required) _str_: String
  # *Returns*:: true or false
  def Util::longer_than_zero( str )
    !str.nil? && str.length > 0
  end

end # class Util
1849
+
1850
+ end # class Nexus
1851
+
1852
+ end #module Bio
1853
+
1854
+