bio 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (303) hide show
  1. data/ChangeLog +2105 -3728
  2. data/KNOWN_ISSUES.rdoc +35 -3
  3. data/README.rdoc +8 -2
  4. data/RELEASE_NOTES.rdoc +166 -0
  5. data/bin/bioruby +4 -1
  6. data/bioruby.gemspec +146 -1
  7. data/bioruby.gemspec.erb +3 -1
  8. data/doc/ChangeLog-before-1.3.1 +3961 -0
  9. data/doc/Tutorial.rd +154 -22
  10. data/doc/Tutorial.rd.html +125 -68
  11. data/lib/bio.rb +21 -6
  12. data/lib/bio/appl/bl2seq/report.rb +11 -202
  13. data/lib/bio/appl/blast/format0.rb +0 -193
  14. data/lib/bio/appl/blast/report.rb +2 -147
  15. data/lib/bio/appl/blast/wublast.rb +0 -208
  16. data/lib/bio/appl/fasta.rb +4 -19
  17. data/lib/bio/appl/fasta/format10.rb +0 -14
  18. data/lib/bio/appl/genscan/report.rb +0 -176
  19. data/lib/bio/appl/hmmer.rb +1 -15
  20. data/lib/bio/appl/hmmer/report.rb +0 -100
  21. data/lib/bio/appl/meme/mast.rb +156 -0
  22. data/lib/bio/appl/meme/mast/report.rb +91 -0
  23. data/lib/bio/appl/meme/motif.rb +48 -0
  24. data/lib/bio/appl/psort.rb +0 -111
  25. data/lib/bio/appl/psort/report.rb +1 -45
  26. data/lib/bio/appl/pts1.rb +2 -4
  27. data/lib/bio/appl/sosui/report.rb +5 -54
  28. data/lib/bio/appl/targetp/report.rb +1 -104
  29. data/lib/bio/appl/tmhmm/report.rb +0 -36
  30. data/lib/bio/command.rb +94 -10
  31. data/lib/bio/data/aa.rb +1 -77
  32. data/lib/bio/data/codontable.rb +1 -95
  33. data/lib/bio/data/na.rb +1 -26
  34. data/lib/bio/db/aaindex.rb +1 -38
  35. data/lib/bio/db/fasta.rb +1 -134
  36. data/lib/bio/db/fasta/format_qual.rb +204 -0
  37. data/lib/bio/db/fasta/qual.rb +102 -0
  38. data/lib/bio/db/fastq.rb +645 -0
  39. data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
  40. data/lib/bio/db/fastq/format_fastq.rb +175 -0
  41. data/lib/bio/db/genbank/genbank.rb +1 -86
  42. data/lib/bio/db/gff.rb +0 -17
  43. data/lib/bio/db/go.rb +4 -72
  44. data/lib/bio/db/kegg/common.rb +112 -0
  45. data/lib/bio/db/kegg/compound.rb +29 -20
  46. data/lib/bio/db/kegg/drug.rb +74 -34
  47. data/lib/bio/db/kegg/enzyme.rb +26 -5
  48. data/lib/bio/db/kegg/genes.rb +128 -15
  49. data/lib/bio/db/kegg/genome.rb +3 -41
  50. data/lib/bio/db/kegg/glycan.rb +19 -24
  51. data/lib/bio/db/kegg/orthology.rb +16 -56
  52. data/lib/bio/db/kegg/reaction.rb +81 -28
  53. data/lib/bio/db/kegg/taxonomy.rb +1 -52
  54. data/lib/bio/db/litdb.rb +1 -16
  55. data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
  56. data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
  57. data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
  58. data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
  59. data/lib/bio/db/prosite.rb +2 -95
  60. data/lib/bio/db/rebase.rb +5 -6
  61. data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
  62. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
  63. data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
  64. data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
  65. data/lib/bio/io/das.rb +0 -44
  66. data/lib/bio/io/ddbjxml.rb +1 -181
  67. data/lib/bio/io/flatfile.rb +1 -7
  68. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  69. data/lib/bio/io/keggapi.rb +0 -442
  70. data/lib/bio/io/ncbirest.rb +130 -132
  71. data/lib/bio/io/ncbisoap.rb +2 -1
  72. data/lib/bio/io/pubmed.rb +0 -88
  73. data/lib/bio/location.rb +0 -73
  74. data/lib/bio/pathway.rb +0 -171
  75. data/lib/bio/sequence.rb +18 -1
  76. data/lib/bio/sequence/adapter.rb +3 -0
  77. data/lib/bio/sequence/format.rb +16 -0
  78. data/lib/bio/sequence/quality_score.rb +205 -0
  79. data/lib/bio/tree.rb +70 -5
  80. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
  81. data/lib/bio/util/sirna.rb +1 -23
  82. data/lib/bio/version.rb +1 -1
  83. data/sample/demo_aaindex.rb +67 -0
  84. data/sample/demo_aminoacid.rb +101 -0
  85. data/sample/demo_bl2seq_report.rb +220 -0
  86. data/sample/demo_blast_report.rb +285 -0
  87. data/sample/demo_codontable.rb +119 -0
  88. data/sample/demo_das.rb +105 -0
  89. data/sample/demo_ddbjxml.rb +212 -0
  90. data/sample/demo_fasta_remote.rb +51 -0
  91. data/sample/demo_fastaformat.rb +105 -0
  92. data/sample/demo_genbank.rb +132 -0
  93. data/sample/demo_genscan_report.rb +202 -0
  94. data/sample/demo_gff1.rb +49 -0
  95. data/sample/demo_go.rb +98 -0
  96. data/sample/demo_hmmer_report.rb +149 -0
  97. data/sample/demo_kegg_compound.rb +57 -0
  98. data/sample/demo_kegg_drug.rb +65 -0
  99. data/sample/demo_kegg_genome.rb +74 -0
  100. data/sample/demo_kegg_glycan.rb +72 -0
  101. data/sample/demo_kegg_orthology.rb +62 -0
  102. data/sample/demo_kegg_reaction.rb +66 -0
  103. data/sample/demo_kegg_taxonomy.rb +92 -0
  104. data/sample/demo_keggapi.rb +502 -0
  105. data/sample/demo_litdb.rb +42 -0
  106. data/sample/demo_locations.rb +99 -0
  107. data/sample/demo_ncbi_rest.rb +130 -0
  108. data/sample/demo_nucleicacid.rb +49 -0
  109. data/sample/demo_pathway.rb +196 -0
  110. data/sample/demo_prosite.rb +120 -0
  111. data/sample/demo_psort.rb +138 -0
  112. data/sample/demo_psort_report.rb +70 -0
  113. data/sample/demo_pubmed.rb +118 -0
  114. data/sample/demo_sirna.rb +63 -0
  115. data/sample/demo_sosui_report.rb +89 -0
  116. data/sample/demo_targetp_report.rb +135 -0
  117. data/sample/demo_tmhmm_report.rb +68 -0
  118. data/sample/pmfetch.rb +13 -4
  119. data/sample/pmsearch.rb +15 -4
  120. data/sample/test_phyloxml_big.rb +205 -0
  121. data/test/bioruby_test_helper.rb +61 -0
  122. data/test/data/KEGG/1.1.1.1.enzyme +935 -0
  123. data/test/data/KEGG/C00025.compound +102 -0
  124. data/test/data/KEGG/D00063.drug +104 -0
  125. data/test/data/KEGG/G00024.glycan +47 -0
  126. data/test/data/KEGG/G01366.glycan +18 -0
  127. data/test/data/KEGG/K02338.orthology +902 -0
  128. data/test/data/KEGG/R00006.reaction +14 -0
  129. data/test/data/fastq/README.txt +109 -0
  130. data/test/data/fastq/error_diff_ids.fastq +20 -0
  131. data/test/data/fastq/error_double_qual.fastq +22 -0
  132. data/test/data/fastq/error_double_seq.fastq +22 -0
  133. data/test/data/fastq/error_long_qual.fastq +20 -0
  134. data/test/data/fastq/error_no_qual.fastq +20 -0
  135. data/test/data/fastq/error_qual_del.fastq +20 -0
  136. data/test/data/fastq/error_qual_escape.fastq +20 -0
  137. data/test/data/fastq/error_qual_null.fastq +0 -0
  138. data/test/data/fastq/error_qual_space.fastq +21 -0
  139. data/test/data/fastq/error_qual_tab.fastq +21 -0
  140. data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
  141. data/test/data/fastq/error_qual_vtab.fastq +20 -0
  142. data/test/data/fastq/error_short_qual.fastq +20 -0
  143. data/test/data/fastq/error_spaces.fastq +20 -0
  144. data/test/data/fastq/error_tabs.fastq +21 -0
  145. data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
  146. data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
  147. data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
  148. data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
  149. data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
  150. data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
  151. data/test/data/fastq/error_trunc_in_title.fastq +17 -0
  152. data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
  153. data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
  154. data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
  155. data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
  156. data/test/data/fastq/longreads_as_illumina.fastq +40 -0
  157. data/test/data/fastq/longreads_as_sanger.fastq +40 -0
  158. data/test/data/fastq/longreads_as_solexa.fastq +40 -0
  159. data/test/data/fastq/longreads_original_sanger.fastq +120 -0
  160. data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
  161. data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
  162. data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
  163. data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
  164. data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
  165. data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
  166. data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
  167. data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
  168. data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
  169. data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
  170. data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
  171. data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
  172. data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
  173. data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
  174. data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
  175. data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
  176. data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
  177. data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
  178. data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
  179. data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
  180. data/test/data/meme/db +0 -0
  181. data/test/data/meme/mast +0 -0
  182. data/test/data/meme/mast.out +13 -0
  183. data/test/data/meme/meme.out +3 -0
  184. data/test/data/phyloxml/apaf.xml +666 -0
  185. data/test/data/phyloxml/bcl_2.xml +2097 -0
  186. data/test/data/phyloxml/made_up.xml +144 -0
  187. data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
  188. data/test/data/phyloxml/phyloxml_examples.xml +415 -0
  189. data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
  190. data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
  191. data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
  192. data/test/functional/bio/appl/test_pts1.rb +7 -5
  193. data/test/functional/bio/io/test_ensembl.rb +4 -3
  194. data/test/functional/bio/io/test_pubmed.rb +9 -3
  195. data/test/functional/bio/io/test_soapwsdl.rb +5 -4
  196. data/test/functional/bio/io/test_togows.rb +5 -4
  197. data/test/functional/bio/sequence/test_output_embl.rb +6 -4
  198. data/test/functional/bio/test_command.rb +54 -5
  199. data/test/runner.rb +5 -3
  200. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
  201. data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
  202. data/test/unit/bio/appl/blast/test_report.rb +5 -4
  203. data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
  204. data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
  205. data/test/unit/bio/appl/genscan/test_report.rb +8 -9
  206. data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
  207. data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
  208. data/test/unit/bio/appl/mafft/test_report.rb +6 -5
  209. data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
  210. data/test/unit/bio/appl/meme/test_mast.rb +103 -0
  211. data/test/unit/bio/appl/meme/test_motif.rb +38 -0
  212. data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
  213. data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
  214. data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
  215. data/test/unit/bio/appl/sim4/test_report.rb +5 -4
  216. data/test/unit/bio/appl/sosui/test_report.rb +6 -5
  217. data/test/unit/bio/appl/targetp/test_report.rb +5 -3
  218. data/test/unit/bio/appl/test_blast.rb +5 -4
  219. data/test/unit/bio/appl/test_fasta.rb +4 -2
  220. data/test/unit/bio/appl/test_pts1.rb +4 -2
  221. data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
  222. data/test/unit/bio/data/test_aa.rb +5 -3
  223. data/test/unit/bio/data/test_codontable.rb +5 -4
  224. data/test/unit/bio/data/test_na.rb +5 -3
  225. data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
  226. data/test/unit/bio/db/embl/test_common.rb +4 -2
  227. data/test/unit/bio/db/embl/test_embl.rb +6 -6
  228. data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
  229. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
  230. data/test/unit/bio/db/embl/test_sptr.rb +6 -8
  231. data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
  232. data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
  233. data/test/unit/bio/db/kegg/test_compound.rb +146 -0
  234. data/test/unit/bio/db/kegg/test_drug.rb +194 -0
  235. data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
  236. data/test/unit/bio/db/kegg/test_genes.rb +32 -4
  237. data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
  238. data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
  239. data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
  240. data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
  241. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
  242. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
  243. data/test/unit/bio/db/test_aaindex.rb +6 -6
  244. data/test/unit/bio/db/test_fasta.rb +5 -46
  245. data/test/unit/bio/db/test_fastq.rb +829 -0
  246. data/test/unit/bio/db/test_gff.rb +4 -2
  247. data/test/unit/bio/db/test_lasergene.rb +7 -5
  248. data/test/unit/bio/db/test_medline.rb +4 -2
  249. data/test/unit/bio/db/test_newick.rb +6 -6
  250. data/test/unit/bio/db/test_nexus.rb +4 -2
  251. data/test/unit/bio/db/test_phyloxml.rb +769 -0
  252. data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
  253. data/test/unit/bio/db/test_prosite.rb +6 -5
  254. data/test/unit/bio/db/test_qual.rb +63 -0
  255. data/test/unit/bio/db/test_rebase.rb +5 -3
  256. data/test/unit/bio/db/test_soft.rb +7 -6
  257. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
  258. data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
  259. data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
  260. data/test/unit/bio/io/test_ddbjxml.rb +4 -3
  261. data/test/unit/bio/io/test_ensembl.rb +5 -3
  262. data/test/unit/bio/io/test_fastacmd.rb +4 -3
  263. data/test/unit/bio/io/test_flatfile.rb +6 -5
  264. data/test/unit/bio/io/test_soapwsdl.rb +4 -3
  265. data/test/unit/bio/io/test_togows.rb +4 -2
  266. data/test/unit/bio/sequence/test_aa.rb +5 -3
  267. data/test/unit/bio/sequence/test_common.rb +4 -2
  268. data/test/unit/bio/sequence/test_compat.rb +4 -2
  269. data/test/unit/bio/sequence/test_dblink.rb +5 -3
  270. data/test/unit/bio/sequence/test_na.rb +4 -2
  271. data/test/unit/bio/sequence/test_quality_score.rb +330 -0
  272. data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
  273. data/test/unit/bio/test_alignment.rb +5 -3
  274. data/test/unit/bio/test_command.rb +4 -3
  275. data/test/unit/bio/test_db.rb +5 -3
  276. data/test/unit/bio/test_feature.rb +4 -2
  277. data/test/unit/bio/test_location.rb +4 -2
  278. data/test/unit/bio/test_map.rb +5 -3
  279. data/test/unit/bio/test_pathway.rb +4 -2
  280. data/test/unit/bio/test_reference.rb +4 -2
  281. data/test/unit/bio/test_sequence.rb +5 -3
  282. data/test/unit/bio/test_shell.rb +5 -3
  283. data/test/unit/bio/test_tree.rb +6 -6
  284. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
  285. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
  286. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
  287. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
  288. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
  289. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
  290. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
  291. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
  292. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
  293. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
  294. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
  295. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
  296. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
  297. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
  298. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
  299. data/test/unit/bio/util/test_color_scheme.rb +5 -3
  300. data/test/unit/bio/util/test_contingency_table.rb +5 -3
  301. data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
  302. data/test/unit/bio/util/test_sirna.rb +6 -4
  303. metadata +147 -2
@@ -0,0 +1,954 @@
1
+ #
2
+ # = bio/db/phyloxml_parser.rb - PhyloXML parser
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+ # == Description
11
+ #
12
+ # This file contains a parser for PhyloXML.
13
+ #
14
+ # == Requirements
15
+ #
16
+ # Libxml2 XML parser is required. Install libxml-ruby bindings from
17
+ # http://libxml.rubyforge.org or
18
+ #
19
+ # gem install -r libxml-ruby
20
+ #
21
+ # == References
22
+ #
23
+ # * http://www.phyloxml.org
24
+ #
25
+ # * https://www.nescent.org/wg_phyloinformatics/PhyloSoC:PhyloXML_support_in_BioRuby
26
+
27
+
28
+ require 'uri'
29
+ require 'libxml'
30
+
31
+ require 'bio/tree'
32
+ require 'bio/db/phyloxml/phyloxml_elements'
33
+
34
+
35
+ module Bio
36
+
37
+ module PhyloXML
38
+
39
+
40
+
41
+
42
+ # == Description
43
+ #
44
+ # Bio::PhyloXML::Parser is for parsing phyloXML format files.
45
+ #
46
+ # == Requirements
47
+ #
48
+ # Libxml2 XML parser is required. Install libxml-ruby bindings from
49
+ # http://libxml.rubyforge.org or
50
+ #
51
+ # gem install -r libxml-ruby
52
+ #
53
+ # == Usage
54
+ #
55
+ # require 'bio'
56
+ #
57
+ # # Create new phyloxml parser
58
+ # phyloxml = Bio::PhyloXML::Parser.open('example.xml')
59
+ #
60
+ # # Print the names of all trees in the file
61
+ # phyloxml.each do |tree|
62
+ # puts tree.name
63
+ # end
64
+ #
65
+ #
66
+ # == References
67
+ #
68
+ # http://www.phyloxml.org/documentation/version_100/phyloxml.xsd.html
69
+ #
70
+ class Parser
71
+
72
+ include LibXML
73
+
74
+ # After parsing all the trees, if there is anything else in other xml format,
75
+ # it is saved in this array of PhyloXML::Other objects
76
+ attr_reader :other
77
+
78
+ # Initializes LibXML::Reader and reads the file until it reaches the first
79
+ # phylogeny element.
80
+ #
81
+ # Create a new Bio::PhyloXML::Parser object.
82
+ #
83
+ # p = Bio::PhyloXML::Parser.open("./phyloxml_examples.xml")
84
+ #
85
+ # ---
86
+ # *Arguments*:
87
+ # * (required) _filename_: Path to the file to parse.
88
+ # * (optional) _validate_: Whether to validate the file against schema or not. Default value is true.
89
+ # *Returns*:: Bio::PhyloXML::Parser object
90
+ def self.open(filename, validate=true)
91
+ obj = new(nil, validate)
92
+ obj.instance_eval {
93
+ filename = _secure_filename(filename)
94
+ _validate(:file, filename) if validate
95
+ # XML::Parser::Options::NONET for security reason
96
+ @reader = XML::Reader.file(filename,
97
+ { :options =>
98
+ LibXML::XML::Parser::Options::NONET })
99
+ _skip_leader
100
+ }
101
+ obj
102
+ end
103
+
104
+ # Initializes LibXML::Reader and reads the file until it reaches the first
105
+ # phylogeny element.
106
+ #
107
+ # Create a new Bio::PhyloXML::Parser object.
108
+ #
109
+ # p = Bio::PhyloXML::Parser.open_uri("http://www.phyloxml.org/examples/apaf.xml")
110
+ #
111
+ # ---
112
+ # *Arguments*:
113
+ # * (required) _uri_: (URI or String) URI to the data to parse
114
+ # * (optional) _validate_: For URI reader, the "validate" option is ignored and no validation is executed.
115
+ # *Returns*:: Bio::PhyloXML::Parser object
116
+ def self.open_uri(uri, validate=true)
117
+ case uri
118
+ when URI
119
+ uri = uri.to_s
120
+ else
121
+ # raises error if not a String
122
+ uri = uri.to_str
123
+ # raises error if invalid URI
124
+ URI.parse(uri)
125
+ end
126
+
127
+ obj = new(nil, validate)
128
+ obj.instance_eval {
129
+ @reader = XML::Reader.file(uri)
130
+ _skip_leader
131
+ }
132
+ obj
133
+ end
134
+
135
+ # Special class for closed PhyloXML::Parser object.
136
+ # It raises error for any methods except essential methods.
137
+ #
138
+ # Bio::PhyloXML internal use only.
139
+ class ClosedPhyloXMLParser #:nodoc:
140
+ def method_missing(*arg)
141
+ raise LibXML::XML::Error, 'closed PhyloXML::Parser object'
142
+ end
143
+ end #class ClosedPhyloXMLParser
144
+
145
+ # Closes the LibXML::Reader inside the object.
146
+ # It also closes the opened file if it is created by using
147
+ # Bio::PhyloXML::Parser.open method.
148
+ #
149
+ # When a closed object is closed again, or a closed object is used,
150
+ # it raises LibXML::XML::Error.
151
+ # ---
152
+ # *Returns*:: nil
153
+ def close
154
+ @reader.close
155
+ @reader = ClosedPhyloXMLParser.new
156
+ nil
157
+ end
158
+
159
+ # Initializes LibXML::Reader and reads from the IO until it reaches
160
+ # the first phylogeny element.
161
+ #
162
+ # Create a new Bio::PhyloXML::Parser object.
163
+ #
164
+ # p = Bio::PhyloXML::Parser.for_io($stdin)
165
+ #
166
+ # ---
167
+ # *Arguments*:
168
+ # * (required) _io_: IO object
169
+ # * (optional) _validate_: For IO reader, the "validate" option is ignored and no validation is executed.
170
+ # *Returns*:: Bio::PhyloXML::Parser object
171
+ def self.for_io(io, validate=true)
172
+ obj = new(nil, validate)
173
+ obj.instance_eval {
174
+ @reader = XML::Reader.io(io,
175
+ { :options =>
176
+ LibXML::XML::Parser::Options::NONET })
177
+ _skip_leader
178
+ }
179
+ obj
180
+ end
181
+
182
+ # (private) returns PhyloXML schema
183
+ def _schema
184
+ XML::Schema.document(XML::Document.file(File.join(File.dirname(__FILE__),'phyloxml.xsd')))
185
+ end
186
+ private :_schema
187
+
188
+ # (private) do validation
189
+ # ---
190
+ # *Arguments*:
191
+ # * (required) <em>data_type</em>: :file for filename, :string for string
192
+ # * (required) _arg_: filename or string
193
+ # *Returns*:: (undefined)
194
+ def _validate(data_type, arg)
195
+ options = { :options =>
196
+ (LibXML::XML::Parser::Options::NOERROR | # no error messages
197
+ LibXML::XML::Parser::Options::NOWARNING | # no warning messages
198
+ LibXML::XML::Parser::Options::NONET) # no network access
199
+ }
200
+ case data_type
201
+ when :file
202
+ # No validation when special file e.g. FIFO (named pipe)
203
+ return unless File.file?(arg)
204
+ xml_instance = XML::Document.file(arg, options)
205
+ when :string
206
+ xml_instance = XML::Document.string(arg, options)
207
+ else
208
+ # no validation for unknown data type
209
+ return
210
+ end
211
+
212
+ schema = _schema
213
+ begin
214
+ flag = xml_instance.validate_schema(schema) do |msg, flag|
215
+ # The document of libxml-ruby says that the block is called
216
+ # when validation failed, but it seems it is never called
217
+ # even when validation failed!
218
+ raise "Validation of the XML document against phyloxml.xsd schema failed. #{msg}"
219
+ end
220
+ rescue LibXML::XML::Error => evar
221
+ raise "Validation of the XML document against phyloxml.xsd schema failed, or XML error occurred. #{evar.message}"
222
+ end
223
+ unless flag then
224
+ raise "Validation of the XML document against phyloxml.xsd schema failed."
225
+ end
226
+ end
227
+ private :_validate
228
+
229
+ # (private) It seems that LibXML::XML::Reader reads from the network
230
+ # even if LibXML::XML::Parser::Options::NONET is set.
231
+ # So, for URI-like filename, '://' is replaced with ':/'.
232
+ def _secure_filename(filename)
233
+ # for safety, URI-like filename is checked.
234
+ if /\A[a-zA-Z]+\:\/\// =~ filename then
235
+ # for example, "http://a/b" is changed to "http:/a/b".
236
+ filename = filename.sub(/\:\/\//, ':/')
237
+ end
238
+ filename
239
+ end
240
+ private :_secure_filename
241
+
242
+ # (private) advances the reader until it reaches the first phylogeny element
243
+ def _skip_leader
244
+ #loops through until reaches phylogeny stuff
245
+ # Have to leave this way, if accepting strings, instead of files
246
+ @reader.read until is_element?('phylogeny')
247
+ nil
248
+ end
249
+ private :_skip_leader
250
+
251
+ # Initializes LibXML::Reader and reads the PhyloXML-formatted string
252
+ # until it reaches the first phylogeny element.
253
+ #
254
+ # Create a new Bio::PhyloXML::Parser object.
255
+ #
256
+ # str = File.read("./phyloxml_examples.xml")
257
+ # p = Bio::PhyloXML::Parser.new(str)
258
+ #
259
+ #
260
+ # Deprecated usage: Reads data from a file. <em>str</em> is a filename.
261
+ #
262
+ # p = Bio::PhyloXML::Parser.new("./phyloxml_examples.xml")
263
+ #
264
+ # Taking filename is deprecated. Use Bio::PhyloXML::Parser.open(filename).
265
+ #
266
+ # ---
267
+ # *Arguments*:
268
+ # * (required) _str_: PhyloXML-formatted string
269
+ # * (optional) _validate_: Whether to validate the file against schema or not. Default value is true.
270
+ # *Returns*:: Bio::PhyloXML::Parser object
271
+ def initialize(str, validate=true)
272
+
273
+ @other = []
274
+
275
+ return unless str
276
+
277
+ # For compatibility, if filename-like string is given,
278
+ # treat it as a filename.
279
+ if /[\<\>\r\n]/ !~ str and File.exist?(str) then
280
+ # assume that str is filename
281
+ warn "Bio::PhyloXML::Parser.new(filename) is deprecated. Use Bio::PhyloXML::Parser.open(filename)."
282
+ filename = _secure_filename(str)
283
+ _validate(:file, filename) if validate
284
+ @reader = XML::Reader.file(filename)
285
+ _skip_leader
286
+ return
287
+ end
288
+
289
+ # initialize for string
290
+ @reader = XML::Reader.string(str,
291
+ { :options =>
292
+ LibXML::XML::Parser::Options::NONET })
293
+ _skip_leader
294
+ end
295
+
296
+
297
+ # Iterate through all trees in the file.
298
+ #
299
+ # phyloxml = Bio::PhyloXML::Parser.open('example.xml')
300
+ # phyloxml.each do |tree|
301
+ # puts tree.name
302
+ # end
303
+ #
304
+ def each
305
+ while tree = next_tree
306
+ yield tree
307
+ end
308
+ end
309
+
310
+ # Access the specified tree in the file. It parses trees until the specified
311
+ # tree is reached.
312
+ #
313
+ # # Get 3rd tree in the file (starts counting from 0).
314
+ # parser = PhyloXML::Parser.open('phyloxml_examples.xml')
315
+ # tree = parser[2]
316
+ #
317
+ def [](i)
318
+ tree = nil
319
+ (i+1).times do
320
+ tree = self.next_tree
321
+ end
322
+ return tree
323
+ end
324
+
325
+ # Parse and return the next phylogeny tree. If there are no more phylogeny
326
+ # elements, nil is returned. If there is something else besides phylogeny
327
+ # elements, it is saved in the PhyloXML::Parser#other.
328
+ #
329
+ # p = Bio::PhyloXML::Parser.open("./phyloxml_examples.xml")
330
+ # tree = p.next_tree
331
+ #
332
+ # ---
333
+ # *Returns*:: Bio::PhyloXML::Tree
334
+ def next_tree()
335
+
336
+ if not is_element?('phylogeny')
337
+ if @reader.node_type == XML::Reader::TYPE_END_ELEMENT
338
+ if is_end_element?('phyloxml')
339
+ return nil
340
+ else
341
+ @reader.read
342
+ @reader.read
343
+ if is_end_element?('phyloxml')
344
+ return nil
345
+ end
346
+ end
347
+ end
348
+ # phyloxml can hold only phylogeny and "other" elements. If this is not
349
+ # phylogeny element then it is other. Also, "other" always comes after
350
+ # all phylogenies
351
+ @other << parse_other
352
+ #return nil for tree, since this is not valid phyloxml tree.
353
+ return nil
354
+ end
355
+
356
+ tree = Bio::PhyloXML::Tree.new
357
+
358
+ # keep track of current node in clades array/stack. Current node is the
359
+ # last element in the clades array
360
+ clades = []
361
+ clades.push tree
362
+
363
+ #keep track of current edge to be able to parse branch_length tag
364
+ current_edge = nil
365
+
366
+ # we are going to parse clade iteratively by pointing (and changing) to
367
+ # the current node in the tree. Since the property element is both in
368
+ # clade and in the phylogeny, we need some boolean to know if we are
369
+ # parsing the clade (there can be only max 1 clade in phylogeny) or
370
+ # parsing phylogeny
371
+ parsing_clade = false
372
+
373
+ while not is_end_element?('phylogeny') do
374
+ break if is_end_element?('phyloxml')
375
+
376
+ # parse phylogeny elements, except clade
377
+ if not parsing_clade
378
+
379
+ if is_element?('phylogeny')
380
+ @reader["rooted"] == "true" ? tree.rooted = true : tree.rooted = false
381
+ @reader["rerootable"] == "true" ? tree.rerootable = true : tree.rerootable = false
382
+ parse_attributes(tree, ["branch_length_unit", 'type'])
383
+ end
384
+
385
+ parse_simple_elements(tree, [ "name", 'description', "date"])
386
+
387
+ if is_element?('confidence')
388
+ tree.confidences << parse_confidence
389
+ end
390
+
391
+ end
392
+
393
+ if @reader.node_type == XML::Reader::TYPE_ELEMENT
394
+ case @reader.name
395
+ when 'clade'
396
+ #parse clade element
397
+
398
+ parsing_clade = true
399
+
400
+ node= Bio::PhyloXML::Node.new
401
+
402
+ branch_length = @reader['branch_length']
403
+
404
+ parse_attributes(node, ["id_source"])
405
+
406
+ #add new node to the tree
407
+ tree.add_node(node)
408
+ # The first clade will always be root since by xsd schema phyloxml can
409
+ # have 0 to 1 clades in it.
410
+ if tree.root == nil
411
+ tree.root = node
412
+ else
413
+ current_edge = tree.add_edge(clades[-1], node,
414
+ Bio::Tree::Edge.new(branch_length))
415
+ end
416
+ clades.push node
417
+ #end if clade element
418
+ else
419
+ parse_clade_elements(clades[-1], current_edge) if parsing_clade
420
+ end
421
+ end
422
+
423
+ #end clade element, go one parent up
424
+ if is_end_element?('clade')
425
+
426
+ #if we have reached the closing tag of the top-most clade, then our
427
+ # current node should point to the root. If that's the case, we are done
428
+ # parsing the clade element
429
+ if clades[-1] == tree.root
430
+ parsing_clade = false
431
+ else
432
+ # set current node (clades[-1]) to the previous clade in the array
433
+ clades.pop
434
+ end
435
+ end
436
+
437
+ #parsing phylogeny elements
438
+ if not parsing_clade
439
+
440
+ if @reader.node_type == XML::Reader::TYPE_ELEMENT
441
+ case @reader.name
442
+ when 'property'
443
+ tree.properties << parse_property
444
+
445
+ when 'clade_relation'
446
+ clade_relation = CladeRelation.new
447
+ parse_attributes(clade_relation, ["id_ref_0", "id_ref_1", "distance", "type"])
448
+
449
+ #@ add unit test for this
450
+ if not @reader.empty_element?
451
+ @reader.read
452
+ if is_element?('confidence')
453
+ clade_relation.confidence = parse_confidence
454
+ end
455
+ end
456
+ tree.clade_relations << clade_relation
457
+
458
+ when 'sequence_relation'
459
+ sequence_relation = SequenceRelation.new
460
+ parse_attributes(sequence_relation, ["id_ref_0", "id_ref_1", "distance", "type"])
461
+ if not @reader.empty_element?
462
+ @reader.read
463
+ if is_element?('confidence')
464
+ sequence_relation.confidence = parse_confidence
465
+ end
466
+ end
467
+ tree.sequence_relations << sequence_relation
468
+ when 'phylogeny'
469
+ #do nothing
470
+ else
471
+ tree.other << parse_other
472
+ #puts "Not recognized element. #{@reader.name}"
473
+ end
474
+ end
475
+ end
476
+ # go to next element
477
+ @reader.read
478
+ end #end while not </phylogeny>
479
+ #move on to the next tag after /phylogeny which is text, since phylogeny
480
+ #end tag is empty element, which value is nil, therefore need to move to
481
+ #the next meaningful element (therefore @reader.read twice)
482
+ @reader.read
483
+ @reader.read
484
+
485
+ return tree
486
+ end
487
+
488
+ # return tree of specified name.
489
+ # @todo Implement this method.
490
+ # def get_tree_by_name(name)
491
+
492
+ # while not is_end_element?('phyloxml')
493
+ # if is_element?('phylogeny')
494
+ # @reader.read
495
+ # @reader.read
496
+ #
497
+ # if is_element?('name')
498
+ # @reader.read
499
+ # if @reader.value == name
500
+ # puts "equasl"
501
+ # tree = next_tree
502
+ # puts tree
503
+ # end
504
+ # end
505
+ # end
506
+ # @reader.read
507
+ # end
508
+ #
509
+ # end
510
+
511
+
512
+ private
513
+
514
+ ####
515
+ # Utility methods
516
+ ###
517
+
518
# Tells whether the reader is currently positioned on a start tag whose
# name equals +str+. Always returns true or false.
def is_element?(str)
  return false unless @reader.node_type == XML::Reader::TYPE_ELEMENT
  @reader.name == str
end
521
+
522
# Tells whether the reader is currently positioned on an end tag whose
# name equals +str+. Always returns true or false.
def is_end_element?(str)
  return false unless @reader.node_type == XML::Reader::TYPE_END_ELEMENT
  @reader.name == str
end
525
+
526
# Sanity check used after reading an element's content: raises a
# RuntimeError unless the reader sits on the end tag named +str+.
# Returns nil when the check passes.
def has_reached_end_element?(str)
  return if is_end_element?(str)

  raise "Warning: Should have reached </#{str}> element here"
end
531
+
532
+ # Parses a simple XML element, for example <speciations>1</speciations>.
532
+ # It reads the element's text and assigns it through the matching setter
533
+ # (here object.speciations = "1"). Also checks that the end tag
534
+ # (</speciations>) has been reached and raises an error if not.
536
def parse_simple_element(object, name)
  # Nothing to do unless the reader is on the <name> start tag.
  return unless is_element?(name)

  @reader.read                        # step onto the node following the start tag
  setter = "#{name}="
  object.send(setter, @reader.value)  # store that node's value via object.name=
  @reader.read                        # step onto what should be </name>
  has_reached_end_element?(name)
end
544
+
545
# Tries parse_simple_element once for each tag name in +elements+, in the
# order given, against the current reader position.
def parse_simple_elements(object, elements)
  elements.each { |tag_name| parse_simple_element(object, tag_name) }
end
550
+
551
+ #Parses list of attributes
552
+ #use for the code like: clade_relation.type = @reader["type"]
553
def parse_attributes(object, arr_of_attrs)
  # For every attribute name, look it up on the reader's current node and
  # hand the result to the same-named setter on object.
  arr_of_attrs.each do |attr_name|
    attr_value = @reader[attr_name]
    object.send("#{attr_name}=", attr_value)
  end
end
558
+
559
# Handles the child element of <clade> that the reader is currently
# positioned on and stores the result on +current_node+ (branch_length goes
# to +current_edge+). Does nothing unless the reader is on a start tag.
# Contains no loop of its own: the caller advances the reader and calls
# this method once per node.
#
# current_node:: object receiving the parsed clade data
# current_edge:: object receiving the branch length; may be nil
def parse_clade_elements(current_node, current_edge)
  return unless @reader.node_type == XML::Reader::TYPE_ELEMENT

  case @reader.name
  when 'branch_length'
    # @todo add unit test for this. current_edge is nil, if the root clade
    # has branch_length attribute.
    @reader.read
    branch_length = @reader.value
    current_edge.distance = branch_length.to_f unless current_edge.nil?
    @reader.read
  when 'width'
    @reader.read
    current_node.width = @reader.value
    @reader.read
  when 'name'
    @reader.read
    current_node.name = @reader.value
    @reader.read
  when 'events'
    current_node.events = parse_events
  when 'confidence'
    current_node.confidences << parse_confidence
  when 'sequence'
    current_node.sequences << parse_sequence
  when 'property'
    current_node.properties << parse_property
  when 'taxonomy'
    current_node.taxonomies << parse_taxonomy
  when 'distribution'
    current_node.distributions << parse_distribution
  when 'node_id'
    id = Id.new
    id.type = @reader["type"]
    @reader.read
    id.value = @reader.value
    @reader.read
    #has_reached_end_element?('node_id')
    #@todo write unit test for this. There is no example of this in the example files
    current_node.id = id
  when 'color'
    color = BranchColor.new
    # The reader is still on <color> here, so the channel elements have to
    # be walked explicitly; calling parse_simple_element right away would
    # match nothing and leave the colour empty.
    unless @reader.empty_element?
      @reader.read
      until is_end_element?('color')
        parse_simple_elements(color, ['red', 'green', 'blue'])
        @reader.read
      end
    end
    current_node.color = color
    #@todo add unit test for this
  when 'date'
    date = Date.new
    date.unit = @reader["unit"]
    # Walk the children until </date>. Nodes that are not one of the known
    # children (e.g. whitespace between tags) are simply skipped, so the
    # result does not depend on how the XML is formatted. An empty <date/>
    # has no end tag and therefore no children to walk.
    unless @reader.empty_element?
      @reader.read
      until is_end_element?('date')
        parse_simple_elements(date, ['desc', 'value', 'minimum', 'maximum'])
        @reader.read
      end
    end
    current_node.date = date
  when 'reference'
    reference = Reference.new()
    reference.doi = @reader['doi']
    unless @reader.empty_element?
      until is_end_element?('reference')
        parse_simple_element(reference, 'desc')
        @reader.read
      end
    end
    current_node.references << reference
  when 'binary_characters'
    current_node.binary_characters = parse_binary_characters
  when 'clade'
    #do nothing
  else
    current_node.other << parse_other
    #puts "No match found in parse_clade_elements.(#{@reader.name})"
  end
end #parse_clade_elements
644
+
645
# Builds a PhyloXML::Events object from the <events> element the reader is
# positioned on, reading up to the matching </events> end tag.
def parse_events()
  parsed = PhyloXML::Events.new
  simple_tags = ['type', 'duplications',
                 'speciations', 'losses']

  @reader.read #go to next element
  until is_end_element?('events')
    parse_simple_elements(parsed, simple_tags)
    #@todo could add unit test for this (example file does not have this case)
    parsed.confidence = parse_confidence if is_element?('confidence')
    @reader.read
  end
  parsed
end #parse_events
659
+
660
# Builds a PhyloXML::Taxonomy object from the <taxonomy> element the reader
# is positioned on, reading up to the matching </taxonomy> end tag.
# Child elements that are not recognised are collected in taxonomy.other.
def parse_taxonomy
  taxon = PhyloXML::Taxonomy.new
  parse_attributes(taxon, ["id_source"])

  # Reads the value of the node after the current start tag and steps once
  # more (onto what is expected to be the end tag).
  element_text = lambda do
    @reader.read
    text = @reader.value
    @reader.read
    text
  end

  @reader.read
  until is_end_element?('taxonomy')
    if @reader.node_type == XML::Reader::TYPE_ELEMENT
      tag = @reader.name
      case tag
      when 'code', 'scientific_name', 'rank', 'authority'
        # plain text children map straight onto the same-named setter
        taxon.send("#{tag}=", element_text.call)
      when 'id'
        taxon.taxonomy_id = parse_id('id')
      when 'common_name'
        taxon.common_names << element_text.call
        #has_reached_end_element?('common_name')
      when 'synonym'
        taxon.synonyms << element_text.call
        #has_reached_end_element?('synonym')
      when 'uri'
        taxon.uri = parse_uri
      else
        taxon.other << parse_other
      end
    end

    @reader.read #move to next tag in the loop
  end
  taxon
end #parse_taxonomy
707
+
708
+ private
709
+
710
# Builds a Sequence object from the <sequence> element the reader is
# positioned on, reading up to the matching </sequence> end tag.
# Child elements that are not recognised are collected in sequence.other.
def parse_sequence
  sequence = Sequence.new
  parse_attributes(sequence, ["type", "id_source", "id_ref"])

  @reader.read
  until is_end_element?('sequence')

    if @reader.node_type == XML::Reader::TYPE_ELEMENT
      case @reader.name
      when 'symbol'
        @reader.read
        sequence.symbol = @reader.value
        @reader.read
      when 'name'
        @reader.read
        sequence.name = @reader.value
        @reader.read
      when 'location'
        @reader.read
        sequence.location = @reader.value
        @reader.read
      when 'mol_seq'
        sequence.is_aligned = @reader["is_aligned"]
        @reader.read
        sequence.mol_seq = @reader.value
        @reader.read
        has_reached_end_element?('mol_seq')
      when 'accession'
        sequence.accession = Accession.new
        sequence.accession.source = @reader["source"]
        @reader.read
        sequence.accession.value = @reader.value
        @reader.read
        has_reached_end_element?('accession')
      when 'uri'
        sequence.uri = parse_uri
      when 'annotation'
        sequence.annotations << parse_annotation
      when 'domain_architecture'
        sequence.domain_architecture = DomainArchitecture.new
        sequence.domain_architecture.length = @reader["length"]
        # Walk the children until </domain_architecture>, parsing only
        # nodes that really are <domain> start tags and skipping anything
        # else (e.g. whitespace between tags). This keeps the result
        # independent of how the XML is formatted. An empty element has
        # no end tag, so there is nothing to walk.
        unless @reader.empty_element?
          @reader.read
          until is_end_element?('domain_architecture')
            if is_element?('domain')
              # parse_domain advances the reader itself, so the loop
              # condition is re-checked without another read here
              sequence.domain_architecture.domains << parse_domain
            else
              @reader.read
            end
          end
        end
      else
        sequence.other << parse_other
        #@todo add unit test
      end
    end

    @reader.read
  end
  sequence
end #parse_sequence
767
+
768
# Builds a Uri object from the <uri> element the reader is positioned on:
# the desc/type attributes plus the element's text.
def parse_uri
  link = Uri.new
  parse_attributes(link, ["desc", "type"])
  parse_simple_element(link, 'uri')
  link
end
774
+
775
# Builds an Annotation object from the <annotation> element the reader is
# positioned on. Attributes are always read; children (desc, confidence,
# property, uri) are read only when the element is not empty.
def parse_annotation
  note = Annotation.new

  parse_attributes(note, ['ref', 'source', 'evidence', 'type'])

  # <annotation .../> has no children and no end tag to look for
  return note if @reader.empty_element?

  until is_end_element?('annotation')
    if is_element?('desc')
      parse_simple_element(note, 'desc')
    end

    if is_element?('confidence')
      note.confidence = parse_confidence
    end

    if is_element?('property')
      note.properties << parse_property
    end

    note.uri = parse_uri if is_element?('uri')

    @reader.read
  end

  note
end
798
+
799
# Builds a Property object from the <property> element the reader is
# positioned on. Raises (via has_reached_end_element?) if the node after
# the value is not </property>.
def parse_property
  prop = Property.new
  attribute_names = ["ref", "unit", "datatype", "applies_to", "id_ref"]
  parse_attributes(prop, attribute_names)

  @reader.read
  prop.value = @reader.value
  @reader.read
  has_reached_end_element?('property')

  prop
end #parse_property
808
+
809
# Builds a Confidence from the <confidence> element the reader is
# positioned on: the "type" attribute and the element's value as a float.
# Raises (via has_reached_end_element?) if the node after the value is not
# </confidence>.
def parse_confidence
  kind = @reader["type"]

  @reader.read
  score = @reader.value.to_f
  @reader.read
  has_reached_end_element?('confidence')

  Confidence.new(kind, score)
end #parse_confidence
817
+
818
# Builds a Distribution object from the <distribution> element the reader
# is positioned on, collecting its desc, points and polygons until the
# matching </distribution> end tag.
def parse_distribution
  dist = Distribution.new

  @reader.read
  until is_end_element?('distribution')
    parse_simple_element(dist, 'desc')

    if is_element?('point')
      dist.points << parse_point
    end
    if is_element?('polygon')
      dist.polygons << parse_polygon
    end

    @reader.read
  end

  dist
end #parse_distribution
832
+
833
# Builds a Point object from the <point> element the reader is positioned
# on: the geodetic_datum/alt_unit attributes, lat and long as read, and
# alt converted to a float.
def parse_point
  pt = Point.new
  parse_attributes(pt, ["geodetic_datum", "alt_unit"])

  @reader.read
  until is_end_element?('point')
    parse_simple_elements(pt, ['lat', 'long'])

    if is_element?('alt')
      @reader.read
      altitude = @reader.value.to_f
      pt.alt = altitude
      @reader.read
      has_reached_end_element?('alt')
    end

    #advance reader
    @reader.read
  end

  pt
end #parse_point
855
+
856
# Builds a Polygon object from the <polygon> element the reader is
# positioned on, collecting every <point> child. Prints a warning when
# fewer than three points were found.
def parse_polygon
  shape = Polygon.new

  @reader.read
  until is_end_element?('polygon')
    shape.points << parse_point if is_element?('point')
    @reader.read
  end

  #@todo should check for it at all? Probably not if xml is valid.
  puts "Warning: <polygon> should have at least 3 points" if shape.points.length < 3

  return shape
end #parse_polygon
870
+
871
# Builds an Id object from the element the reader is positioned on: the
# "provider" attribute plus the element's value. +tag_name+ is the element
# name whose end tag must follow the value; raises (via
# has_reached_end_element?) otherwise.
def parse_id(tag_name)
  identifier = Id.new
  identifier.provider = @reader["provider"]

  @reader.read
  identifier.value = @reader.value
  @reader.read #@todo shouldn't there be another read?
  has_reached_end_element?(tag_name)

  identifier
end #parse_id
880
+
881
# Builds a ProteinDomain object from the <domain> element the reader is
# positioned on. Unlike the other element parsers it reads once more after
# verifying </domain>, so the reader ends one node past the end tag.
def parse_domain
  protein_domain = ProteinDomain.new
  parse_attributes(protein_domain, ["from", "to", "confidence", "id"])

  @reader.read
  protein_domain.value = @reader.value
  @reader.read
  has_reached_end_element?('domain')

  @reader.read # step past </domain>
  protein_domain
end
891
+
892
# Builds a PhyloXML::BinaryCharacters object from the <binary_characters>
# element the reader is positioned on: the type and count attributes, and,
# when the element is not empty, the lost/gained/absent/present lists.
def parse_binary_characters
  chars = PhyloXML::BinaryCharacters.new
  chars.bc_type = @reader['type']
  parse_attributes(chars, ['gained_count', 'absent_count', 'lost_count', 'present_count'])

  # an empty element carries attributes only
  return chars if @reader.empty_element?

  @reader.read
  until is_end_element?('binary_characters')
    ['lost', 'gained', 'absent', 'present'].each do |list_name|
      parse_bc(chars, list_name)
    end

    @reader.read
  end

  chars
end #parse_binary_characters
911
+
912
# If the reader is on the start tag named +element+, appends the value of
# every <bc> child to the collection returned by object.send(element),
# reading up to the matching end tag. Does nothing otherwise.
def parse_bc(object, element)
  return unless is_element?(element)

  @reader.read
  until is_end_element?(element)
    if is_element?('bc')
      @reader.read
      object.send(element) << @reader.value
      @reader.read
      has_reached_end_element?('bc')
    end
    @reader.read
  end
end #parse_bc
926
+
927
# Generic fallback for elements the parser has no dedicated handling for.
# Builds a PhyloXML::Other from the element the reader is positioned on:
# its name, its attributes, its text value and (recursively) its child
# elements.
#
# For a non-empty element the reader is left on the element's own end tag.
# For an empty element (<foo/>) nothing beyond the element itself is
# consumed.
def parse_other
  other_obj = PhyloXML::Other.new
  other_obj.element_name = @reader.name

  # Must be sampled while the reader is still on the element itself, i.e.
  # before stepping onto its attributes.
  self_closing = @reader.empty_element?

  #parse attributes
  code = @reader.move_to_first_attribute
  while code == 1
    other_obj.attributes[@reader.name] = @reader.value
    code = @reader.move_to_next_attribute
  end

  # An empty element has no end tag; looking for one would swallow the
  # nodes that follow it.
  return other_obj if self_closing

  # Always read before testing for the end tag. After a recursive call the
  # reader sits on the child's end tag, which must not be mistaken for our
  # own when the child has the same name as its parent.
  loop do
    @reader.read
    case @reader.node_type
    when XML::Reader::TYPE_ELEMENT
      other_obj.children << parse_other #recursive call to parse children
    when XML::Reader::TYPE_TEXT
      other_obj.value = @reader.value
    when XML::Reader::TYPE_END_ELEMENT
      break if @reader.name == other_obj.element_name
    end
  end

  other_obj
end #parse_other
949
+
950
+ end #class phyloxmlParser
951
+
952
+ end #module PhyloXML
953
+
954
+ end #module Bio