bio 1.3.1 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (303) hide show
  1. data/ChangeLog +2105 -3728
  2. data/KNOWN_ISSUES.rdoc +35 -3
  3. data/README.rdoc +8 -2
  4. data/RELEASE_NOTES.rdoc +166 -0
  5. data/bin/bioruby +4 -1
  6. data/bioruby.gemspec +146 -1
  7. data/bioruby.gemspec.erb +3 -1
  8. data/doc/ChangeLog-before-1.3.1 +3961 -0
  9. data/doc/Tutorial.rd +154 -22
  10. data/doc/Tutorial.rd.html +125 -68
  11. data/lib/bio.rb +21 -6
  12. data/lib/bio/appl/bl2seq/report.rb +11 -202
  13. data/lib/bio/appl/blast/format0.rb +0 -193
  14. data/lib/bio/appl/blast/report.rb +2 -147
  15. data/lib/bio/appl/blast/wublast.rb +0 -208
  16. data/lib/bio/appl/fasta.rb +4 -19
  17. data/lib/bio/appl/fasta/format10.rb +0 -14
  18. data/lib/bio/appl/genscan/report.rb +0 -176
  19. data/lib/bio/appl/hmmer.rb +1 -15
  20. data/lib/bio/appl/hmmer/report.rb +0 -100
  21. data/lib/bio/appl/meme/mast.rb +156 -0
  22. data/lib/bio/appl/meme/mast/report.rb +91 -0
  23. data/lib/bio/appl/meme/motif.rb +48 -0
  24. data/lib/bio/appl/psort.rb +0 -111
  25. data/lib/bio/appl/psort/report.rb +1 -45
  26. data/lib/bio/appl/pts1.rb +2 -4
  27. data/lib/bio/appl/sosui/report.rb +5 -54
  28. data/lib/bio/appl/targetp/report.rb +1 -104
  29. data/lib/bio/appl/tmhmm/report.rb +0 -36
  30. data/lib/bio/command.rb +94 -10
  31. data/lib/bio/data/aa.rb +1 -77
  32. data/lib/bio/data/codontable.rb +1 -95
  33. data/lib/bio/data/na.rb +1 -26
  34. data/lib/bio/db/aaindex.rb +1 -38
  35. data/lib/bio/db/fasta.rb +1 -134
  36. data/lib/bio/db/fasta/format_qual.rb +204 -0
  37. data/lib/bio/db/fasta/qual.rb +102 -0
  38. data/lib/bio/db/fastq.rb +645 -0
  39. data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
  40. data/lib/bio/db/fastq/format_fastq.rb +175 -0
  41. data/lib/bio/db/genbank/genbank.rb +1 -86
  42. data/lib/bio/db/gff.rb +0 -17
  43. data/lib/bio/db/go.rb +4 -72
  44. data/lib/bio/db/kegg/common.rb +112 -0
  45. data/lib/bio/db/kegg/compound.rb +29 -20
  46. data/lib/bio/db/kegg/drug.rb +74 -34
  47. data/lib/bio/db/kegg/enzyme.rb +26 -5
  48. data/lib/bio/db/kegg/genes.rb +128 -15
  49. data/lib/bio/db/kegg/genome.rb +3 -41
  50. data/lib/bio/db/kegg/glycan.rb +19 -24
  51. data/lib/bio/db/kegg/orthology.rb +16 -56
  52. data/lib/bio/db/kegg/reaction.rb +81 -28
  53. data/lib/bio/db/kegg/taxonomy.rb +1 -52
  54. data/lib/bio/db/litdb.rb +1 -16
  55. data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
  56. data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
  57. data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
  58. data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
  59. data/lib/bio/db/prosite.rb +2 -95
  60. data/lib/bio/db/rebase.rb +5 -6
  61. data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
  62. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
  63. data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
  64. data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
  65. data/lib/bio/io/das.rb +0 -44
  66. data/lib/bio/io/ddbjxml.rb +1 -181
  67. data/lib/bio/io/flatfile.rb +1 -7
  68. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  69. data/lib/bio/io/keggapi.rb +0 -442
  70. data/lib/bio/io/ncbirest.rb +130 -132
  71. data/lib/bio/io/ncbisoap.rb +2 -1
  72. data/lib/bio/io/pubmed.rb +0 -88
  73. data/lib/bio/location.rb +0 -73
  74. data/lib/bio/pathway.rb +0 -171
  75. data/lib/bio/sequence.rb +18 -1
  76. data/lib/bio/sequence/adapter.rb +3 -0
  77. data/lib/bio/sequence/format.rb +16 -0
  78. data/lib/bio/sequence/quality_score.rb +205 -0
  79. data/lib/bio/tree.rb +70 -5
  80. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
  81. data/lib/bio/util/sirna.rb +1 -23
  82. data/lib/bio/version.rb +1 -1
  83. data/sample/demo_aaindex.rb +67 -0
  84. data/sample/demo_aminoacid.rb +101 -0
  85. data/sample/demo_bl2seq_report.rb +220 -0
  86. data/sample/demo_blast_report.rb +285 -0
  87. data/sample/demo_codontable.rb +119 -0
  88. data/sample/demo_das.rb +105 -0
  89. data/sample/demo_ddbjxml.rb +212 -0
  90. data/sample/demo_fasta_remote.rb +51 -0
  91. data/sample/demo_fastaformat.rb +105 -0
  92. data/sample/demo_genbank.rb +132 -0
  93. data/sample/demo_genscan_report.rb +202 -0
  94. data/sample/demo_gff1.rb +49 -0
  95. data/sample/demo_go.rb +98 -0
  96. data/sample/demo_hmmer_report.rb +149 -0
  97. data/sample/demo_kegg_compound.rb +57 -0
  98. data/sample/demo_kegg_drug.rb +65 -0
  99. data/sample/demo_kegg_genome.rb +74 -0
  100. data/sample/demo_kegg_glycan.rb +72 -0
  101. data/sample/demo_kegg_orthology.rb +62 -0
  102. data/sample/demo_kegg_reaction.rb +66 -0
  103. data/sample/demo_kegg_taxonomy.rb +92 -0
  104. data/sample/demo_keggapi.rb +502 -0
  105. data/sample/demo_litdb.rb +42 -0
  106. data/sample/demo_locations.rb +99 -0
  107. data/sample/demo_ncbi_rest.rb +130 -0
  108. data/sample/demo_nucleicacid.rb +49 -0
  109. data/sample/demo_pathway.rb +196 -0
  110. data/sample/demo_prosite.rb +120 -0
  111. data/sample/demo_psort.rb +138 -0
  112. data/sample/demo_psort_report.rb +70 -0
  113. data/sample/demo_pubmed.rb +118 -0
  114. data/sample/demo_sirna.rb +63 -0
  115. data/sample/demo_sosui_report.rb +89 -0
  116. data/sample/demo_targetp_report.rb +135 -0
  117. data/sample/demo_tmhmm_report.rb +68 -0
  118. data/sample/pmfetch.rb +13 -4
  119. data/sample/pmsearch.rb +15 -4
  120. data/sample/test_phyloxml_big.rb +205 -0
  121. data/test/bioruby_test_helper.rb +61 -0
  122. data/test/data/KEGG/1.1.1.1.enzyme +935 -0
  123. data/test/data/KEGG/C00025.compound +102 -0
  124. data/test/data/KEGG/D00063.drug +104 -0
  125. data/test/data/KEGG/G00024.glycan +47 -0
  126. data/test/data/KEGG/G01366.glycan +18 -0
  127. data/test/data/KEGG/K02338.orthology +902 -0
  128. data/test/data/KEGG/R00006.reaction +14 -0
  129. data/test/data/fastq/README.txt +109 -0
  130. data/test/data/fastq/error_diff_ids.fastq +20 -0
  131. data/test/data/fastq/error_double_qual.fastq +22 -0
  132. data/test/data/fastq/error_double_seq.fastq +22 -0
  133. data/test/data/fastq/error_long_qual.fastq +20 -0
  134. data/test/data/fastq/error_no_qual.fastq +20 -0
  135. data/test/data/fastq/error_qual_del.fastq +20 -0
  136. data/test/data/fastq/error_qual_escape.fastq +20 -0
  137. data/test/data/fastq/error_qual_null.fastq +0 -0
  138. data/test/data/fastq/error_qual_space.fastq +21 -0
  139. data/test/data/fastq/error_qual_tab.fastq +21 -0
  140. data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
  141. data/test/data/fastq/error_qual_vtab.fastq +20 -0
  142. data/test/data/fastq/error_short_qual.fastq +20 -0
  143. data/test/data/fastq/error_spaces.fastq +20 -0
  144. data/test/data/fastq/error_tabs.fastq +21 -0
  145. data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
  146. data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
  147. data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
  148. data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
  149. data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
  150. data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
  151. data/test/data/fastq/error_trunc_in_title.fastq +17 -0
  152. data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
  153. data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
  154. data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
  155. data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
  156. data/test/data/fastq/longreads_as_illumina.fastq +40 -0
  157. data/test/data/fastq/longreads_as_sanger.fastq +40 -0
  158. data/test/data/fastq/longreads_as_solexa.fastq +40 -0
  159. data/test/data/fastq/longreads_original_sanger.fastq +120 -0
  160. data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
  161. data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
  162. data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
  163. data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
  164. data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
  165. data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
  166. data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
  167. data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
  168. data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
  169. data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
  170. data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
  171. data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
  172. data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
  173. data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
  174. data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
  175. data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
  176. data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
  177. data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
  178. data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
  179. data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
  180. data/test/data/meme/db +0 -0
  181. data/test/data/meme/mast +0 -0
  182. data/test/data/meme/mast.out +13 -0
  183. data/test/data/meme/meme.out +3 -0
  184. data/test/data/phyloxml/apaf.xml +666 -0
  185. data/test/data/phyloxml/bcl_2.xml +2097 -0
  186. data/test/data/phyloxml/made_up.xml +144 -0
  187. data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
  188. data/test/data/phyloxml/phyloxml_examples.xml +415 -0
  189. data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
  190. data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
  191. data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
  192. data/test/functional/bio/appl/test_pts1.rb +7 -5
  193. data/test/functional/bio/io/test_ensembl.rb +4 -3
  194. data/test/functional/bio/io/test_pubmed.rb +9 -3
  195. data/test/functional/bio/io/test_soapwsdl.rb +5 -4
  196. data/test/functional/bio/io/test_togows.rb +5 -4
  197. data/test/functional/bio/sequence/test_output_embl.rb +6 -4
  198. data/test/functional/bio/test_command.rb +54 -5
  199. data/test/runner.rb +5 -3
  200. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
  201. data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
  202. data/test/unit/bio/appl/blast/test_report.rb +5 -4
  203. data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
  204. data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
  205. data/test/unit/bio/appl/genscan/test_report.rb +8 -9
  206. data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
  207. data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
  208. data/test/unit/bio/appl/mafft/test_report.rb +6 -5
  209. data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
  210. data/test/unit/bio/appl/meme/test_mast.rb +103 -0
  211. data/test/unit/bio/appl/meme/test_motif.rb +38 -0
  212. data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
  213. data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
  214. data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
  215. data/test/unit/bio/appl/sim4/test_report.rb +5 -4
  216. data/test/unit/bio/appl/sosui/test_report.rb +6 -5
  217. data/test/unit/bio/appl/targetp/test_report.rb +5 -3
  218. data/test/unit/bio/appl/test_blast.rb +5 -4
  219. data/test/unit/bio/appl/test_fasta.rb +4 -2
  220. data/test/unit/bio/appl/test_pts1.rb +4 -2
  221. data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
  222. data/test/unit/bio/data/test_aa.rb +5 -3
  223. data/test/unit/bio/data/test_codontable.rb +5 -4
  224. data/test/unit/bio/data/test_na.rb +5 -3
  225. data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
  226. data/test/unit/bio/db/embl/test_common.rb +4 -2
  227. data/test/unit/bio/db/embl/test_embl.rb +6 -6
  228. data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
  229. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
  230. data/test/unit/bio/db/embl/test_sptr.rb +6 -8
  231. data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
  232. data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
  233. data/test/unit/bio/db/kegg/test_compound.rb +146 -0
  234. data/test/unit/bio/db/kegg/test_drug.rb +194 -0
  235. data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
  236. data/test/unit/bio/db/kegg/test_genes.rb +32 -4
  237. data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
  238. data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
  239. data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
  240. data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
  241. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
  242. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
  243. data/test/unit/bio/db/test_aaindex.rb +6 -6
  244. data/test/unit/bio/db/test_fasta.rb +5 -46
  245. data/test/unit/bio/db/test_fastq.rb +829 -0
  246. data/test/unit/bio/db/test_gff.rb +4 -2
  247. data/test/unit/bio/db/test_lasergene.rb +7 -5
  248. data/test/unit/bio/db/test_medline.rb +4 -2
  249. data/test/unit/bio/db/test_newick.rb +6 -6
  250. data/test/unit/bio/db/test_nexus.rb +4 -2
  251. data/test/unit/bio/db/test_phyloxml.rb +769 -0
  252. data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
  253. data/test/unit/bio/db/test_prosite.rb +6 -5
  254. data/test/unit/bio/db/test_qual.rb +63 -0
  255. data/test/unit/bio/db/test_rebase.rb +5 -3
  256. data/test/unit/bio/db/test_soft.rb +7 -6
  257. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
  258. data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
  259. data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
  260. data/test/unit/bio/io/test_ddbjxml.rb +4 -3
  261. data/test/unit/bio/io/test_ensembl.rb +5 -3
  262. data/test/unit/bio/io/test_fastacmd.rb +4 -3
  263. data/test/unit/bio/io/test_flatfile.rb +6 -5
  264. data/test/unit/bio/io/test_soapwsdl.rb +4 -3
  265. data/test/unit/bio/io/test_togows.rb +4 -2
  266. data/test/unit/bio/sequence/test_aa.rb +5 -3
  267. data/test/unit/bio/sequence/test_common.rb +4 -2
  268. data/test/unit/bio/sequence/test_compat.rb +4 -2
  269. data/test/unit/bio/sequence/test_dblink.rb +5 -3
  270. data/test/unit/bio/sequence/test_na.rb +4 -2
  271. data/test/unit/bio/sequence/test_quality_score.rb +330 -0
  272. data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
  273. data/test/unit/bio/test_alignment.rb +5 -3
  274. data/test/unit/bio/test_command.rb +4 -3
  275. data/test/unit/bio/test_db.rb +5 -3
  276. data/test/unit/bio/test_feature.rb +4 -2
  277. data/test/unit/bio/test_location.rb +4 -2
  278. data/test/unit/bio/test_map.rb +5 -3
  279. data/test/unit/bio/test_pathway.rb +4 -2
  280. data/test/unit/bio/test_reference.rb +4 -2
  281. data/test/unit/bio/test_sequence.rb +5 -3
  282. data/test/unit/bio/test_shell.rb +5 -3
  283. data/test/unit/bio/test_tree.rb +6 -6
  284. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
  285. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
  286. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
  287. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
  288. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
  289. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
  290. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
  291. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
  292. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
  293. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
  294. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
  295. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
  296. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
  297. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
  298. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
  299. data/test/unit/bio/util/test_color_scheme.rb +5 -3
  300. data/test/unit/bio/util/test_contingency_table.rb +5 -3
  301. data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
  302. data/test/unit/bio/util/test_sirna.rb +6 -4
  303. metadata +147 -2
@@ -0,0 +1,954 @@
1
+ #
2
+ # = bio/db/phyloxml_parser.rb - PhyloXML parser
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+ # == Description
11
+ #
12
+ # This file contains the parser for PhyloXML.
13
+ #
14
+ # == Requirements
15
+ #
16
+ # Libxml2 XML parser is required. Install libxml-ruby bindings from
17
+ # http://libxml.rubyforge.org or
18
+ #
19
+ # gem install -r libxml-ruby
20
+ #
21
+ # == References
22
+ #
23
+ # * http://www.phyloxml.org
24
+ #
25
+ # * https://www.nescent.org/wg_phyloinformatics/PhyloSoC:PhyloXML_support_in_BioRuby
26
+
27
+
28
+ require 'uri'
29
+ require 'libxml'
30
+
31
+ require 'bio/tree'
32
+ require 'bio/db/phyloxml/phyloxml_elements'
33
+
34
+
35
+ module Bio
36
+
37
+ module PhyloXML
38
+
39
+
40
+
41
+
42
+ # == Description
43
+ #
44
+ # Bio::PhyloXML::Parser is for parsing phyloXML format files.
45
+ #
46
+ # == Requirements
47
+ #
48
+ # Libxml2 XML parser is required. Install libxml-ruby bindings from
49
+ # http://libxml.rubyforge.org or
50
+ #
51
+ # gem install -r libxml-ruby
52
+ #
53
+ # == Usage
54
+ #
55
+ # require 'bio'
56
+ #
57
+ # # Create new phyloxml parser
58
+ # phyloxml = Bio::PhyloXML::Parser.open('example.xml')
59
+ #
60
+ # # Print the names of all trees in the file
61
+ # phyloxml.each do |tree|
62
+ # puts tree.name
63
+ # end
64
+ #
65
+ #
66
+ # == References
67
+ #
68
+ # http://www.phyloxml.org/documentation/version_100/phyloxml.xsd.html
69
+ #
70
+ class Parser
71
+
72
+ include LibXML
73
+
74
+ # After parsing all the trees, if there is anything else in other xml format,
75
+ # it is saved in this array of PhyloXML::Other objects
76
+ attr_reader :other
77
+
78
+ # Initializes LibXML::Reader and reads the file until it reaches the first
79
+ # phylogeny element.
80
+ #
81
+ # Create a new Bio::PhyloXML::Parser object.
82
+ #
83
+ # p = Bio::PhyloXML::Parser.open("./phyloxml_examples.xml")
84
+ #
85
+ # ---
86
+ # *Arguments*:
87
+ # * (required) _filename_: Path to the file to parse.
88
+ # * (optional) _validate_: Whether to validate the file against schema or not. Default value is true.
89
+ # *Returns*:: Bio::PhyloXML::Parser object
90
+ def self.open(filename, validate=true)
91
+ obj = new(nil, validate)
92
+ obj.instance_eval {
93
+ filename = _secure_filename(filename)
94
+ _validate(:file, filename) if validate
95
+ # XML::Parser::Options::NONET for security reason
96
+ @reader = XML::Reader.file(filename,
97
+ { :options =>
98
+ LibXML::XML::Parser::Options::NONET })
99
+ _skip_leader
100
+ }
101
+ obj
102
+ end
103
+
104
+ # Initializes LibXML::Reader and reads the file until it reaches the first
105
+ # phylogeny element.
106
+ #
107
+ # Create a new Bio::PhyloXML::Parser object.
108
+ #
109
+ # p = Bio::PhyloXML::Parser.open_uri("http://www.phyloxml.org/examples/apaf.xml")
110
+ #
111
+ # ---
112
+ # *Arguments*:
113
+ # * (required) _uri_: (URI or String) URI to the data to parse
114
+ # * (optional) _validate_: For URI reader, the "validate" option is ignored and no validation is executed.
115
+ # *Returns*:: Bio::PhyloXML::Parser object
116
+ def self.open_uri(uri, validate=true)
117
+ case uri
118
+ when URI
119
+ uri = uri.to_s
120
+ else
121
+ # raises error if not a String
122
+ uri = uri.to_str
123
+ # raises error if invalid URI
124
+ URI.parse(uri)
125
+ end
126
+
127
+ obj = new(nil, validate)
128
+ obj.instance_eval {
129
+ @reader = XML::Reader.file(uri)
130
+ _skip_leader
131
+ }
132
+ obj
133
+ end
134
+
135
+ # Special class for closed PhyloXML::Parser object.
136
+ # It raises error for any methods except essential methods.
137
+ #
138
+ # Bio::PhyloXML internal use only.
139
+ class ClosedPhyloXMLParser #:nodoc:
140
+ def method_missing(*arg)
141
+ raise LibXML::XML::Error, 'closed PhyloXML::Parser object'
142
+ end
143
+ end #class ClosedPhyloXMLParser
144
+
145
+ # Closes the LibXML::Reader inside the object.
146
+ # It also closes the opened file if it is created by using
147
+ # Bio::PhyloXML::Parser.open method.
148
+ #
149
+ # When closed object is closed again, or closed object is used,
150
+ # it raises LibXML::XML::Error.
151
+ # ---
152
+ # *Returns*:: nil
153
+ def close
154
+ @reader.close
155
+ @reader = ClosedPhyloXMLParser.new
156
+ nil
157
+ end
158
+
159
+ # Initializes LibXML::Reader and reads from the IO until it reaches
160
+ # the first phylogeny element.
161
+ #
162
+ # Create a new Bio::PhyloXML::Parser object.
163
+ #
164
+ # p = Bio::PhyloXML::Parser.for_io($stdin)
165
+ #
166
+ # ---
167
+ # *Arguments*:
168
+ # * (required) _io_: IO object
169
+ # * (optional) _validate_: For IO reader, the "validate" option is ignored and no validation is executed.
170
+ # *Returns*:: Bio::PhyloXML::Parser object
171
+ def self.for_io(io, validate=true)
172
+ obj = new(nil, validate)
173
+ obj.instance_eval {
174
+ @reader = XML::Reader.io(io,
175
+ { :options =>
176
+ LibXML::XML::Parser::Options::NONET })
177
+ _skip_leader
178
+ }
179
+ obj
180
+ end
181
+
182
+ # (private) returns PhyloXML schema
183
+ def _schema
184
+ XML::Schema.document(XML::Document.file(File.join(File.dirname(__FILE__),'phyloxml.xsd')))
185
+ end
186
+ private :_schema
187
+
188
+ # (private) do validation
189
+ # ---
190
+ # *Arguments*:
191
+ # * (required) <em>data_type</em>: :file for filename, :string for string
192
+ # * (required) _arg_: filename or string
193
+ # *Returns*:: (undefined)
194
+ def _validate(data_type, arg)
195
+ options = { :options =>
196
+ (LibXML::XML::Parser::Options::NOERROR | # no error messages
197
+ LibXML::XML::Parser::Options::NOWARNING | # no warning messages
198
+ LibXML::XML::Parser::Options::NONET) # no network access
199
+ }
200
+ case data_type
201
+ when :file
202
+ # No validation when special file e.g. FIFO (named pipe)
203
+ return unless File.file?(arg)
204
+ xml_instance = XML::Document.file(arg, options)
205
+ when :string
206
+ xml_instance = XML::Document.string(arg, options)
207
+ else
208
+ # no validation for unknown data type
209
+ return
210
+ end
211
+
212
+ schema = _schema
213
+ begin
214
+ flag = xml_instance.validate_schema(schema) do |msg, flag|
215
+ # The document of libxml-ruby says that the block is called
216
+ # when validation failed, but it seems it is never called
217
+ # even when validation failed!
218
+ raise "Validation of the XML document against phyloxml.xsd schema failed. #{msg}"
219
+ end
220
+ rescue LibXML::XML::Error => evar
221
+ raise "Validation of the XML document against phyloxml.xsd schema failed, or XML error occurred. #{evar.message}"
222
+ end
223
+ unless flag then
224
+ raise "Validation of the XML document against phyloxml.xsd schema failed."
225
+ end
226
+ end
227
+ private :_validate
228
+
229
+ # (private) It seems that LibXML::XML::Reader reads from the network
230
+ # even if LibXML::XML::Parser::Options::NONET is set.
231
+ # So, for URI-like filename, '://' is replaced with ':/'.
232
+ def _secure_filename(filename)
233
+ # for safety, URI-like filename is checked.
234
+ if /\A[a-zA-Z]+\:\/\// =~ filename then
235
+ # for example, "http://a/b" is changed to "http:/a/b".
236
+ filename = filename.sub(/\:\/\//, ':/')
237
+ end
238
+ filename
239
+ end
240
+ private :_secure_filename
241
+
242
+ # (private) loops through until reaches phylogeny stuff
243
+ def _skip_leader
244
+ #loops through until reaches phylogeny stuff
245
+ # Have to leave this way, if accepting strings, instead of files
246
+ @reader.read until is_element?('phylogeny')
247
+ nil
248
+ end
249
+ private :_skip_leader
250
+
251
+ # Initializes LibXML::Reader and reads the PhyloXML-formatted string
252
+ # until it reaches the first phylogeny element.
253
+ #
254
+ # Create a new Bio::PhyloXML::Parser object.
255
+ #
256
+ # str = File.read("./phyloxml_examples.xml")
257
+ # p = Bio::PhyloXML::Parser.new(str)
258
+ #
259
+ #
260
+ # Deprecated usage: Reads data from a file. <em>str</em> is a filename.
261
+ #
262
+ # p = Bio::PhyloXML::Parser.new("./phyloxml_examples.xml")
263
+ #
264
+ # Taking filename is deprecated. Use Bio::PhyloXML::Parser.open(filename).
265
+ #
266
+ # ---
267
+ # *Arguments*:
268
+ # * (required) _str_: PhyloXML-formatted string
269
+ # * (optional) _validate_: Whether to validate the file against schema or not. Default value is true.
270
+ # *Returns*:: Bio::PhyloXML::Parser object
271
+ def initialize(str, validate=true)
272
+
273
+ @other = []
274
+
275
+ return unless str
276
+
277
+ # For compatibility, if filename-like string is given,
278
+ # treat it as a filename.
279
+ if /[\<\>\r\n]/ !~ str and File.exist?(str) then
280
+ # assume that str is filename
281
+ warn "Bio::PhyloXML::Parser.new(filename) is deprecated. Use Bio::PhyloXML::Parser.open(filename)."
282
+ filename = _secure_filename(str)
283
+ _validate(:file, filename) if validate
284
+ @reader = XML::Reader.file(filename)
285
+ _skip_leader
286
+ return
287
+ end
288
+
289
+ # initialize for string
290
+ @reader = XML::Reader.string(str,
291
+ { :options =>
292
+ LibXML::XML::Parser::Options::NONET })
293
+ _skip_leader
294
+ end
295
+
296
+
297
+ # Iterate through all trees in the file.
298
+ #
299
+ # phyloxml = Bio::PhyloXML::Parser.open('example.xml')
300
+ # phyloxml.each do |tree|
301
+ # puts tree.name
302
+ # end
303
+ #
304
+ def each
305
+ while tree = next_tree
306
+ yield tree
307
+ end
308
+ end
309
+
310
+ # Access the specified tree in the file. It parses trees until the specified
311
+ # tree is reached.
312
+ #
313
+ # # Get 3rd tree in the file (starts counting from 0).
314
+ # parser = PhyloXML::Parser.open('phyloxml_examples.xml')
315
+ # tree = parser[2]
316
+ #
317
+ def [](i)
318
+ tree = nil
319
+ (i+1).times do
320
+ tree = self.next_tree
321
+ end
322
+ return tree
323
+ end
324
+
325
+ # Parse and return the next phylogeny tree. If there are no more phylogeny
326
+ # elements, nil is returned. If there is something else besides phylogeny
327
+ # elements, it is saved in the PhyloXML::Parser#other.
328
+ #
329
+ # p = Bio::PhyloXML::Parser.open("./phyloxml_examples.xml")
330
+ # tree = p.next_tree
331
+ #
332
+ # ---
333
+ # *Returns*:: Bio::PhyloXML::Tree
334
+ def next_tree()
335
+
336
+ if not is_element?('phylogeny')
337
+ if @reader.node_type == XML::Reader::TYPE_END_ELEMENT
338
+ if is_end_element?('phyloxml')
339
+ return nil
340
+ else
341
+ @reader.read
342
+ @reader.read
343
+ if is_end_element?('phyloxml')
344
+ return nil
345
+ end
346
+ end
347
+ end
348
+ # phyloxml can hold only phylogeny and "other" elements. If this is not a
349
+ # phylogeny element then it is other. Also, "other" always comes after
350
+ # all phylogenies
351
+ @other << parse_other
352
+ #return nil for tree, since this is not valid phyloxml tree.
353
+ return nil
354
+ end
355
+
356
+ tree = Bio::PhyloXML::Tree.new
357
+
358
+ # keep track of current node in clades array/stack. Current node is the
359
+ # last element in the clades array
360
+ clades = []
361
+ clades.push tree
362
+
363
+ #keep track of current edge to be able to parse branch_length tag
364
+ current_edge = nil
365
+
366
+ # we are going to parse clade iteratively by pointing (and changing) to
367
+ # the current node in the tree. Since the property element is both in
368
+ # clade and in the phylogeny, we need some boolean to know if we are
369
+ # parsing the clade (there can be only max 1 clade in phylogeny) or
370
+ # parsing phylogeny
371
+ parsing_clade = false
372
+
373
+ while not is_end_element?('phylogeny') do
374
+ break if is_end_element?('phyloxml')
375
+
376
+ # parse phylogeny elements, except clade
377
+ if not parsing_clade
378
+
379
+ if is_element?('phylogeny')
380
+ @reader["rooted"] == "true" ? tree.rooted = true : tree.rooted = false
381
+ @reader["rerootable"] == "true" ? tree.rerootable = true : tree.rerootable = false
382
+ parse_attributes(tree, ["branch_length_unit", 'type'])
383
+ end
384
+
385
+ parse_simple_elements(tree, [ "name", 'description', "date"])
386
+
387
+ if is_element?('confidence')
388
+ tree.confidences << parse_confidence
389
+ end
390
+
391
+ end
392
+
393
+ if @reader.node_type == XML::Reader::TYPE_ELEMENT
394
+ case @reader.name
395
+ when 'clade'
396
+ #parse clade element
397
+
398
+ parsing_clade = true
399
+
400
+ node= Bio::PhyloXML::Node.new
401
+
402
+ branch_length = @reader['branch_length']
403
+
404
+ parse_attributes(node, ["id_source"])
405
+
406
+ #add new node to the tree
407
+ tree.add_node(node)
408
+ # The first clade will always be root since by xsd schema phyloxml can
409
+ # have 0 to 1 clades in it.
410
+ if tree.root == nil
411
+ tree.root = node
412
+ else
413
+ current_edge = tree.add_edge(clades[-1], node,
414
+ Bio::Tree::Edge.new(branch_length))
415
+ end
416
+ clades.push node
417
+ #end if clade element
418
+ else
419
+ parse_clade_elements(clades[-1], current_edge) if parsing_clade
420
+ end
421
+ end
422
+
423
+ #end clade element, go one parent up
424
+ if is_end_element?('clade')
425
+
426
+ #if we have reached the closing tag of the top-most clade, then our
427
+ # current node should point to the root. If that's the case, we are done
428
+ # parsing the clade element
429
+ if clades[-1] == tree.root
430
+ parsing_clade = false
431
+ else
432
+ # set current node (clades[-1]) to the previous clade in the array
433
+ clades.pop
434
+ end
435
+ end
436
+
437
+ #parsing phylogeny elements
438
+ if not parsing_clade
439
+
440
+ if @reader.node_type == XML::Reader::TYPE_ELEMENT
441
+ case @reader.name
442
+ when 'property'
443
+ tree.properties << parse_property
444
+
445
+ when 'clade_relation'
446
+ clade_relation = CladeRelation.new
447
+ parse_attributes(clade_relation, ["id_ref_0", "id_ref_1", "distance", "type"])
448
+
449
+ #@ add unit test for this
450
+ if not @reader.empty_element?
451
+ @reader.read
452
+ if is_element?('confidence')
453
+ clade_relation.confidence = parse_confidence
454
+ end
455
+ end
456
+ tree.clade_relations << clade_relation
457
+
458
+ when 'sequence_relation'
459
+ sequence_relation = SequenceRelation.new
460
+ parse_attributes(sequence_relation, ["id_ref_0", "id_ref_1", "distance", "type"])
461
+ if not @reader.empty_element?
462
+ @reader.read
463
+ if is_element?('confidence')
464
+ sequence_relation.confidence = parse_confidence
465
+ end
466
+ end
467
+ tree.sequence_relations << sequence_relation
468
+ when 'phylogeny'
469
+ #do nothing
470
+ else
471
+ tree.other << parse_other
472
+ #puts "Not recognized element. #{@reader.name}"
473
+ end
474
+ end
475
+ end
476
+ # go to next element
477
+ @reader.read
478
+ end #end while not </phylogeny>
479
+ #move on to the next tag after /phylogeny which is text, since phylogeny
480
+ #end tag is empty element, which value is nil, therefore need to move to
481
+ #the next meaningful element (therefore @reader.read twice)
482
+ @reader.read
483
+ @reader.read
484
+
485
+ return tree
486
+ end
487
+
488
+ # return tree of specified name.
489
+ # @todo Implement this method.
490
+ # def get_tree_by_name(name)
491
+
492
+ # while not is_end_element?('phyloxml')
493
+ # if is_element?('phylogeny')
494
+ # @reader.read
495
+ # @reader.read
496
+ #
497
+ # if is_element?('name')
498
+ # @reader.read
499
+ # if @reader.value == name
500
+ # puts "equasl"
501
+ # tree = next_tree
502
+ # puts tree
503
+ # end
504
+ # end
505
+ # end
506
+ # @reader.read
507
+ # end
508
+ #
509
+ # end
510
+
511
+
512
+ private
513
+
514
+ ####
515
+ # Utility methods
516
+ ###
517
+
518
# Tells whether the reader currently sits on an opening tag named +str+.
#
# str:: tag name compared against the reader's current node name
# Returns true or false.
def is_element?(str)
  on_start_tag = (@reader.node_type == XML::Reader::TYPE_ELEMENT)
  on_start_tag && @reader.name == str
end
521
+
522
# Tells whether the reader currently sits on a closing tag named +str+.
#
# str:: tag name compared against the reader's current node name
# Returns true or false.
def is_end_element?(str)
  on_end_tag = (@reader.node_type == XML::Reader::TYPE_END_ELEMENT)
  on_end_tag && @reader.name == str
end
525
+
526
# Sanity check used after consuming an element's content: raises a
# RuntimeError unless the reader is positioned on the closing tag +str+.
# Returns nil when the closing tag is where it should be.
def has_reached_end_element?(str)
  return if is_end_element?(str)

  raise "Warning: Should have reached </#{str}> element here"
end
531
+
532
+ # Parses a simple XML element. for example <speciations>1</speciations>
533
+ # It reads in the value and assigns it to object.speciation = 1
534
+ # Also checks if have reached end tag (</speciations> and gives warning
535
+ # if not
536
# object:: receiver of the "<name>=" setter
# name::   tag name; also used to build the setter name
#
# Does nothing (and leaves the reader where it is) unless the reader is on
# the opening tag +name+. Otherwise the next node's value is handed to the
# setter and the reader is left on the closing tag, which is verified.
def parse_simple_element(object, name)
  return unless is_element?(name)

  @reader.read
  text = @reader.value
  object.send("#{name}=", text)
  @reader.read
  has_reached_end_element?(name)
end
544
+
545
# Tries parse_simple_element for every tag name in +elements+, in order,
# against the current reader position.
def parse_simple_elements(object, elements)
  elements.each { |tag| parse_simple_element(object, tag) }
end
550
+
551
+ #Parses list of attributes
552
+ #use for the code like: clade_relation.type = @reader["type"]
553
# object::       receiver of one "<attribute>=" setter call per entry
# arr_of_attrs:: attribute names looked up on the reader's current node
#
# Each setter receives whatever @reader[name] returns for that name.
def parse_attributes(object, arr_of_attrs)
  arr_of_attrs.each do |attr_name|
    setter = "#{attr_name}="
    object.send(setter, @reader[attr_name])
  end
end
558
+
559
# Handles the one node the reader is currently positioned on while the
# caller iterates over the content of a <clade>. Nothing happens unless that
# node is an opening tag; the caller is expected to advance the reader
# afterwards (there is deliberately no loop over siblings here).
#
# current_node:: object receiving the parsed child data
# current_edge:: edge receiving branch_length; may be nil, in which case
#                the branch length is read and discarded
def parse_clade_elements(current_node, current_edge)
  #no loop inside, loop is already outside

  if @reader.node_type == XML::Reader::TYPE_ELEMENT
    case @reader.name
    when 'branch_length'
      # @todo add unit test for this. current_edge is nil, if the root clade
      # has branch_length attribute.
      @reader.read
      branch_length = @reader.value
      current_edge.distance = branch_length.to_f if current_edge != nil
      @reader.read
    when 'width'
      @reader.read
      current_node.width = @reader.value
      @reader.read
    when 'name'
      @reader.read
      current_node.name = @reader.value
      @reader.read
    when 'events'
      current_node.events = parse_events
    when 'confidence'
      current_node.confidences << parse_confidence
    when 'sequence'
      current_node.sequences << parse_sequence
    when 'property'
      current_node.properties << parse_property
    when 'taxonomy'
      current_node.taxonomies << parse_taxonomy
    when 'distribution'
      current_node.distributions << parse_distribution
    when 'node_id'
      id = Id.new
      # NOTE(review): parse_id reads the "provider" attribute for an Id while
      # this branch reads "type" -- confirm which one node_id should use.
      id.type = @reader["type"]
      @reader.read
      id.value = @reader.value
      @reader.read
      #has_reached_end_element?('node_id')
      #@todo write unit test for this. There is no example of this in the example files
      current_node.id = id
    when 'color'
      color = BranchColor.new
      # The reader is still on <color> at this point, so asking
      # parse_simple_element for 'red' right away would match nothing.
      # Step through the children until </color> instead.
      if not @reader.empty_element?
        @reader.read
        while not is_end_element?('color')
          parse_simple_element(color, 'red')
          parse_simple_element(color, 'green')
          parse_simple_element(color, 'blue')
          @reader.read
        end
      end
      current_node.color = color
      #@todo add unit test for this
    when 'date'
      date = Date.new
      date.unit = @reader["unit"]
      # One read is enough: the loop skips any node that is not one of the
      # expected children, so it works with or without a whitespace node
      # after <date>. A self-closing <date/> has no end tag to wait for.
      if not @reader.empty_element?
        @reader.read
        while not(is_end_element?('date'))
          parse_simple_element(date, 'desc')
          parse_simple_element(date, 'value')
          parse_simple_element(date, 'minimum')
          parse_simple_element(date, 'maximum')
          @reader.read
        end
      end
      current_node.date = date
    when 'reference'
      reference = Reference.new()
      reference.doi = @reader['doi']
      if not @reader.empty_element?
        while not is_end_element?('reference')
          parse_simple_element(reference, 'desc')
          @reader.read
        end
      end
      current_node.references << reference
    when 'binary_characters'
      current_node.binary_characters = parse_binary_characters
    when 'clade'
      #do nothing
    else
      # Any tag not listed above is kept generically.
      current_node.other << parse_other
      #puts "No match found in parse_clade_elements.(#{@reader.name})"
    end

  end

end #parse_clade_elements
644
+
645
# Reads the content of an <events> element, starting with the reader on its
# opening tag and stopping on </events>. Returns the filled
# PhyloXML::Events object.
def parse_events()
  result = PhyloXML::Events.new
  simple_tags = ['type', 'duplications', 'speciations', 'losses']

  @reader.read #go to next element
  until is_end_element?('events')
    parse_simple_elements(result, simple_tags)
    #@todo could add unit test for this (example file does not have this case)
    result.confidence = parse_confidence if is_element?('confidence')
    @reader.read
  end

  result
end #parse_events
659
+
660
# Reads the content of a <taxonomy> element, starting with the reader on its
# opening tag and stopping on </taxonomy>. Child tags that are not handled
# explicitly are stored through parse_other. Returns the PhyloXML::Taxonomy.
def parse_taxonomy
  taxon = PhyloXML::Taxonomy.new
  parse_attributes(taxon, ["id_source"])

  @reader.read
  until is_end_element?('taxonomy')
    if @reader.node_type == XML::Reader::TYPE_ELEMENT
      tag = @reader.name
      case tag
      when 'code', 'scientific_name', 'rank', 'authority'
        # plain text children: the setter carries the tag's own name
        @reader.read
        taxon.send("#{tag}=", @reader.value)
        @reader.read
      when 'common_name'
        @reader.read
        taxon.common_names << @reader.value
        @reader.read
      when 'synonym'
        @reader.read
        taxon.synonyms << @reader.value
        @reader.read
      when 'id'
        taxon.taxonomy_id = parse_id('id')
      when 'uri'
        taxon.uri = parse_uri
      else
        taxon.other << parse_other
      end
    end

    @reader.read #move to next tag in the loop
  end

  taxon
end #parse_taxonomy
707
+
708
+ private
709
+
710
# Reads the content of a <sequence> element, starting with the reader on its
# opening tag and stopping on </sequence>. Child tags that are not handled
# explicitly are stored through parse_other. Returns the Sequence object.
def parse_sequence
  seq = Sequence.new
  parse_attributes(seq, ["type", "id_source", "id_ref"])

  @reader.read
  until is_end_element?('sequence')
    if @reader.node_type == XML::Reader::TYPE_ELEMENT
      tag = @reader.name
      case tag
      when 'symbol', 'name', 'location'
        # plain text children: the setter carries the tag's own name
        @reader.read
        seq.send("#{tag}=", @reader.value)
        @reader.read
      when 'mol_seq'
        seq.is_aligned = @reader["is_aligned"]
        @reader.read
        seq.mol_seq = @reader.value
        @reader.read
        has_reached_end_element?('mol_seq')
      when 'accession'
        seq.accession = Accession.new
        seq.accession.source = @reader["source"]
        @reader.read
        seq.accession.value = @reader.value
        @reader.read
        has_reached_end_element?('accession')
      when 'uri'
        seq.uri = parse_uri
      when 'annotation'
        seq.annotations << parse_annotation
      when 'domain_architecture'
        seq.domain_architecture = DomainArchitecture.new
        seq.domain_architecture.length = @reader["length"]
        # two reads before the first domain; parse_domain itself reads once
        # past each </domain>, and the loop reads once more
        @reader.read
        @reader.read
        until is_end_element?('domain_architecture')
          seq.domain_architecture.domains << parse_domain
          @reader.read #go to next domain element
        end
      else
        seq.other << parse_other
        #@todo add unit test
      end
    end

    @reader.read
  end

  seq
end #parse_sequence
767
+
768
# Builds a Uri from the current <uri> element: the "desc" and "type"
# attributes are copied, then the element text is stored via
# parse_simple_element. Returns the Uri.
def parse_uri
  link = Uri.new
  parse_attributes(link, ["desc", "type"])
  parse_simple_element(link, 'uri')
  link
end
774
+
775
# Builds an Annotation from the current <annotation> element. Attributes are
# always copied; children (desc, confidence, property, uri) are read only
# when the element is not self-closing, leaving the reader on </annotation>.
def parse_annotation
  note = Annotation.new
  parse_attributes(note, ['ref', 'source', 'evidence', 'type'])
  return note if @reader.empty_element?

  until is_end_element?('annotation')
    parse_simple_element(note, 'desc') if is_element?('desc')
    note.confidence = parse_confidence if is_element?('confidence')
    note.properties << parse_property if is_element?('property')
    note.uri = parse_uri if is_element?('uri')
    @reader.read
  end

  note
end
798
+
799
# Builds a Property from the current <property> element: attributes first,
# then the value of the following node. Raises (through
# has_reached_end_element?) unless the node after that is </property>.
def parse_property
  prop = Property.new
  parse_attributes(prop, ["ref", "unit", "datatype", "applies_to", "id_ref"])

  @reader.read
  prop.value = @reader.value
  @reader.read
  has_reached_end_element?('property')

  prop
end #parse_property
808
+
809
# Builds a Confidence from the current <confidence> element, using its "type"
# attribute and the following node's value converted with to_f. Raises
# (through has_reached_end_element?) unless </confidence> comes next.
def parse_confidence
  kind = @reader["type"]

  @reader.read
  score = @reader.value.to_f
  @reader.read
  has_reached_end_element?('confidence')

  Confidence.new(kind, score)
end #parse_confidence
817
+
818
# Reads the content of a <distribution> element (desc, point and polygon
# children), starting on its opening tag and stopping on </distribution>.
# Returns the Distribution.
def parse_distribution
  dist = Distribution.new

  @reader.read
  until is_end_element?('distribution')
    parse_simple_element(dist, 'desc')
    dist.points << parse_point if is_element?('point')
    dist.polygons << parse_polygon if is_element?('polygon')
    @reader.read
  end

  dist
end #parse_distribution
832
+
833
# Reads a <point> element: the "geodetic_datum" and "alt_unit" attributes,
# then the lat, long and alt children, stopping on </point>.
# lat and long are stored as read; alt is converted with to_f.
# Returns the Point.
def parse_point
  pt = Point.new
  pt.geodetic_datum = @reader["geodetic_datum"]
  pt.alt_unit = @reader["alt_unit"]

  @reader.read
  until is_end_element?('point')
    parse_simple_elements(pt, ['lat', 'long'])

    if is_element?('alt')
      @reader.read
      pt.alt = @reader.value.to_f
      @reader.read
      has_reached_end_element?('alt')
    end

    #advance reader
    @reader.read
  end

  pt
end #parse_point
855
+
856
# Reads a <polygon> element, collecting each <point> child until </polygon>.
# Prints a warning to standard output when fewer than three points were
# collected. Returns the Polygon.
def parse_polygon
  shape = Polygon.new

  @reader.read
  until is_end_element?('polygon')
    shape.points << parse_point if is_element?('point')
    @reader.read
  end

  #@todo should check for it at all? Probably not if xml is valid.
  puts "Warning: <polygon> should have at least 3 points" if shape.points.length < 3

  shape
end #parse_polygon
870
+
871
# Builds an Id from the current element: the "provider" attribute plus the
# following node's value.
#
# tag_name:: name of the closing tag expected after the value; a mismatch
#            raises through has_reached_end_element?
def parse_id(tag_name)
  ident = Id.new
  ident.provider = @reader["provider"]

  @reader.read
  ident.value = @reader.value
  @reader.read #@todo shouldn't there be another read?
  has_reached_end_element?(tag_name)

  ident
end #parse_id
880
+
881
# Builds a ProteinDomain from the current <domain> element: its attributes
# plus the following node's value. After verifying </domain>, the reader is
# advanced one more node before returning the domain.
def parse_domain
  dom = ProteinDomain.new
  parse_attributes(dom, ["from", "to", "confidence", "id"])

  @reader.read
  dom.value = @reader.value
  @reader.read
  has_reached_end_element?('domain')
  @reader.read # step past </domain>

  dom
end
891
+
892
# Builds a PhyloXML::BinaryCharacters from the current <binary_characters>
# element. The "type" attribute goes to bc_type and the four *_count
# attributes are copied; the lost/gained/absent/present children are read
# only when the element is not self-closing.
def parse_binary_characters
  chars = PhyloXML::BinaryCharacters.new
  chars.bc_type = @reader['type']
  parse_attributes(chars, ['gained_count', 'absent_count', 'lost_count', 'present_count'])
  return chars if @reader.empty_element?

  @reader.read
  until is_end_element?('binary_characters')
    ['lost', 'gained', 'absent', 'present'].each do |state|
      parse_bc(chars, state)
    end
    @reader.read
  end

  chars
end #parse_binary_characters
911
+
912
# Collects the <bc> values found inside the element named +element+.
#
# object::  its +element+ reader method must return a collection that
#           accepts <<
# element:: tag name; nothing happens unless the reader is on that
#           opening tag
#
# Leaves the reader on the matching closing tag.
def parse_bc(object, element)
  return unless is_element?(element)

  @reader.read
  until is_end_element?(element)
    if is_element?('bc')
      @reader.read
      object.send(element) << @reader.value
      @reader.read
      has_reached_end_element?('bc')
    end
    @reader.read
  end
end #parse_bc
926
+
927
# Generic fallback for tags the parser has no dedicated handling for.
# Starting with the reader on the opening tag, records the tag name, every
# attribute, the text value and (recursively) all child elements in a
# PhyloXML::Other object, which is returned.
#
# For an element with content the reader is left on the matching closing
# tag. A self-closing element has no closing tag, so in that case the
# reader is not advanced at all and no closing-tag check is made.
#
# Raises (through has_reached_end_element?) if the closing tag that ends the
# content does not carry the element's own name.
def parse_other
  other_obj = PhyloXML::Other.new
  other_obj.element_name = @reader.name
  # Ask while the reader is still positioned on the element itself, before
  # the attribute traversal below moves it.
  self_closing = @reader.empty_element?

  #parse attributes
  code = @reader.move_to_first_attribute
  while code == 1
    other_obj.attributes[@reader.name] = @reader.value
    code = @reader.move_to_next_attribute
  end

  # Without this, the loop below would keep reading past the element while
  # waiting for an end tag that never comes.
  return other_obj if self_closing

  # Read first, test afterwards: a child call returns with the reader on the
  # child's own closing tag, which must not be taken for ours when the child
  # has the same name. Every child consumes its own closing tag, so the
  # first closing tag seen at this level belongs to this element.
  loop do
    @reader.read
    case @reader.node_type
    when XML::Reader::TYPE_ELEMENT
      other_obj.children << parse_other #recursive call to parse children
    when XML::Reader::TYPE_TEXT
      other_obj.value = @reader.value
    when XML::Reader::TYPE_END_ELEMENT
      break
    end
  end

  #just a check
  has_reached_end_element?(other_obj.element_name)
  return other_obj
end #parse_other
949
+
950
+ end #class phyloxmlParser
951
+
952
+ end #module PhyloXML
953
+
954
+ end #module Bio