bio 1.3.1 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +2105 -3728
- data/KNOWN_ISSUES.rdoc +35 -3
- data/README.rdoc +8 -2
- data/RELEASE_NOTES.rdoc +166 -0
- data/bin/bioruby +4 -1
- data/bioruby.gemspec +146 -1
- data/bioruby.gemspec.erb +3 -1
- data/doc/ChangeLog-before-1.3.1 +3961 -0
- data/doc/Tutorial.rd +154 -22
- data/doc/Tutorial.rd.html +125 -68
- data/lib/bio.rb +21 -6
- data/lib/bio/appl/bl2seq/report.rb +11 -202
- data/lib/bio/appl/blast/format0.rb +0 -193
- data/lib/bio/appl/blast/report.rb +2 -147
- data/lib/bio/appl/blast/wublast.rb +0 -208
- data/lib/bio/appl/fasta.rb +4 -19
- data/lib/bio/appl/fasta/format10.rb +0 -14
- data/lib/bio/appl/genscan/report.rb +0 -176
- data/lib/bio/appl/hmmer.rb +1 -15
- data/lib/bio/appl/hmmer/report.rb +0 -100
- data/lib/bio/appl/meme/mast.rb +156 -0
- data/lib/bio/appl/meme/mast/report.rb +91 -0
- data/lib/bio/appl/meme/motif.rb +48 -0
- data/lib/bio/appl/psort.rb +0 -111
- data/lib/bio/appl/psort/report.rb +1 -45
- data/lib/bio/appl/pts1.rb +2 -4
- data/lib/bio/appl/sosui/report.rb +5 -54
- data/lib/bio/appl/targetp/report.rb +1 -104
- data/lib/bio/appl/tmhmm/report.rb +0 -36
- data/lib/bio/command.rb +94 -10
- data/lib/bio/data/aa.rb +1 -77
- data/lib/bio/data/codontable.rb +1 -95
- data/lib/bio/data/na.rb +1 -26
- data/lib/bio/db/aaindex.rb +1 -38
- data/lib/bio/db/fasta.rb +1 -134
- data/lib/bio/db/fasta/format_qual.rb +204 -0
- data/lib/bio/db/fasta/qual.rb +102 -0
- data/lib/bio/db/fastq.rb +645 -0
- data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
- data/lib/bio/db/fastq/format_fastq.rb +175 -0
- data/lib/bio/db/genbank/genbank.rb +1 -86
- data/lib/bio/db/gff.rb +0 -17
- data/lib/bio/db/go.rb +4 -72
- data/lib/bio/db/kegg/common.rb +112 -0
- data/lib/bio/db/kegg/compound.rb +29 -20
- data/lib/bio/db/kegg/drug.rb +74 -34
- data/lib/bio/db/kegg/enzyme.rb +26 -5
- data/lib/bio/db/kegg/genes.rb +128 -15
- data/lib/bio/db/kegg/genome.rb +3 -41
- data/lib/bio/db/kegg/glycan.rb +19 -24
- data/lib/bio/db/kegg/orthology.rb +16 -56
- data/lib/bio/db/kegg/reaction.rb +81 -28
- data/lib/bio/db/kegg/taxonomy.rb +1 -52
- data/lib/bio/db/litdb.rb +1 -16
- data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
- data/lib/bio/db/prosite.rb +2 -95
- data/lib/bio/db/rebase.rb +5 -6
- data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
- data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
- data/lib/bio/io/das.rb +0 -44
- data/lib/bio/io/ddbjxml.rb +1 -181
- data/lib/bio/io/flatfile.rb +1 -7
- data/lib/bio/io/flatfile/autodetection.rb +6 -0
- data/lib/bio/io/keggapi.rb +0 -442
- data/lib/bio/io/ncbirest.rb +130 -132
- data/lib/bio/io/ncbisoap.rb +2 -1
- data/lib/bio/io/pubmed.rb +0 -88
- data/lib/bio/location.rb +0 -73
- data/lib/bio/pathway.rb +0 -171
- data/lib/bio/sequence.rb +18 -1
- data/lib/bio/sequence/adapter.rb +3 -0
- data/lib/bio/sequence/format.rb +16 -0
- data/lib/bio/sequence/quality_score.rb +205 -0
- data/lib/bio/tree.rb +70 -5
- data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
- data/lib/bio/util/sirna.rb +1 -23
- data/lib/bio/version.rb +1 -1
- data/sample/demo_aaindex.rb +67 -0
- data/sample/demo_aminoacid.rb +101 -0
- data/sample/demo_bl2seq_report.rb +220 -0
- data/sample/demo_blast_report.rb +285 -0
- data/sample/demo_codontable.rb +119 -0
- data/sample/demo_das.rb +105 -0
- data/sample/demo_ddbjxml.rb +212 -0
- data/sample/demo_fasta_remote.rb +51 -0
- data/sample/demo_fastaformat.rb +105 -0
- data/sample/demo_genbank.rb +132 -0
- data/sample/demo_genscan_report.rb +202 -0
- data/sample/demo_gff1.rb +49 -0
- data/sample/demo_go.rb +98 -0
- data/sample/demo_hmmer_report.rb +149 -0
- data/sample/demo_kegg_compound.rb +57 -0
- data/sample/demo_kegg_drug.rb +65 -0
- data/sample/demo_kegg_genome.rb +74 -0
- data/sample/demo_kegg_glycan.rb +72 -0
- data/sample/demo_kegg_orthology.rb +62 -0
- data/sample/demo_kegg_reaction.rb +66 -0
- data/sample/demo_kegg_taxonomy.rb +92 -0
- data/sample/demo_keggapi.rb +502 -0
- data/sample/demo_litdb.rb +42 -0
- data/sample/demo_locations.rb +99 -0
- data/sample/demo_ncbi_rest.rb +130 -0
- data/sample/demo_nucleicacid.rb +49 -0
- data/sample/demo_pathway.rb +196 -0
- data/sample/demo_prosite.rb +120 -0
- data/sample/demo_psort.rb +138 -0
- data/sample/demo_psort_report.rb +70 -0
- data/sample/demo_pubmed.rb +118 -0
- data/sample/demo_sirna.rb +63 -0
- data/sample/demo_sosui_report.rb +89 -0
- data/sample/demo_targetp_report.rb +135 -0
- data/sample/demo_tmhmm_report.rb +68 -0
- data/sample/pmfetch.rb +13 -4
- data/sample/pmsearch.rb +15 -4
- data/sample/test_phyloxml_big.rb +205 -0
- data/test/bioruby_test_helper.rb +61 -0
- data/test/data/KEGG/1.1.1.1.enzyme +935 -0
- data/test/data/KEGG/C00025.compound +102 -0
- data/test/data/KEGG/D00063.drug +104 -0
- data/test/data/KEGG/G00024.glycan +47 -0
- data/test/data/KEGG/G01366.glycan +18 -0
- data/test/data/KEGG/K02338.orthology +902 -0
- data/test/data/KEGG/R00006.reaction +14 -0
- data/test/data/fastq/README.txt +109 -0
- data/test/data/fastq/error_diff_ids.fastq +20 -0
- data/test/data/fastq/error_double_qual.fastq +22 -0
- data/test/data/fastq/error_double_seq.fastq +22 -0
- data/test/data/fastq/error_long_qual.fastq +20 -0
- data/test/data/fastq/error_no_qual.fastq +20 -0
- data/test/data/fastq/error_qual_del.fastq +20 -0
- data/test/data/fastq/error_qual_escape.fastq +20 -0
- data/test/data/fastq/error_qual_null.fastq +0 -0
- data/test/data/fastq/error_qual_space.fastq +21 -0
- data/test/data/fastq/error_qual_tab.fastq +21 -0
- data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
- data/test/data/fastq/error_qual_vtab.fastq +20 -0
- data/test/data/fastq/error_short_qual.fastq +20 -0
- data/test/data/fastq/error_spaces.fastq +20 -0
- data/test/data/fastq/error_tabs.fastq +21 -0
- data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
- data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
- data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_title.fastq +17 -0
- data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
- data/test/data/fastq/longreads_as_illumina.fastq +40 -0
- data/test/data/fastq/longreads_as_sanger.fastq +40 -0
- data/test/data/fastq/longreads_as_solexa.fastq +40 -0
- data/test/data/fastq/longreads_original_sanger.fastq +120 -0
- data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
- data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
- data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
- data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
- data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
- data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
- data/test/data/meme/db +0 -0
- data/test/data/meme/mast +0 -0
- data/test/data/meme/mast.out +13 -0
- data/test/data/meme/meme.out +3 -0
- data/test/data/phyloxml/apaf.xml +666 -0
- data/test/data/phyloxml/bcl_2.xml +2097 -0
- data/test/data/phyloxml/made_up.xml +144 -0
- data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
- data/test/data/phyloxml/phyloxml_examples.xml +415 -0
- data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
- data/test/functional/bio/appl/test_pts1.rb +7 -5
- data/test/functional/bio/io/test_ensembl.rb +4 -3
- data/test/functional/bio/io/test_pubmed.rb +9 -3
- data/test/functional/bio/io/test_soapwsdl.rb +5 -4
- data/test/functional/bio/io/test_togows.rb +5 -4
- data/test/functional/bio/sequence/test_output_embl.rb +6 -4
- data/test/functional/bio/test_command.rb +54 -5
- data/test/runner.rb +5 -3
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
- data/test/unit/bio/appl/blast/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
- data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
- data/test/unit/bio/appl/genscan/test_report.rb +8 -9
- data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
- data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
- data/test/unit/bio/appl/mafft/test_report.rb +6 -5
- data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
- data/test/unit/bio/appl/meme/test_mast.rb +103 -0
- data/test/unit/bio/appl/meme/test_motif.rb +38 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
- data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
- data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
- data/test/unit/bio/appl/sim4/test_report.rb +5 -4
- data/test/unit/bio/appl/sosui/test_report.rb +6 -5
- data/test/unit/bio/appl/targetp/test_report.rb +5 -3
- data/test/unit/bio/appl/test_blast.rb +5 -4
- data/test/unit/bio/appl/test_fasta.rb +4 -2
- data/test/unit/bio/appl/test_pts1.rb +4 -2
- data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
- data/test/unit/bio/data/test_aa.rb +5 -3
- data/test/unit/bio/data/test_codontable.rb +5 -4
- data/test/unit/bio/data/test_na.rb +5 -3
- data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
- data/test/unit/bio/db/embl/test_common.rb +4 -2
- data/test/unit/bio/db/embl/test_embl.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
- data/test/unit/bio/db/embl/test_sptr.rb +6 -8
- data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
- data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
- data/test/unit/bio/db/kegg/test_compound.rb +146 -0
- data/test/unit/bio/db/kegg/test_drug.rb +194 -0
- data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
- data/test/unit/bio/db/kegg/test_genes.rb +32 -4
- data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
- data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
- data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
- data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
- data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
- data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
- data/test/unit/bio/db/test_aaindex.rb +6 -6
- data/test/unit/bio/db/test_fasta.rb +5 -46
- data/test/unit/bio/db/test_fastq.rb +829 -0
- data/test/unit/bio/db/test_gff.rb +4 -2
- data/test/unit/bio/db/test_lasergene.rb +7 -5
- data/test/unit/bio/db/test_medline.rb +4 -2
- data/test/unit/bio/db/test_newick.rb +6 -6
- data/test/unit/bio/db/test_nexus.rb +4 -2
- data/test/unit/bio/db/test_phyloxml.rb +769 -0
- data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
- data/test/unit/bio/db/test_prosite.rb +6 -5
- data/test/unit/bio/db/test_qual.rb +63 -0
- data/test/unit/bio/db/test_rebase.rb +5 -3
- data/test/unit/bio/db/test_soft.rb +7 -6
- data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
- data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
- data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
- data/test/unit/bio/io/test_ddbjxml.rb +4 -3
- data/test/unit/bio/io/test_ensembl.rb +5 -3
- data/test/unit/bio/io/test_fastacmd.rb +4 -3
- data/test/unit/bio/io/test_flatfile.rb +6 -5
- data/test/unit/bio/io/test_soapwsdl.rb +4 -3
- data/test/unit/bio/io/test_togows.rb +4 -2
- data/test/unit/bio/sequence/test_aa.rb +5 -3
- data/test/unit/bio/sequence/test_common.rb +4 -2
- data/test/unit/bio/sequence/test_compat.rb +4 -2
- data/test/unit/bio/sequence/test_dblink.rb +5 -3
- data/test/unit/bio/sequence/test_na.rb +4 -2
- data/test/unit/bio/sequence/test_quality_score.rb +330 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
- data/test/unit/bio/test_alignment.rb +5 -3
- data/test/unit/bio/test_command.rb +4 -3
- data/test/unit/bio/test_db.rb +5 -3
- data/test/unit/bio/test_feature.rb +4 -2
- data/test/unit/bio/test_location.rb +4 -2
- data/test/unit/bio/test_map.rb +5 -3
- data/test/unit/bio/test_pathway.rb +4 -2
- data/test/unit/bio/test_reference.rb +4 -2
- data/test/unit/bio/test_sequence.rb +5 -3
- data/test/unit/bio/test_shell.rb +5 -3
- data/test/unit/bio/test_tree.rb +6 -6
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
- data/test/unit/bio/util/test_color_scheme.rb +5 -3
- data/test/unit/bio/util/test_contingency_table.rb +5 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
- data/test/unit/bio/util/test_sirna.rb +6 -4
- metadata +147 -2
@@ -0,0 +1,954 @@
|
|
1
|
+
#
|
2
|
+
# = bio/db/phyloxml_parser.rb - PhyloXML parser
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2009
|
5
|
+
# Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# $Id:$
|
9
|
+
#
|
10
|
+
# == Description
|
11
|
+
#
|
12
|
+
# This file contains a parser for PhyloXML.
|
13
|
+
#
|
14
|
+
# == Requirements
|
15
|
+
#
|
16
|
+
# Libxml2 XML parser is required. Install libxml-ruby bindings from
|
17
|
+
# http://libxml.rubyforge.org or
|
18
|
+
#
|
19
|
+
# gem install -r libxml-ruby
|
20
|
+
#
|
21
|
+
# == References
|
22
|
+
#
|
23
|
+
# * http://www.phyloxml.org
|
24
|
+
#
|
25
|
+
# * https://www.nescent.org/wg_phyloinformatics/PhyloSoC:PhyloXML_support_in_BioRuby
|
26
|
+
|
27
|
+
|
28
|
+
require 'uri'
|
29
|
+
require 'libxml'
|
30
|
+
|
31
|
+
require 'bio/tree'
|
32
|
+
require 'bio/db/phyloxml/phyloxml_elements'
|
33
|
+
|
34
|
+
|
35
|
+
module Bio
|
36
|
+
|
37
|
+
module PhyloXML
|
38
|
+
|
39
|
+
|
40
|
+
|
41
|
+
|
42
|
+
# == Description
|
43
|
+
#
|
44
|
+
# Bio::PhyloXML::Parser is for parsing phyloXML format files.
|
45
|
+
#
|
46
|
+
# == Requirements
|
47
|
+
#
|
48
|
+
# Libxml2 XML parser is required. Install libxml-ruby bindings from
|
49
|
+
# http://libxml.rubyforge.org or
|
50
|
+
#
|
51
|
+
# gem install -r libxml-ruby
|
52
|
+
#
|
53
|
+
# == Usage
|
54
|
+
#
|
55
|
+
# require 'bio'
|
56
|
+
#
|
57
|
+
# # Create new phyloxml parser
|
58
|
+
# phyloxml = Bio::PhyloXML::Parser.open('example.xml')
|
59
|
+
#
|
60
|
+
# # Print the names of all trees in the file
|
61
|
+
# phyloxml.each do |tree|
|
62
|
+
# puts tree.name
|
63
|
+
# end
|
64
|
+
#
|
65
|
+
#
|
66
|
+
# == References
|
67
|
+
#
|
68
|
+
# http://www.phyloxml.org/documentation/version_100/phyloxml.xsd.html
|
69
|
+
#
|
70
|
+
class Parser
|
71
|
+
|
72
|
+
include LibXML
|
73
|
+
|
74
|
+
# After parsing all the trees, if there is anything else in other xml format,
|
75
|
+
# it is saved in this array of PhyloXML::Other objects
|
76
|
+
attr_reader :other
|
77
|
+
|
78
|
+
# Initializes LibXML::Reader and reads the file until it reaches the first
|
79
|
+
# phylogeny element.
|
80
|
+
#
|
81
|
+
# Create a new Bio::PhyloXML::Parser object.
|
82
|
+
#
|
83
|
+
# p = Bio::PhyloXML::Parser.open("./phyloxml_examples.xml")
|
84
|
+
#
|
85
|
+
# ---
|
86
|
+
# *Arguments*:
|
87
|
+
# * (required) _filename_: Path to the file to parse.
|
88
|
+
# * (optional) _validate_: Whether to validate the file against schema or not. Default value is true.
|
89
|
+
# *Returns*:: Bio::PhyloXML::Parser object
|
90
|
+
# Builds a parser for the file at +filename+: the name is passed through
# _secure_filename, optionally validated with _validate, and then read with
# a NONET XML::Reader advanced to the first phylogeny element.
def self.open(filename, validate=true)
  parser = new(nil, validate)
  parser.instance_eval do
    safe_name = _secure_filename(filename)
    _validate(:file, safe_name) if validate
    # XML::Parser::Options::NONET for security reason
    @reader = XML::Reader.file(
      safe_name,
      { :options => LibXML::XML::Parser::Options::NONET }
    )
    _skip_leader
  end
  parser
end
|
103
|
+
|
104
|
+
# Initializes LibXML::Reader and reads the file until it reaches the first
|
105
|
+
# phylogeny element.
|
106
|
+
#
|
107
|
+
# Create a new Bio::PhyloXML::Parser object.
|
108
|
+
#
|
109
|
+
# p = Bio::PhyloXML::Parser.open_uri("http://www.phyloxml.org/examples/apaf.xml")
|
110
|
+
#
|
111
|
+
# ---
|
112
|
+
# *Arguments*:
|
113
|
+
# * (required) _uri_: (URI or String) URI to the data to parse
|
114
|
+
# * (optional) _validate_: For URI reader, the "validate" option is ignored and no validation is executed.
|
115
|
+
# *Returns*:: Bio::PhyloXML::Parser object
|
116
|
+
# Builds a parser reading from +uri+ (a URI object or a String).
# The +validate+ argument is only forwarded to the constructor; no schema
# validation is run here.
def self.open_uri(uri, validate=true)
  location =
    if URI === uri
      uri.to_s
    else
      # raises error if not a String
      text = uri.to_str
      # raises error if invalid URI
      URI.parse(text)
      text
    end

  parser = new(nil, validate)
  parser.instance_eval do
    @reader = XML::Reader.file(location)
    _skip_leader
  end
  parser
end
|
134
|
+
|
135
|
+
# Special class for closed PhyloXML::Parser object.
|
136
|
+
# It raises error for any methods except essential methods.
|
137
|
+
#
|
138
|
+
# Bio::PhyloXML internal use only.
|
139
|
+
# Placeholder installed in place of the reader once a parser is closed:
# every message sent to it raises LibXML::XML::Error.
class ClosedPhyloXMLParser #:nodoc:
  def method_missing(*_ignored)
    raise(LibXML::XML::Error, 'closed PhyloXML::Parser object')
  end
end #class ClosedPhyloXMLParser
|
144
|
+
|
145
|
+
# Closes the LibXML::Reader inside the object.
|
146
|
+
# It also closes the opened file if it is created by using
|
147
|
+
# Bio::PhyloXML::Parser.open method.
|
148
|
+
#
|
149
|
+
# When closed object is closed again, or closed object is used,
|
150
|
+
# it raises LibXML::XML::Error.
|
151
|
+
# ---
|
152
|
+
# *Returns*:: nil
|
153
|
+
# Closes the current reader, then replaces it with a ClosedPhyloXMLParser
# so that any later use of the reader raises. Always returns nil.
def close
  open_reader = @reader
  open_reader.close
  @reader = ClosedPhyloXMLParser.new
  nil
end
|
158
|
+
|
159
|
+
# Initializes LibXML::Reader and reads from the IO until it reaches
|
160
|
+
# the first phylogeny element.
|
161
|
+
#
|
162
|
+
# Create a new Bio::PhyloXML::Parser object.
|
163
|
+
#
|
164
|
+
# p = Bio::PhyloXML::Parser.for_io($stdin)
|
165
|
+
#
|
166
|
+
# ---
|
167
|
+
# *Arguments*:
|
168
|
+
# * (required) _io_: IO object
|
169
|
+
# * (optional) _validate_: For IO reader, the "validate" option is ignored and no validation is executed.
|
170
|
+
# *Returns*:: Bio::PhyloXML::Parser object
|
171
|
+
# Builds a parser reading from the IO object +io+ with a NONET XML::Reader
# advanced to the first phylogeny element. +validate+ is only forwarded to
# the constructor; no schema validation is run here.
def self.for_io(io, validate=true)
  parser = new(nil, validate)
  parser.instance_eval do
    @reader = XML::Reader.io(
      io,
      { :options => LibXML::XML::Parser::Options::NONET }
    )
    _skip_leader
  end
  parser
end
|
181
|
+
|
182
|
+
# (private) returns PhyloXML schema
|
183
|
+
# (private) Loads phyloxml.xsd from the directory of this source file and
# returns it wrapped as an XML::Schema.
def _schema
  xsd_path = File.join(File.dirname(__FILE__), 'phyloxml.xsd')
  xsd_document = XML::Document.file(xsd_path)
  XML::Schema.document(xsd_document)
end
private :_schema
|
187
|
+
|
188
|
+
# (private) do validation
|
189
|
+
# ---
|
190
|
+
# *Arguments*:
|
191
|
+
# * (required) <em>data_type</em>: :file for filename, :string for string
|
192
|
+
# * (required) _arg_: filename or string
|
193
|
+
# *Returns*:: (undefined)
|
194
|
+
# (private) Validates XML data against the phyloxml.xsd schema.
#
# * _data_type_: :file when _arg_ is a filename, :string when _arg_ is XML
#   text. Any other value skips validation.
# * _arg_: filename or string.
#
# Returns nil when validation passes or is skipped. Raises RuntimeError
# when validation fails or LibXML reports an error while validating.
def _validate(data_type, arg)
  options = { :options =>
    (LibXML::XML::Parser::Options::NOERROR |   # no error messages
     LibXML::XML::Parser::Options::NOWARNING | # no warning messages
     LibXML::XML::Parser::Options::NONET)      # no network access
  }
  case data_type
  when :file
    # No validation when special file e.g. FIFO (named pipe)
    return unless File.file?(arg)
    xml_instance = XML::Document.file(arg, options)
  when :string
    xml_instance = XML::Document.string(arg, options)
  else
    # no validation for unknown data type
    return
  end

  schema = _schema
  begin
    # The block parameters use names distinct from +valid+ so the block
    # cannot shadow (or, on Ruby 1.8, overwrite) the validation result.
    valid = xml_instance.validate_schema(schema) do |msg, _is_error|
      # The document of libxml-ruby says that the block is called
      # when validation failed, but it seems it is never called
      # even when validation failed!
      raise "Validation of the XML document against phyloxml.xsd schema failed. #{msg}"
    end
  rescue LibXML::XML::Error => evar
    raise "Validation of the XML document against phyloxml.xsd schema failed, or XML error occurred. #{evar.message}"
  end
  unless valid
    raise "Validation of the XML document against phyloxml.xsd schema failed."
  end
end
private :_validate
|
228
|
+
|
229
|
+
# (private) It seems that LibXML::XML::Reader reads from the network
|
230
|
+
# even if LibXML::XML::Parser::Options::NONET is set.
|
231
|
+
# So, for URI-like filename, '://' is replaced with ':/'.
|
232
|
+
# (private) Returns +filename+ with a leading "scheme://" rewritten to
# "scheme:/" (e.g. "http://a/b" becomes "http:/a/b"); any other name is
# returned unchanged.
def _secure_filename(filename)
  uri_like = %r{\A[a-zA-Z]+://} =~ filename
  return filename unless uri_like
  filename.sub(%r{://}, ':/')
end
private :_secure_filename
|
241
|
+
|
242
|
+
# (private) loops through until reaches phylogeny stuff
|
243
|
+
# (private) Advances the reader node by node until it is positioned on a
# phylogeny start element. Returns nil.
def _skip_leader
  until is_element?('phylogeny')
    @reader.read
  end
  nil
end
private :_skip_leader
|
250
|
+
|
251
|
+
# Initializes LibXML::Reader and reads the PhyloXML-formatted string
|
252
|
+
# until it reaches the first phylogeny element.
|
253
|
+
#
|
254
|
+
# Create a new Bio::PhyloXML::Parser object.
|
255
|
+
#
|
256
|
+
# str = File.read("./phyloxml_examples.xml")
|
257
|
+
# p = Bio::PhyloXML::Parser.new(str)
|
258
|
+
#
|
259
|
+
#
|
260
|
+
# Deprecated usage: Reads data from a file. <em>str</em> is a filename.
|
261
|
+
#
|
262
|
+
# p = Bio::PhyloXML::Parser.new("./phyloxml_examples.xml")
|
263
|
+
#
|
264
|
+
# Taking filename is deprecated. Use Bio::PhyloXML::Parser.open(filename).
|
265
|
+
#
|
266
|
+
# ---
|
267
|
+
# *Arguments*:
|
268
|
+
# * (required) _str_: PhyloXML-formatted string
|
269
|
+
# * (optional) _validate_: Whether to validate the file against schema or not. Default value is true.
|
270
|
+
# *Returns*:: Bio::PhyloXML::Parser object
|
271
|
+
# Creates a parser for the PhyloXML-formatted string +str+ and advances the
# reader to the first phylogeny element.
#
# * _str_: PhyloXML text. When nil, only @other is initialized and no reader
#   is created (the class-level constructors rely on this). For
#   compatibility, a string without '<', '>', CR or LF that names an
#   existing file is treated as a filename (deprecated).
# * _validate_: only consulted in the deprecated filename case, where the
#   file is validated against the schema when true.
def initialize(str, validate=true)
  @other = []

  return unless str

  # For compatibility, if filename-like string is given,
  # treat it as a filename.
  if /[\<\>\r\n]/ !~ str and File.exist?(str) then
    # assume that str is filename
    warn "Bio::PhyloXML::Parser.new(filename) is deprecated. Use Bio::PhyloXML::Parser.open(filename)."
    filename = _secure_filename(str)
    _validate(:file, filename) if validate
    # XML::Parser::Options::NONET for security reason, as in
    # Bio::PhyloXML::Parser.open.
    @reader = XML::Reader.file(filename,
                               { :options =>
                                 LibXML::XML::Parser::Options::NONET })
    _skip_leader
    return
  end

  # initialize for string
  @reader = XML::Reader.string(str,
                               { :options =>
                                 LibXML::XML::Parser::Options::NONET })
  _skip_leader
end
|
295
|
+
|
296
|
+
|
297
|
+
# Iterate through all trees in the file.
|
298
|
+
#
|
299
|
+
# phyloxml = Bio::PhyloXML::Parser.open('example.xml')
|
300
|
+
# phyloxml.each do |tree|
|
301
|
+
# puts tree.name
|
302
|
+
# end
|
303
|
+
#
|
304
|
+
# Yields each tree returned by next_tree until next_tree returns nil (or
# false). Returns nil.
def each
  tree = next_tree
  while tree
    yield tree
    tree = next_tree
  end
end
|
309
|
+
|
310
|
+
# Access the specified tree in the file. It parses trees until the specified
|
311
|
+
# tree is reached.
|
312
|
+
#
|
313
|
+
# # Get 3rd tree in the file (starts counting from 0).
|
314
|
+
# parser = PhyloXML::Parser.open('phyloxml_examples.xml')
|
315
|
+
# tree = parser[2]
|
316
|
+
#
|
317
|
+
# Calls next_tree (i + 1) times and returns the result of the last call,
# or nil when no call is made. Trees read on the way are consumed.
def [](i)
  latest = nil
  (i + 1).times { latest = next_tree }
  latest
end
|
324
|
+
|
325
|
+
# Parse and return the next phylogeny tree. If there are no more phylogeny
|
326
|
+
# element, nil is returned. If there is something else besides phylogeny
|
327
|
+
# elements, it is saved in the PhyloXML::Parser#other.
|
328
|
+
#
|
329
|
+
# p = Bio::PhyloXML::Parser.open("./phyloxml_examples.xml")
|
330
|
+
# tree = p.next_tree
|
331
|
+
#
|
332
|
+
# ---
|
333
|
+
# *Returns*:: Bio::PhyloXML::Tree
|
334
|
+
# Parses the phylogeny element the reader is positioned on and returns it
# as a Bio::PhyloXML::Tree. When the reader is not on a phylogeny element,
# nil is returned: either the closing phyloxml tag has been reached, or the
# current content is parsed with parse_other and appended to @other.
def next_tree
  unless is_element?('phylogeny')
    if @reader.node_type == XML::Reader::TYPE_END_ELEMENT
      return nil if is_end_element?('phyloxml')
      @reader.read
      @reader.read
      return nil if is_end_element?('phyloxml')
    end
    # phyloxml can hold only phylogeny and "other" elements, and "other"
    # always comes after all phylogenies, so anything else is "other".
    @other << parse_other
    # not a valid phyloxml tree, so there is no tree to return
    return nil
  end

  tree = Bio::PhyloXML::Tree.new

  # Stack of enclosing clades; the last entry is the current node.
  # The tree itself sits at the bottom of the stack.
  clades = [tree]

  # Edge leading to the current node, needed to parse the branch_length tag.
  current_edge = nil

  # The property element can occur both in clade and in phylogeny, so this
  # flag records whether the (at most one) top-level clade is being parsed.
  parsing_clade = false

  until is_end_element?('phylogeny')
    break if is_end_element?('phyloxml')

    # phylogeny-level attributes and simple elements (everything but clade)
    unless parsing_clade
      if is_element?('phylogeny')
        tree.rooted = (@reader["rooted"] == "true")
        tree.rerootable = (@reader["rerootable"] == "true")
        parse_attributes(tree, ["branch_length_unit", 'type'])
      end

      parse_simple_elements(tree, ["name", 'description', "date"])

      tree.confidences << parse_confidence if is_element?('confidence')
    end

    if @reader.node_type == XML::Reader::TYPE_ELEMENT
      if @reader.name == 'clade'
        parsing_clade = true

        node = Bio::PhyloXML::Node.new
        branch_length = @reader['branch_length']
        parse_attributes(node, ["id_source"])

        tree.add_node(node)
        # The first clade becomes the root; later ones hang off the
        # current node.
        if tree.root == nil
          tree.root = node
        else
          current_edge = tree.add_edge(clades[-1], node,
                                       Bio::Tree::Edge.new(branch_length))
        end
        clades.push(node)
      elsif parsing_clade
        parse_clade_elements(clades[-1], current_edge)
      end
    end

    # closing clade tag: go one parent up
    if is_end_element?('clade')
      if clades[-1] == tree.root
        # closing tag of the top-most clade: clade parsing is finished
        parsing_clade = false
      else
        clades.pop
      end
    end

    # phylogeny-level elements that follow the clade
    if !parsing_clade && @reader.node_type == XML::Reader::TYPE_ELEMENT
      case @reader.name
      when 'property'
        tree.properties << parse_property

      when 'clade_relation'
        clade_relation = CladeRelation.new
        parse_attributes(clade_relation, ["id_ref_0", "id_ref_1", "distance", "type"])

        #@ add unit test for this
        unless @reader.empty_element?
          @reader.read
          clade_relation.confidence = parse_confidence if is_element?('confidence')
        end
        tree.clade_relations << clade_relation

      when 'sequence_relation'
        sequence_relation = SequenceRelation.new
        parse_attributes(sequence_relation, ["id_ref_0", "id_ref_1", "distance", "type"])

        unless @reader.empty_element?
          @reader.read
          sequence_relation.confidence = parse_confidence if is_element?('confidence')
        end
        tree.sequence_relations << sequence_relation

      when 'phylogeny'
        # do nothing
      else
        tree.other << parse_other
      end
    end

    # go to next element
    @reader.read
  end

  # Step past the node the loop stopped on and the node after it, so the
  # reader rests on the next meaningful node.
  @reader.read
  @reader.read

  tree
end
|
487
|
+
|
488
|
+
# return tree of specified name.
|
489
|
+
# @todo Implement this method.
|
490
|
+
# def get_tree_by_name(name)
|
491
|
+
|
492
|
+
# while not is_end_element?('phyloxml')
|
493
|
+
# if is_element?('phylogeny')
|
494
|
+
# @reader.read
|
495
|
+
# @reader.read
|
496
|
+
#
|
497
|
+
# if is_element?('name')
|
498
|
+
# @reader.read
|
499
|
+
# if @reader.value == name
|
500
|
+
# puts "equasl"
|
501
|
+
# tree = next_tree
|
502
|
+
# puts tree
|
503
|
+
# end
|
504
|
+
# end
|
505
|
+
# end
|
506
|
+
# @reader.read
|
507
|
+
# end
|
508
|
+
#
|
509
|
+
# end
|
510
|
+
|
511
|
+
|
512
|
+
private
|
513
|
+
|
514
|
+
####
|
515
|
+
# Utility methods
|
516
|
+
###
|
517
|
+
|
518
|
+
# Checks whether the reader currently sits on an opening tag called +str+.
#
# str:: element name to compare with the current node's name.
#
# Returns true only when the current node is an element start whose name
# equals +str+; false otherwise.
def is_element?(str)
  # `&&` already yields true/false here, so the former
  # `and ... ? true : false` form (which leaned on operator precedence to
  # work) is not needed.
  @reader.node_type == XML::Reader::TYPE_ELEMENT && @reader.name == str
end
|
521
|
+
|
522
|
+
# Checks whether the reader currently sits on the closing tag of +str+.
#
# str:: element name to compare with the current node's name.
#
# Returns true only when the current node is an end-element node whose name
# equals +str+; false otherwise.
def is_end_element?(str)
  # Plain `&&` gives the boolean directly; no `and` / ternary needed.
  @reader.node_type == XML::Reader::TYPE_END_ELEMENT && @reader.name == str
end
|
525
|
+
|
526
|
+
# Sanity check used after consuming an element: verifies that the reader is
# now on the closing tag of +str+.
#
# str:: name of the element whose end tag is expected.
#
# Returns nil when the reader is on </str>.
# Raises RuntimeError otherwise. Despite the trailing "?" this is an
# assertion, not a predicate.
def has_reached_end_element?(str)
  return if is_end_element?(str)

  raise "Warning: Should have reached </#{str}> element here"
end
|
531
|
+
|
532
|
+
# Parses a simple text-only XML element, for example
# <speciations>1</speciations>, and stores its text on +object+ through the
# "<name>=" writer (object.speciations = "1"; the text is passed on exactly
# as the reader reports it).
#
# Does nothing unless the reader is on the opening tag of +name+. An element
# without text (<name/> or <name></name>) leaves +object+ untouched. On
# return the reader is on the closing tag of +name+, or still on the element
# itself when it is self-closing, so a caller's usual trailing read moves on
# correctly in both cases.
#
# object:: receiver of the parsed text; must respond to "<name>=".
# name::   element name, also used to build the writer name.
#
# Raises RuntimeError (through has_reached_end_element?) when the node that
# follows the text is not </name>.
def parse_simple_element(object, name)
  return unless is_element?(name)
  # <name/> has neither a text node nor an end tag to consume.
  return if @reader.empty_element?

  @reader.read
  # <name></name>: the reader is already on the end tag, nothing to store.
  unless is_end_element?(name)
    object.send("#{name}=", @reader.value)
    @reader.read
  end
  has_reached_end_element?(name)
end
|
544
|
+
|
545
|
+
# Tries parse_simple_element for each name in +elements+, in order, against
# the reader's current position.
#
# object::   receiver handed through to parse_simple_element.
# elements:: list of element names to try.
#
# Returns +elements+ (the value of the iteration).
def parse_simple_elements(object, elements)
  elements.each { |element_name| parse_simple_element(object, element_name) }
end
|
550
|
+
|
551
|
+
# Copies XML attributes of the reader's current node onto +object+.
# Replaces repeated code of the form
#   clade_relation.type = @reader["type"]
#
# For every name in +arr_of_attrs+ the value of @reader[name] is passed to
# the "<name>=" writer of +object+, whatever that lookup returns.
#
# object::       receiver; must respond to "<name>=" for every listed name.
# arr_of_attrs:: attribute names to copy.
#
# Returns +arr_of_attrs+ (the value of the iteration).
def parse_attributes(object, arr_of_attrs)
  arr_of_attrs.each do |attr_name|
    object.send("#{attr_name}=", @reader[attr_name])
  end
end
|
558
|
+
|
559
|
+
# Handles the child element of a <clade> that the reader is currently on and
# stores what it finds on +current_node+ (or on +current_edge+ for
# <branch_length>). There is no loop in here: the caller advances the reader
# and calls this once per node. Nodes that are not element starts are
# ignored, and so is a nested <clade> start tag.
#
# current_node:: node object being filled in.
# current_edge:: edge that receives the branch length; may be nil, in which
#                case the branch length is read but not stored.
def parse_clade_elements(current_node, current_edge)
  return unless @reader.node_type == XML::Reader::TYPE_ELEMENT

  case @reader.name
  when 'branch_length'
    # @todo add unit test for this. current_edge is nil, if the root clade
    # has branch_length attribute.
    @reader.read
    branch_length = @reader.value
    current_edge.distance = branch_length.to_f unless current_edge.nil?
    @reader.read
  when 'width'
    @reader.read
    current_node.width = @reader.value
    @reader.read
  when 'name'
    @reader.read
    current_node.name = @reader.value
    @reader.read
  when 'events'
    current_node.events = parse_events
  when 'confidence'
    current_node.confidences << parse_confidence
  when 'sequence'
    current_node.sequences << parse_sequence
  when 'property'
    current_node.properties << parse_property
  when 'taxonomy'
    current_node.taxonomies << parse_taxonomy
  when 'distribution'
    current_node.distributions << parse_distribution
  when 'node_id'
    id = Id.new
    # NOTE(review): parse_id copies the "provider" attribute while this
    # branch copies "type" -- confirm which one <node_id> should use.
    id.type = @reader["type"]
    @reader.read
    id.value = @reader.value
    @reader.read
    #has_reached_end_element?('node_id')
    #@todo write unit test for this. There is no example of this in the example files
    current_node.id = id
  when 'color'
    color = BranchColor.new
    # The reader is still on <color> itself at this point, so the channel
    # elements have to be walked explicitly; looking for <red> without
    # advancing can never match.
    unless @reader.empty_element?
      @reader.read
      until is_end_element?('color')
        parse_simple_elements(color, %w[red green blue])
        @reader.read
      end
    end
    current_node.color = color
    #@todo add unit test for this
  when 'date'
    date = Date.new
    date.unit = @reader["unit"]
    unless @reader.empty_element?
      # Step inside <date>. Nodes that are none of the expected children
      # (e.g. whitespace) are passed over by the loop, so nothing is assumed
      # about what directly follows the opening tag.
      @reader.read
      until is_end_element?('date')
        parse_simple_elements(date, %w[desc value minimum maximum])
        @reader.read
      end
    end
    current_node.date = date
  when 'reference'
    reference = Reference.new
    reference.doi = @reader['doi']
    unless @reader.empty_element?
      until is_end_element?('reference')
        parse_simple_element(reference, 'desc')
        @reader.read
      end
    end
    current_node.references << reference
  when 'binary_characters'
    current_node.binary_characters = parse_binary_characters
  when 'clade'
    #do nothing
  else
    current_node.other << parse_other
    #puts "No match found in parse_clade_elements.(#{@reader.name})"
  end
end #parse_clade_elements
|
644
|
+
|
645
|
+
# Parses an <events> element into a PhyloXML::Events object. Expects the
# reader to be on the <events> opening tag; on return it is on </events>
# (or still on the element when it is self-closing).
#
# The text of <type>, <duplications>, <speciations> and <losses> is stored
# through the writers of the same name; a <confidence> child is parsed with
# parse_confidence.
#
# Returns the populated Events object.
def parse_events
  events = PhyloXML::Events.new
  # <events/> has no children and no closing tag to look for.
  return events if @reader.empty_element?

  @reader.read # step inside <events>
  until is_end_element?('events')
    parse_simple_elements(events, %w[type duplications speciations losses])
    #@todo could add unit test for this (example file does not have this case)
    events.confidence = parse_confidence if is_element?('confidence')
    @reader.read
  end
  events
end #parse_events
|
659
|
+
|
660
|
+
# Parses a <taxonomy> element into a PhyloXML::Taxonomy object. Expects the
# reader to be on the <taxonomy> opening tag; on return it is on </taxonomy>
# (or still on the element when it is self-closing).
#
# The id_source attribute is copied first. Children are handled as follows:
# code, scientific_name, rank, authority -> text stored via the writer of the
#                                           same name
# id                                     -> parse_id, stored as taxonomy_id
# common_name, synonym                   -> text appended to common_names /
#                                           synonyms
# uri                                    -> parse_uri
# anything else                          -> parse_other, appended to other
#
# Returns the populated Taxonomy object.
def parse_taxonomy
  taxonomy = PhyloXML::Taxonomy.new
  parse_attributes(taxonomy, ["id_source"])
  # <taxonomy/> has no children and no closing tag to look for.
  return taxonomy if @reader.empty_element?

  @reader.read
  until is_end_element?('taxonomy')
    if @reader.node_type == XML::Reader::TYPE_ELEMENT
      case @reader.name
      when 'code', 'scientific_name', 'rank', 'authority'
        # Plain text children: element name and writer name coincide.
        field = @reader.name
        @reader.read
        taxonomy.send("#{field}=", @reader.value)
        @reader.read
      when 'id'
        taxonomy.taxonomy_id = parse_id('id')
      when 'common_name'
        @reader.read
        taxonomy.common_names << @reader.value
        @reader.read
        #has_reached_end_element?('common_name')
      when 'synonym'
        @reader.read
        taxonomy.synonyms << @reader.value
        @reader.read
        #has_reached_end_element?('synonym')
      when 'uri'
        taxonomy.uri = parse_uri
      else
        taxonomy.other << parse_other
      end
    end

    @reader.read #move to next tag in the loop
  end
  taxonomy
end #parse_taxonomy
|
707
|
+
|
708
|
+
private
|
709
|
+
|
710
|
+
# Parses a <sequence> element into a Sequence object. Expects the reader to
# be on the <sequence> opening tag; on return it is on </sequence> (or still
# on the element when it is self-closing).
#
# The type, id_source and id_ref attributes are copied first. Children:
# symbol, name, location -> text stored via the writer of the same name
# mol_seq                -> is_aligned attribute plus text
# accession              -> Accession with source attribute and text
# uri                    -> parse_uri
# annotation             -> parse_annotation, appended to annotations
# domain_architecture    -> DomainArchitecture with length attribute and
#                           one parse_domain result per loop pass
# anything else          -> parse_other, appended to other
#
# Returns the populated Sequence object.
# Raises RuntimeError (through has_reached_end_element?) when <mol_seq> or
# <accession> is not followed by its end tag after one text node.
def parse_sequence
  sequence = Sequence.new
  parse_attributes(sequence, ["type", "id_source", "id_ref"])
  # <sequence/> has no children and no closing tag to look for.
  return sequence if @reader.empty_element?

  @reader.read
  until is_end_element?('sequence')
    if @reader.node_type == XML::Reader::TYPE_ELEMENT
      case @reader.name
      when 'symbol', 'name', 'location'
        # Plain text children: element name and writer name coincide.
        field = @reader.name
        @reader.read
        sequence.send("#{field}=", @reader.value)
        @reader.read
      when 'mol_seq'
        sequence.is_aligned = @reader["is_aligned"]
        @reader.read
        sequence.mol_seq = @reader.value
        @reader.read
        has_reached_end_element?('mol_seq')
      when 'accession'
        sequence.accession = Accession.new
        sequence.accession.source = @reader["source"]
        @reader.read
        sequence.accession.value = @reader.value
        @reader.read
        has_reached_end_element?('accession')
      when 'uri'
        sequence.uri = parse_uri
      when 'annotation'
        sequence.annotations << parse_annotation
      when 'domain_architecture'
        sequence.domain_architecture = DomainArchitecture.new
        sequence.domain_architecture.length = @reader["length"]
        # NOTE(review): the two reads here, together with the extra read at
        # the end of parse_domain, appear to rely on a whitespace node
        # between tags -- confirm against compact (unindented) input.
        @reader.read
        @reader.read
        until is_end_element?('domain_architecture')
          sequence.domain_architecture.domains << parse_domain
          @reader.read #go to next domain element
        end
      else
        sequence.other << parse_other
        #@todo add unit test
      end
    end

    @reader.read
  end
  sequence
end #parse_sequence
|
767
|
+
|
768
|
+
# Parses a <uri> element into a Uri object: the desc and type attributes are
# copied with parse_attributes, and the element text is stored through the
# uri= writer by parse_simple_element.
#
# Returns the Uri object.
def parse_uri
  uri = Uri.new
  parse_attributes(uri, ["desc", "type"])
  parse_simple_element(uri, 'uri')
  uri
end
|
774
|
+
|
775
|
+
# Parses an <annotation> element into an Annotation object. Expects the
# reader to be on the <annotation> opening tag; on return it is on
# </annotation> (or still on the element when it is self-closing).
#
# The ref, source, evidence and type attributes are copied first. Children:
# desc -> text, confidence -> parse_confidence, property -> parse_property
# (appended to properties), uri -> parse_uri.
#
# Returns the populated Annotation object.
def parse_annotation
  annotation = Annotation.new
  parse_attributes(annotation, ['ref', 'source', 'evidence', 'type'])
  return annotation if @reader.empty_element?

  # The first pass still sees <annotation> itself, which matches none of the
  # checks below; the read at the bottom of the loop is what steps inside.
  until is_end_element?('annotation')
    parse_simple_element(annotation, 'desc') if is_element?('desc')
    annotation.confidence = parse_confidence if is_element?('confidence')
    annotation.properties << parse_property if is_element?('property')
    annotation.uri = parse_uri if is_element?('uri')

    @reader.read
  end
  annotation
end
|
798
|
+
|
799
|
+
# Parses a <property> element into a Property object. Expects the reader to
# be on the <property> opening tag; on return it is on </property>.
#
# The ref, unit, datatype, applies_to and id_ref attributes are copied, then
# the value of the following node is stored as the property value.
#
# Returns the Property object.
# Raises RuntimeError (through has_reached_end_element?) when the node after
# the value is not </property>.
def parse_property
  property = Property.new
  parse_attributes(property, ["ref", "unit", "datatype", "applies_to", "id_ref"])
  @reader.read
  property.value = @reader.value
  @reader.read
  has_reached_end_element?('property')
  property
end #parse_property
|
808
|
+
|
809
|
+
# Parses a <confidence> element. Expects the reader to be on the
# <confidence> opening tag; on return it is on </confidence>.
#
# Returns Confidence.new(type, value), where type is the "type" attribute and
# value is the following node's value converted with to_f.
# Raises RuntimeError (through has_reached_end_element?) when the node after
# the value is not </confidence>.
def parse_confidence
  confidence_type = @reader["type"]
  @reader.read
  confidence_value = @reader.value.to_f
  @reader.read
  has_reached_end_element?('confidence')
  Confidence.new(confidence_type, confidence_value)
end #parse_confidence
|
817
|
+
|
818
|
+
# Parses a <distribution> element into a Distribution object. Expects the
# reader to be on the <distribution> opening tag; on return it is on
# </distribution> (or still on the element when it is self-closing).
#
# Children: desc -> text, point -> parse_point (appended to points),
# polygon -> parse_polygon (appended to polygons).
#
# Returns the populated Distribution object.
def parse_distribution
  distribution = Distribution.new
  # <distribution/> has no children and no closing tag to look for.
  return distribution if @reader.empty_element?

  @reader.read
  until is_end_element?('distribution')
    parse_simple_element(distribution, 'desc')

    distribution.points << parse_point if is_element?('point')
    distribution.polygons << parse_polygon if is_element?('polygon')

    @reader.read
  end
  distribution
end #parse_distribution
|
832
|
+
|
833
|
+
# Parses a <point> element into a Point object. Expects the reader to be on
# the <point> opening tag; on return it is on </point>.
#
# The geodetic_datum and alt_unit attributes are copied first. <lat> and
# <long> text is stored as read, while <alt> is converted with to_f.
#
# Returns the populated Point object.
# Raises RuntimeError (through has_reached_end_element?) when a child is not
# followed by its end tag after one text node.
def parse_point
  point = Point.new
  # Same attribute-copy helper the other element parsers use.
  parse_attributes(point, ["geodetic_datum", "alt_unit"])

  @reader.read
  until is_end_element?('point')
    parse_simple_elements(point, ['lat', 'long'])

    if is_element?('alt')
      @reader.read
      point.alt = @reader.value.to_f
      @reader.read
      has_reached_end_element?('alt')
    end
    #advance reader
    @reader.read
  end
  point
end #parse_point
|
855
|
+
|
856
|
+
# Parses a <polygon> element into a Polygon object. Expects the reader to be
# on the <polygon> opening tag; on return it is on </polygon>.
#
# Every <point> child is parsed with parse_point and appended to points.
# A polygon with fewer than three points is still returned, but a warning is
# emitted.
#
# Returns the populated Polygon object.
def parse_polygon
  polygon = Polygon.new
  @reader.read
  until is_end_element?('polygon')
    polygon.points << parse_point if is_element?('point')
    @reader.read
  end

  #@todo should check for it at all? Probably not if xml is valid.
  if polygon.points.length < 3
    # Diagnostics from a parser go to stderr (Kernel#warn), so they do not
    # end up in the standard output of whatever program uses the parser.
    warn "Warning: <polygon> should have at least 3 points"
  end
  polygon
end #parse_polygon
|
870
|
+
|
871
|
+
# Parses an identifier element into an Id object. Expects the reader to be
# on the opening tag; on return it is on the closing tag of +tag_name+.
#
# tag_name:: name of the element being parsed (for example 'id'); only used
#            to verify the closing tag.
#
# Returns an Id whose provider is the "provider" attribute and whose value
# is the value of the following node.
# Raises RuntimeError (through has_reached_end_element?) when the node after
# the value is not </tag_name>.
def parse_id(tag_name)
  id = Id.new
  id.provider = @reader["provider"]
  @reader.read
  id.value = @reader.value
  @reader.read #@todo shouldn't there be another read?
  has_reached_end_element?(tag_name)
  id
end #parse_id
|
880
|
+
|
881
|
+
# Parses a <domain> element into a ProteinDomain object. Expects the reader
# to be on the <domain> opening tag.
#
# The from, to, confidence and id attributes are copied, then the value of
# the following node is stored as the domain value. After the closing tag
# has been verified the reader is advanced once more, so on return it is on
# the node that follows </domain>.
#
# Returns the ProteinDomain object.
# Raises RuntimeError (through has_reached_end_element?) when the node after
# the value is not </domain>.
def parse_domain
  domain = ProteinDomain.new
  parse_attributes(domain, ["from", "to", "confidence", "id"])
  @reader.read
  domain.value = @reader.value
  @reader.read
  has_reached_end_element?('domain')
  @reader.read
  domain
end
|
891
|
+
|
892
|
+
# Parses a <binary_characters> element into a PhyloXML::BinaryCharacters
# object. Expects the reader to be on the opening tag; on return it is on
# </binary_characters> (or still on the element when it is self-closing).
#
# The "type" attribute is stored as bc_type and the four *_count attributes
# are copied with parse_attributes. Inside the element, each node is offered
# to parse_bc for the lost, gained, absent and present lists, in that order.
#
# Returns the populated BinaryCharacters object.
def parse_binary_characters
  b = PhyloXML::BinaryCharacters.new
  b.bc_type = @reader['type']

  parse_attributes(b, ['gained_count', 'absent_count', 'lost_count', 'present_count'])
  unless @reader.empty_element?
    @reader.read
    until is_end_element?('binary_characters')
      %w[lost gained absent present].each { |state| parse_bc(b, state) }

      @reader.read
    end
  end
  b
end #parse_binary_characters
|
911
|
+
|
912
|
+
# Parses one state list of <binary_characters> (for example <lost>) and
# appends the text of every <bc> child to the collection that +object+
# returns from the reader method named +element+.
#
# Does nothing unless the reader is on the opening tag of +element+. On
# return the reader is on the closing tag of +element+, or still on the
# element itself when it is self-closing.
#
# object::  receiver; object.send(element) must return something that
#           accepts <<.
# element:: name of the state element, also used as the reader method name.
#
# Raises RuntimeError (through has_reached_end_element?) when a <bc> is not
# followed by its end tag after one text node.
def parse_bc(object, element)
  return unless is_element?(element)
  # A self-closing state element has no <bc> children and no end tag, so
  # there is nothing to walk.
  return if @reader.empty_element?

  @reader.read
  until is_end_element?(element)
    if is_element?('bc')
      @reader.read
      object.send(element) << @reader.value
      @reader.read
      has_reached_end_element?('bc')
    end
    @reader.read
  end
end #parse_bc
|
926
|
+
|
927
|
+
# Generic fallback for elements the parser has no dedicated handling for.
# Builds a PhyloXML::Other holding the element name, all of its attributes,
# its text (when a text node is met) and, recursively, one Other per child
# element.
#
# Expects the reader to be on the element's opening tag. On return the
# reader is on the element's closing tag; for a self-closing element it has
# only been moved across that element's attributes.
#
# Returns the populated Other object.
def parse_other
  other_obj = PhyloXML::Other.new
  other_obj.element_name = @reader.name
  # Sampled before the attribute walk below repositions the reader, after
  # which the answer may no longer describe the element.
  self_closing = @reader.empty_element?

  #parse attributes
  code = @reader.move_to_first_attribute
  while code == 1
    other_obj.attributes[@reader.name] = @reader.value
    code = @reader.move_to_next_attribute
  end

  # <foo/> has no content and no end tag to wait for.
  return other_obj if self_closing

  loop do
    # Advance before testing for the end tag: after a recursive call the
    # reader rests on the child's end tag, which must not be mistaken for
    # ours when parent and child share a name.
    @reader.read
    break if is_end_element?(other_obj.element_name)

    if @reader.node_type == XML::Reader::TYPE_ELEMENT
      other_obj.children << parse_other #recursive call to parse children
    elsif @reader.node_type == XML::Reader::TYPE_TEXT
      other_obj.value = @reader.value
    end
  end
  #just a check
  has_reached_end_element?(other_obj.element_name)
  other_obj
end #parse_other
|
949
|
+
|
950
|
+
end #class phyloxmlParser
|
951
|
+
|
952
|
+
end #module PhyloXML
|
953
|
+
|
954
|
+
end #module Bio
|