bioruby-phyloxml 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,227 @@
1
+ #
2
+ # = bio/db/phyloxml_writer.rb - PhyloXML writer
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+ # == Description
10
+ #
11
+ # This file contains a writer for PhyloXML.
12
+ #
13
+ # == Requirements
14
+ #
15
+ # Libxml2 XML parser is required. Install libxml-ruby bindings from
16
+ # http://libxml.rubyforge.org or
17
+ #
18
+ # gem install -r libxml-ruby
19
+ #
20
+ # == References
21
+ #
22
+ # * http://www.phyloxml.org
23
+ #
24
+ # * https://www.nescent.org/wg_phyloinformatics/PhyloSoC:PhyloXML_support_in_BioRuby
25
+
26
+ require 'libxml'
27
+ require 'bio-phyloxml/phyloxml_elements'
28
+
29
+ module Bio
30
+
31
+ module PhyloXML
32
+
33
+ # == Description
34
+ #
35
+ # Bio::PhyloXML::Writer is for writing phyloXML (version 1.10) format files.
36
+ #
37
+ # == Requirements
38
+ #
39
+ # Libxml2 XML parser is required. Install libxml-ruby bindings from
40
+ # http://libxml.rubyforge.org or
41
+ #
42
+ # gem install -r libxml-ruby
43
+ #
44
+ # == Usage
45
+ #
46
+ # require 'bio'
47
+ #
48
+ # # Create new phyloxml parser
49
+ # phyloxml = Bio::PhyloXML::Parser.open('example.xml')
50
+ #
51
+ # # Read in some trees from file
52
+ # tree1 = phyloxml.next_tree
53
+ # tree2 = phyloxml.next_tree
54
+ #
55
+ # # Create new phyloxml writer
56
+ # writer = Bio::PhyloXML::Writer.new('tree.xml')
57
+ #
58
+ # # Write tree to the file tree.xml
59
+ # writer.write(tree1)
60
+ #
61
+ # # Add another tree to the file
62
+ # writer.write(tree2)
63
+ #
64
+ # == References
65
+ #
66
+ # http://www.phyloxml.org/documentation/version_100/phyloxml.xsd.html
67
+
68
+ class Writer
69
+
70
+ include LibXML
71
+
72
+ SCHEMA_LOCATION = 'http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd'
73
+
74
+ attr_accessor :write_branch_length_as_subelement
75
+
76
+ #
77
+ # Create new Writer object. As parameters provide filename of xml file
78
+ # you wish to create. Optional parameter is whether to indent or not.
79
+ # Default is true. By default branch_length is written as subelement of
80
+ # clade element.
81
+ #
82
# Builds an empty phyloXML document and saves it to +filename+ right away.
#
# filename:: path of the XML file to create.
# indent::   whether saved output is indented (default: true). The value is
#            used for the initial save below and kept in @indent for the
#            saves done by later write calls.
#
# branch_length is written as a subelement of clade by default; change it
# through the write_branch_length_as_subelement accessor.
def initialize(filename, indent=true)
  @write_branch_length_as_subelement = true # default value
  @filename = filename
  @indent = indent

  @doc = XML::Document.new
  @doc.root = XML::Node.new('phyloxml')
  @root = @doc.root
  @root['xmlns:xsi'] = 'http://www.w3.org/2001/XMLSchema-instance'
  @root['xsi:schemaLocation'] = SCHEMA_LOCATION
  @root['xmlns'] = 'http://www.phyloxml.org'

  # TODO: set the encoding to UTF-8 explicitly (it is the default one anyway).
  # Doing so gave: NameError: uninitialized constant LibXML::XML::Encoding
  #@doc.encoding = XML::Encoding::UTF_8

  # Honour the caller's indent choice here too, as the later saves do.
  @doc.save(@filename, :indent => @indent)
end
100
+
101
+ #
102
+ # Write a tree to a file in phyloxml format.
103
+ #
104
+ # require 'bio'
104
+ # writer = Bio::PhyloXML::Writer.new('tree.xml')
106
+ # writer.write(tree)
107
+ #
108
# Appends +tree+ to the document as a new <phylogeny> element and saves the
# whole document to @filename.
#
# Children are added in this order: the leading phylogeny fields, the clade
# hierarchy starting at tree.root, then the trailing relation and property
# lists.
def write(tree)
  phylogeny = XML::Node.new('phylogeny')
  @root << phylogeny

  leading_fields = [
    [:attr, 'rooted'],
    [:simple, 'name', tree.name],
    [:complex, 'id', tree.phylogeny_id],
    [:simple, 'description', tree.description],
    [:simple, 'date', tree.date],
    [:objarr, 'confidence', 'confidences']
  ]
  PhyloXML::Writer.generate_xml(phylogeny, tree, leading_fields)

  # The root clade has no parent edge, hence no branch length (nil).
  root_clade = tree.root.to_xml(nil, @write_branch_length_as_subelement)
  phylogeny << root_clade

  tree.children(tree.root).each { |child| root_clade << node_to_xml(tree, child, tree.root) }

  trailing_fields = [
    [:objarr, 'clade_relation', 'clade_relations'],
    [:objarr, 'sequence_relation', 'sequence_relations'],
    [:objarr, 'property', 'properties']
  ]
  Bio::PhyloXML::Writer.generate_xml(phylogeny, tree, trailing_fields)

  @doc.save(@filename, :indent => @indent)
end # writer#write
134
+
135
+
136
+ #
137
+ # PhyloXML Schema allows to save data in different xml format after all
138
+ # phylogeny elements. This method is to write these additional data.
139
+ #
140
+ # parser = PhyloXML::Parser.open('phyloxml_examples.xml')
141
+ # writer = PhyloXML::Writer.new('new.xml')
142
+ #
143
+ # parser.each do |tree|
144
+ # writer.write(tree)
145
+ # end
146
+ #
147
+ # # When all the trees are read in by the parser, what's left is saved at
148
+ # # PhyloXML::Parser#other
149
+ # writer.write_other(parser.other)
150
+ #
151
+
152
# Appends the to_xml form of each object in +other_arr+ to the document root,
# in order, then saves the document to @filename.
def write_other(other_arr)
  other_arr.each { |extra| @root << extra.to_xml }
  @doc.save(@filename, :indent => @indent)
end
158
+
159
+ #class method
160
+
161
+ #
162
+ # Used by to_xml methods of PhyloXML element classes. Generally not to be
163
+ # invoked directly.
164
+ #
165
+ def self.generate_xml(root, elem, subelement_array)
166
+ #example usage: generate_xml(node, self, [[ :complex,'accession', ], [:simple, 'name', @name], [:simple, 'location', @location]])
167
+ subelement_array.each do |subelem|
168
+ if subelem[0] == :simple
169
+ root << XML::Node.new(subelem[1], subelem[2].to_s) if subelem[2] != nil and not subelem[2].to_s.empty?
170
+
171
+ elsif subelem[0] == :complex
172
+ root << subelem[2].send("to_xml") if subelem[2] != nil
173
+
174
+ elsif subelem[0] == :pattern
175
+ #seq, self, [[:pattern, 'symbol', @symbol, "\S{1,10}"]
176
+ if subelem[2] != nil
177
+ if subelem[2] =~ subelem[3]
178
+ root << XML::Node.new(subelem[1], subelem[2])
179
+ else
180
+ raise "#{subelem[2]} is not a valid value of #{subelem[1]}. It should follow pattern #{subelem[3]}"
181
+ end
182
+ end
183
+
184
+ elsif subelem[0] == :objarr
185
+ #[:objarr, 'annotation', 'annotations']])
186
+ obj_arr = elem.send(subelem[2])
187
+ obj_arr.each do |arr_elem|
188
+ root << arr_elem.to_xml
189
+ end
190
+
191
+ elsif subelem[0] == :simplearr
192
+ # [:simplearr, 'common_name', @common_names]
193
+ subelem[2].each do |elem_val|
194
+ root << XML::Node.new(subelem[1], elem_val)
195
+ end
196
+ elsif subelem[0] == :attr
197
+ #[:attr, 'rooted']
198
+ obj = elem.send(subelem[1])
199
+ if obj != nil
200
+ root[subelem[1]] = obj.to_s
201
+ end
202
+ else
203
+ raise "Not supported type of element by method generate_xml."
204
+ end
205
+ end
206
+ return root
207
+ end
208
+
209
+ private
210
+
211
# Builds the clade XML for +node+ and, recursively, for all of its
# descendants in +tree+. The branch length is the distance of the edge
# between +parent+ and +node+.
def node_to_xml(tree, node, parent)
  length = tree.get_edge(parent, node).distance
  clade = node.to_xml(length, @write_branch_length_as_subelement)

  tree.children(node).each { |child| clade << node_to_xml(tree, child, node) }

  clade
end
223
+
224
+ end
225
+
226
+ end
227
+ end
@@ -0,0 +1,7 @@
1
+
2
module Bio
  module PhyloXML
    # Version number of the bio-phyloxml gem package. Frozen so the shared
    # constant string cannot be mutated in place.
    VERSION = "1.0.0".freeze
  end
end
@@ -0,0 +1,4 @@
1
+ # for migration from old bioruby to bio-phyloxml
2
# Migration shim from old bioruby to bio-phyloxml: bio-phyloxml is required
# only when the stub require returns true.
require 'bio-phyloxml' if require 'bio-phyloxml/compat/stub_phyloxml_elements'
@@ -0,0 +1,4 @@
1
+ # for migration from old bioruby to bio-phyloxml
2
# Migration shim from old bioruby to bio-phyloxml: bio-phyloxml is required
# only when the stub require returns true.
require 'bio-phyloxml' if require 'bio-phyloxml/compat/stub_phyloxml_parser'
@@ -0,0 +1,4 @@
1
+ # for migration from old bioruby to bio-phyloxml
2
# Migration shim from old bioruby to bio-phyloxml: bio-phyloxml is required
# only when the stub require returns true.
require 'bio-phyloxml' if require 'bio-phyloxml/compat/stub_phyloxml_writer'
@@ -0,0 +1,10 @@
1
+ #
2
+
3
# Loads bio-phyloxml.rb from this file's own directory. Below Ruby 1.9 the
# path is built by hand from __FILE__; otherwise require_relative is used.
if RUBY_VERSION < "1.9"
  require File.join(File.dirname(__FILE__), "bio-phyloxml.rb")
else
  require_relative "bio-phyloxml.rb"
end
10
+
@@ -0,0 +1,205 @@
1
+ #
2
+ # = sample/test_phyloxml_big.rb - Tests for Bio::PhyloXML. Testing very big files.
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
6
+ # Naohisa Goto <ng@bioruby.org>
7
+ # License:: The Ruby License
8
+ #
9
+
10
+ # libraries needed for the tests
11
+ require 'libxml'
12
+ require 'pathname'
13
+ require 'test/unit'
14
+ require 'digest/sha1'
15
+
16
+ require 'bio/command'
17
+ require 'bio/db/phyloxml/phyloxml_parser'
18
+ require 'bio/db/phyloxml/phyloxml_writer'
19
+
20
# The first command-line argument is the data directory; the remaining
# arguments are left in ARGV.
PhyloXMLBigDataPath = ARGV.shift

# exit_code stays false when the directory is usable. Otherwise it is the
# status to exit with after printing the usage text: 0 when no path was
# given, 1 when the given path is not a directory.
exit_code =
  if !PhyloXMLBigDataPath
    0
  elsif !File.directory?(PhyloXMLBigDataPath)
    1
  else
    false
  end

if exit_code
  puts "Usage: #{$0} path_to_data (test options...)",
       "",
       "Requirements:",
       " - Write permission to the path_to_data",
       " - Internet connection for downloading test data",
       " - unzip command to extract downloaded test data",
       "",
       "You may want to run Ruby with -rubygems and -I<path_to_bioruby_lib>.",
       "",
       "Example of usage using /tmp:",
       " $ mkdir /tmp/phyloxml",
       " $ ruby -rubygems -I lib #{$0} /tmp/phyloxml -v",
       ""
  exit(exit_code)
end
46
+
47
# Locates the large phyloXML sample files used by the big-file tests. All
# paths are under PhyloXMLBigDataPath. Missing input files can be downloaded
# and unzipped there after asking the user for confirmation.
module TestPhyloXMLBigData

  module_function

  # Path of the NCBI Metazoa taxonomy sample, or nil if the user declines
  # the download.
  def metazoa_xml
    filename = 'ncbi_taxonomy_metazoa.xml'
    uri = "http://www.phylosoft.org/archaeopteryx/examples/data/ncbi_taxonomy_metazoa.xml.zip"
    download_and_unzip_if_not_found(filename, uri, "1M", "33M")
  end

  # Output path for the first write of the Metazoa tree.
  def metazoa_test_xml
    File.join PhyloXMLBigDataPath, 'writer_test_ncbi_taxonomy_metazoa.xml'
  end

  # Output path for the round-trip write of the Metazoa tree.
  def metazoa_roundtrip_xml
    File.join PhyloXMLBigDataPath, 'roundtrip_test_ncbi_taxonomy_metazoa.xml'
  end

  # Path of the NCBI Mollusca taxonomy sample, or nil if the user declines
  # the download.
  def mollusca_xml
    filename = 'ncbi_taxonomy_mollusca.xml'
    uri = "http://www.phylosoft.org/archaeopteryx/examples/data/ncbi_taxonomy_mollusca.xml.zip"
    download_and_unzip_if_not_found(filename, uri, "67K", "1.5M")
  end

  # Output path for the first write of the Mollusca tree.
  def mollusca_test_xml
    File.join PhyloXMLBigDataPath, 'writer_test_ncbi_taxonomy_mollusca.xml'
  end

  # Output path for the round-trip write of the Mollusca tree.
  def mollusca_roundtrip_xml
    File.join PhyloXMLBigDataPath, 'roundtrip_test_ncbi_taxonomy_mollusca.xml'
  end

  # Path of the "life on earth" sample, or nil if the user declines the
  # download. Right now this file is not compatible with xsd 1.10.
  def life_xml
    filename = 'tol_life_on_earth_1.xml'
    uri = "http://www.phylosoft.org/archaeopteryx/examples/data/tol_life_on_earth_1.xml.zip"
    download_and_unzip_if_not_found(filename, uri, '10M', '45M')
  end

  # Output path for the first write of the "life on earth" tree.
  def life_test_xml
    File.join PhyloXMLBigDataPath, 'writer_test_tol_life_on_earth_1.xml'
  end

  # Output path for the round-trip write of the "life on earth" tree.
  def life_roundtrip_xml
    File.join PhyloXMLBigDataPath, 'roundtrip_test_tol_life_on_earth_1.xml'
  end

  # Runs the external unzip command on "#{file}.zip", extracting into
  # +target_dir+. Returns +file+; raises RuntimeError when the command fails
  # or cannot be run.
  def unzip_file(file, target_dir)
    flag = system('unzip', "#{file}.zip", "-d", target_dir)
    raise "Failed to unzip #{file}.zip" unless flag
    file
  end

  # Returns the path of +basename+ under PhyloXMLBigDataPath, obtaining the
  # file first when necessary:
  #
  # 1. if the file already exists, its path is returned;
  # 2. if "<file>.zip" exists, it is unzipped;
  # 3. otherwise the user is asked whether to download +uri+. +zipsize+ and
  #    +origsize+ are only shown in the prompt.
  #
  # Returns nil when the user does not answer "y".
  def download_and_unzip_if_not_found(basename, uri, zipsize, origsize)
    file = File.join PhyloXMLBigDataPath, basename
    # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
    return file if File.exist?(file)

    if File.exist?("#{file}.zip")
      unzip_file(file, PhyloXMLBigDataPath)
      return file
    end

    puts "File #{basename} does not exist. Do you want to download it? (If yes, ~#{zipsize}B zip file will be downloaded and extracted (to #{origsize}B), if no, the test will be skipped.) y/n?"
    # Read the answer from $stdin explicitly: a bare gets reads through ARGF
    # and would try to open leftover command-line arguments as files.
    res = $stdin.gets
    return nil unless res.to_s.chomp.downcase == "y"

    File.open("#{file}.zip", "wb") do |f|
      f.write(Bio::Command.read_uri(uri))
    end
    puts "File downloaded."
    unzip_file(file, PhyloXMLBigDataPath)
    file
  end

end #end module TestPhyloXMLBigData
134
+
135
module Bio

  # Reads each big sample file, writes its first tree out, then reads the
  # written file back and writes it once more (round trip).
  class TestPhyloXMLBig < Test::Unit::TestCase

    def test_mollusca
      first_pass = do_test_next_tree(TestPhyloXMLBigData.mollusca_xml)
      do_test_write(first_pass, TestPhyloXMLBigData.mollusca_test_xml)

      second_pass = do_test_next_tree(TestPhyloXMLBigData.mollusca_test_xml)
      do_test_write(second_pass, TestPhyloXMLBigData.mollusca_roundtrip_xml)
    end

    def test_metazoa
      first_pass = do_test_next_tree(TestPhyloXMLBigData.metazoa_xml)
      do_test_write(first_pass, TestPhyloXMLBigData.metazoa_test_xml)

      second_pass = do_test_next_tree(TestPhyloXMLBigData.metazoa_test_xml)
      do_test_write(second_pass, TestPhyloXMLBigData.metazoa_roundtrip_xml)
    end

    if false
      # Disabled because of the error.
      #   LibXML::XML::Error: Fatal error: Input is not proper UTF-8,
      #   indicate encoding !
      #   Bytes: 0xE9 0x6B 0x65 0x73 at tol_life_on_earth_1.xml:132170.
      #
      def test_life
        first_pass = do_test_next_tree(TestPhyloXMLBigData.life_xml)
        do_test_write(first_pass, TestPhyloXMLBigData.life_test_xml)

        second_pass = do_test_next_tree(TestPhyloXMLBigData.life_test_xml)
        do_test_write(second_pass, TestPhyloXMLBigData.life_roundtrip_xml)
      end
    end #if false

    private

    # Opens +readfilename+ with the PhyloXML parser and returns its first
    # tree, asserting that reading it raises nothing. Raises when no file
    # name is given (the data file was not made available).
    def do_test_next_tree(readfilename)
      raise "the test is skipped" unless readfilename
      size_in_mb = File.size(readfilename) / 1048576.0
      puts format("Reading %s (%2.1f MB)", readfilename, size_in_mb)

      # Fall back to Parser.new when Parser.open raises NoMethodError.
      phyloxml =
        begin
          Bio::PhyloXML::Parser.open(readfilename)
        rescue NoMethodError
          Bio::PhyloXML::Parser.new(readfilename)
        end

      tree = nil
      assert_nothing_raised do
        tree = phyloxml.next_tree
      end
      tree
    end

    # Writes +tree+ to +writefilename+, asserting that writing raises
    # nothing, then reports the byte count and SHA1 of the written file.
    def do_test_write(tree, writefilename)
      print "Writing to #{writefilename}\n"
      writer = Bio::PhyloXML::Writer.new(writefilename)
      assert_nothing_raised do
        writer.write(tree)
      end

      # checks file size and sha1sum
      contents = File.open(writefilename, 'rb', &:read)
      digest = Digest::SHA1.hexdigest(contents)
      puts "Wrote #{contents.length} bytes."
      puts "sha1: #{digest}"
    end

  end

end