bio 1.5.1 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (128) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +35 -36
  3. data/ChangeLog +911 -2554
  4. data/{gemfiles/Gemfile.travis-ruby2.2 → Gemfile} +0 -3
  5. data/KNOWN_ISSUES.rdoc +10 -13
  6. data/LEGAL +0 -10
  7. data/README.rdoc +40 -96
  8. data/README_DEV.rdoc +5 -5
  9. data/RELEASE_NOTES.rdoc +118 -234
  10. data/Rakefile +13 -43
  11. data/appveyor.yml +29 -0
  12. data/bioruby.gemspec +18 -81
  13. data/bioruby.gemspec.erb +8 -28
  14. data/doc/ChangeLog-1.5.0 +2919 -0
  15. data/doc/RELEASE_NOTES-1.5.0.rdoc +285 -0
  16. data/doc/Tutorial.rd +6 -108
  17. data/doc/Tutorial.rd.html +19 -98
  18. data/gemfiles/Gemfile.travis-jruby1.8 +3 -5
  19. data/gemfiles/Gemfile.travis-jruby1.9 +0 -3
  20. data/gemfiles/Gemfile.travis-rbx +0 -3
  21. data/gemfiles/Gemfile.travis-ruby1.8 +3 -5
  22. data/gemfiles/Gemfile.travis-ruby1.9 +0 -3
  23. data/gemfiles/Gemfile.windows +6 -0
  24. data/gemfiles/prepare-gemspec.rb +4 -0
  25. data/lib/bio.rb +0 -10
  26. data/lib/bio/appl/blast/genomenet.rb +4 -4
  27. data/lib/bio/appl/blast/report.rb +40 -8
  28. data/lib/bio/appl/fasta/format10.rb +2 -1
  29. data/lib/bio/command.rb +10 -0
  30. data/lib/bio/data/codontable.rb +99 -3
  31. data/lib/bio/db/aaindex.rb +74 -5
  32. data/lib/bio/db/gff.rb +3 -1
  33. data/lib/bio/db/kegg/common.rb +14 -0
  34. data/lib/bio/db/kegg/genes.rb +26 -0
  35. data/lib/bio/db/kegg/pathway.rb +5 -11
  36. data/lib/bio/db/soft.rb +2 -2
  37. data/lib/bio/io/flatfile/autodetection.rb +5 -0
  38. data/lib/bio/io/togows.rb +5 -5
  39. data/lib/bio/map.rb +4 -4
  40. data/lib/bio/sequence/format.rb +1 -0
  41. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +1 -1
  42. data/lib/bio/util/sirna.rb +2 -0
  43. data/lib/bio/version.rb +6 -8
  44. data/sample/color_scheme_aa.rb +82 -0
  45. data/sample/color_scheme_na.rb +5 -6
  46. data/sample/fastq2html.cwl +23 -0
  47. data/sample/fastq2html.rb +94 -0
  48. data/sample/fastq2html.testdata.yaml +5 -0
  49. data/sample/na2aa.cwl +23 -0
  50. data/sample/na2aa.rb +11 -25
  51. data/sample/na2aa.testdata.yaml +7 -0
  52. data/sample/rev_comp.cwl +23 -0
  53. data/sample/rev_comp.rb +20 -0
  54. data/sample/rev_comp.testdata.yaml +7 -0
  55. data/sample/test_restriction_enzyme_long.rb +1 -1
  56. data/test/network/bio/db/kegg/test_genes_hsa7422.rb +91 -0
  57. data/test/unit/bio/appl/blast/test_report.rb +4 -4
  58. data/test/unit/bio/data/test_codontable.rb +3 -0
  59. data/test/unit/bio/db/test_gff.rb +5 -0
  60. data/test/unit/bio/test_alignment.rb +2 -2
  61. metadata +20 -77
  62. data/bin/bioruby +0 -47
  63. data/bin/br_biofetch.rb +0 -71
  64. data/bin/br_bioflat.rb +0 -293
  65. data/bin/br_biogetseq.rb +0 -45
  66. data/bin/br_pmfetch.rb +0 -422
  67. data/lib/bio/appl/blast/xmlparser.rb +0 -236
  68. data/lib/bio/db/biosql/biosql_to_biosequence.rb +0 -78
  69. data/lib/bio/db/biosql/sequence.rb +0 -444
  70. data/lib/bio/db/phyloxml/phyloxml.xsd +0 -582
  71. data/lib/bio/db/phyloxml/phyloxml_elements.rb +0 -1197
  72. data/lib/bio/db/phyloxml/phyloxml_parser.rb +0 -1001
  73. data/lib/bio/db/phyloxml/phyloxml_writer.rb +0 -227
  74. data/lib/bio/io/biosql/ar-biosql.rb +0 -257
  75. data/lib/bio/io/biosql/biosql.rb +0 -39
  76. data/lib/bio/io/biosql/config/database.yml +0 -21
  77. data/lib/bio/io/sql.rb +0 -79
  78. data/lib/bio/shell.rb +0 -44
  79. data/lib/bio/shell/core.rb +0 -578
  80. data/lib/bio/shell/demo.rb +0 -146
  81. data/lib/bio/shell/interface.rb +0 -217
  82. data/lib/bio/shell/irb.rb +0 -94
  83. data/lib/bio/shell/object.rb +0 -71
  84. data/lib/bio/shell/plugin/blast.rb +0 -42
  85. data/lib/bio/shell/plugin/codon.rb +0 -218
  86. data/lib/bio/shell/plugin/das.rb +0 -58
  87. data/lib/bio/shell/plugin/emboss.rb +0 -23
  88. data/lib/bio/shell/plugin/entry.rb +0 -137
  89. data/lib/bio/shell/plugin/flatfile.rb +0 -101
  90. data/lib/bio/shell/plugin/midi.rb +0 -430
  91. data/lib/bio/shell/plugin/ncbirest.rb +0 -68
  92. data/lib/bio/shell/plugin/obda.rb +0 -45
  93. data/lib/bio/shell/plugin/psort.rb +0 -56
  94. data/lib/bio/shell/plugin/seq.rb +0 -248
  95. data/lib/bio/shell/plugin/togows.rb +0 -40
  96. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/bioruby_generator.rb +0 -29
  97. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_classes.rhtml +0 -4
  98. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_log.rhtml +0 -27
  99. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_methods.rhtml +0 -11
  100. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_modules.rhtml +0 -4
  101. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_variables.rhtml +0 -7
  102. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-bg.gif +0 -0
  103. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-gem.png +0 -0
  104. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-link.gif +0 -0
  105. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css +0 -368
  106. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.rhtml +0 -47
  107. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_controller.rb +0 -144
  108. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_helper.rb +0 -47
  109. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/commands.rhtml +0 -8
  110. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/history.rhtml +0 -10
  111. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/index.rhtml +0 -26
  112. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/spinner.gif +0 -0
  113. data/lib/bio/shell/script.rb +0 -25
  114. data/lib/bio/shell/setup.rb +0 -108
  115. data/lib/bio/shell/web.rb +0 -102
  116. data/sample/test_phyloxml_big.rb +0 -205
  117. data/setup.rb +0 -1600
  118. data/test/data/phyloxml/apaf.xml +0 -666
  119. data/test/data/phyloxml/bcl_2.xml +0 -2097
  120. data/test/data/phyloxml/made_up.xml +0 -144
  121. data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +0 -65
  122. data/test/data/phyloxml/phyloxml_examples.xml +0 -415
  123. data/test/unit/bio/db/biosql/tc_biosql.rb +0 -114
  124. data/test/unit/bio/db/biosql/ts_suite_biosql.rb +0 -8
  125. data/test/unit/bio/db/test_phyloxml.rb +0 -821
  126. data/test/unit/bio/db/test_phyloxml_writer.rb +0 -334
  127. data/test/unit/bio/shell/plugin/test_seq.rb +0 -187
  128. data/test/unit/bio/test_shell.rb +0 -20
@@ -1,1001 +0,0 @@
1
- #
2
- # = bio/db/phyloxml_parser.rb - PhyloXML parser
3
- #
4
- # Copyright:: Copyright (C) 2009
5
- # Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
6
- # License:: The Ruby License
7
- #
8
- #
9
- # == Description
10
- #
11
- # This file contains a parser for PhyloXML.
12
- #
13
- # == Requirements
14
- #
15
- # Libxml2 XML parser is required. Install libxml-ruby bindings from
16
- # http://libxml.rubyforge.org or
17
- #
18
- # gem install -r libxml-ruby
19
- #
20
- # == References
21
- #
22
- # * http://www.phyloxml.org
23
- #
24
- # * https://www.nescent.org/wg_phyloinformatics/PhyloSoC:PhyloXML_support_in_BioRuby
25
-
26
-
27
- require 'uri'
28
- require 'libxml'
29
-
30
- require 'bio/tree'
31
- require 'bio/db/phyloxml/phyloxml_elements'
32
-
33
-
34
- module Bio
35
-
36
- module PhyloXML
37
-
38
-
39
-
40
-
41
- # == Description
42
- #
43
- # Bio::PhyloXML::Parser is for parsing phyloXML format files.
44
- #
45
- # == Requirements
46
- #
47
- # Libxml2 XML parser is required. Install libxml-ruby bindings from
48
- # http://libxml.rubyforge.org or
49
- #
50
- # gem install -r libxml-ruby
51
- #
52
- # == Usage
53
- #
54
- # require 'bio'
55
- #
56
- # # Create new phyloxml parser
57
- # phyloxml = Bio::PhyloXML::Parser.open('example.xml')
58
- #
59
- # # Print the names of all trees in the file
60
- # phyloxml.each do |tree|
61
- # puts tree.name
62
- # end
63
- #
64
- #
65
- # == References
66
- #
67
- # http://www.phyloxml.org/documentation/version_100/phyloxml.xsd.html
68
- #
69
- class Parser
70
-
71
- include LibXML
72
-
73
- # After parsing all the trees, if there is anything else in other xml format,
74
- # it is saved in this array of PhyloXML::Other objects
75
- attr_reader :other
76
-
77
- # Initializes LibXML::Reader and reads the file until it reaches the first
78
- # phylogeny element.
79
- #
80
- # Example: Create a new Bio::PhyloXML::Parser object.
81
- #
82
- # p = Bio::PhyloXML::Parser.open("./phyloxml_examples.xml")
83
- #
84
- # If the optional code block is given, Bio::PhyloXML object is passed to
85
- # the block as an argument. When the block terminates, the Bio::PhyloXML
86
- # object is automatically closed, and the open method returns the value
87
- # of the block.
88
- #
89
- # Example: Get the first tree in the file.
90
- #
91
- # tree = Bio::PhyloXML::Parser.open("example.xml") do |px|
92
- # px.next_tree
93
- # end
94
- #
95
- # ---
96
- # *Arguments*:
97
- # * (required) _filename_: Path to the file to parse.
98
- # * (optional) _validate_: Whether to validate the file against schema or not. Default value is true.
99
- # *Returns*:: (without block) Bio::PhyloXML::Parser object
100
- # *Returns*:: (with block) the value of the block
101
- def self.open(filename, validate=true)
102
- obj = new(nil, validate)
103
- obj.instance_eval {
104
- filename = _secure_filename(filename)
105
- _validate(:file, filename) if validate
106
- # XML::Parser::Options::NONET for security reason
107
- @reader = XML::Reader.file(filename,
108
- { :options =>
109
- LibXML::XML::Parser::Options::NONET })
110
- _skip_leader
111
- }
112
- if block_given? then
113
- begin
114
- ret = yield obj
115
- ensure
116
- obj.close if obj and !obj.closed?
117
- end
118
- ret
119
- else
120
- obj
121
- end
122
- end
123
-
124
- # Initializes LibXML::Reader and reads the file until it reaches the first
125
- # phylogeny element.
126
- #
127
- # Create a new Bio::PhyloXML::Parser object.
128
- #
129
- # p = Bio::PhyloXML::Parser.open_uri("http://www.phyloxml.org/examples/apaf.xml")
130
- #
131
- # If the optional code block is given, Bio::PhyloXML object is passed to
132
- # the block as an argument. When the block terminates, the Bio::PhyloXML
133
- # object is automatically closed, and the open_uri method returns the
134
- # value of the block.
135
- #
136
- # ---
137
- # *Arguments*:
138
- # * (required) _uri_: (URI or String) URI to the data to parse
139
- # * (optional) _validate_: For URI reader, the "validate" option is ignored and no validation is executed.
140
- # *Returns*:: (without block) Bio::PhyloXML::Parser object
141
- # *Returns*:: (with block) the value of the block
142
- def self.open_uri(uri, validate=true)
143
- case uri
144
- when URI
145
- uri = uri.to_s
146
- else
147
- # raises error if not a String
148
- uri = uri.to_str
149
- # raises error if invalid URI
150
- URI.parse(uri)
151
- end
152
-
153
- obj = new(nil, validate)
154
- obj.instance_eval {
155
- @reader = XML::Reader.file(uri)
156
- _skip_leader
157
- }
158
- if block_given? then
159
- begin
160
- ret = yield obj
161
- ensure
162
- obj.close if obj and !obj.closed?
163
- end
164
- ret
165
- else
166
- obj
167
- end
168
- end
169
-
170
- # Special class for closed PhyloXML::Parser object.
171
- # It raises error for any methods except essential methods.
172
- #
173
- # Bio::PhyloXML internal use only.
174
- class ClosedPhyloXMLParser #:nodoc:
175
- def method_missing(*arg)
176
- raise LibXML::XML::Error, 'closed PhyloXML::Parser object'
177
- end
178
- end #class ClosedPhyloXMLParser
179
-
180
- # Closes the LibXML::Reader inside the object.
181
- # It also closes the opened file if it is created by using
182
- # Bio::PhyloXML::Parser.open method.
183
- #
184
- # When closed object is closed again, or closed object is used,
185
- # it raises LibXML::XML::Error.
186
- # ---
187
- # *Returns*:: nil
188
- def close
189
- @reader.close
190
- @reader = ClosedPhyloXMLParser.new
191
- nil
192
- end
193
-
194
- # If the object is closed by using the close method or equivalent,
195
- # returns true. Otherwise, returns false.
196
- # ---
197
- # *Returns*:: true or false
198
- def closed?
199
- if @reader.kind_of?(ClosedPhyloXMLParser) then
200
- true
201
- else
202
- false
203
- end
204
- end
205
-
206
- # Initializes LibXML::Reader and reads from the IO until it reaches
207
- # the first phylogeny element.
208
- #
209
- # Create a new Bio::PhyloXML::Parser object.
210
- #
211
- # p = Bio::PhyloXML::Parser.for_io($stdin)
212
- #
213
- # ---
214
- # *Arguments*:
215
- # * (required) _io_: IO object
216
- # * (optional) _validate_: For IO reader, the "validate" option is ignored and no validation is executed.
217
- # *Returns*:: Bio::PhyloXML::Parser object
218
- def self.for_io(io, validate=true)
219
- obj = new(nil, validate)
220
- obj.instance_eval {
221
- @reader = XML::Reader.io(io,
222
- { :options =>
223
- LibXML::XML::Parser::Options::NONET })
224
- _skip_leader
225
- }
226
- obj
227
- end
228
-
229
- # (private) returns PhyloXML schema
230
- def _schema
231
- XML::Schema.document(XML::Document.file(File.join(File.dirname(__FILE__),'phyloxml.xsd')))
232
- end
233
- private :_schema
234
-
235
- # (private) do validation
236
- # ---
237
- # *Arguments*:
238
- # * (required) <em>data_type</em>: :file for filename, :string for string
239
- # * (required) _arg_: filename or string
240
- # *Returns*:: (undefined)
241
- def _validate(data_type, arg)
242
- options = { :options =>
243
- (LibXML::XML::Parser::Options::NOERROR | # no error messages
244
- LibXML::XML::Parser::Options::NOWARNING | # no warning messages
245
- LibXML::XML::Parser::Options::NONET) # no network access
246
- }
247
- case data_type
248
- when :file
249
- # No validation when special file e.g. FIFO (named pipe)
250
- return unless File.file?(arg)
251
- xml_instance = XML::Document.file(arg, options)
252
- when :string
253
- xml_instance = XML::Document.string(arg, options)
254
- else
255
- # no validation for unknown data type
256
- return
257
- end
258
-
259
- schema = _schema
260
- begin
261
- flag = xml_instance.validate_schema(schema) do |msg, _|
262
- # The document of libxml-ruby says that the block is called
263
- # when validation failed, but it seems it is never called
264
- # even when validation failed!
265
- raise "Validation of the XML document against phyloxml.xsd schema failed. #{msg}"
266
- end
267
- rescue LibXML::XML::Error => evar
268
- raise "Validation of the XML document against phyloxml.xsd schema failed, or XML error occurred. #{evar.message}"
269
- end
270
- unless flag then
271
- raise "Validation of the XML document against phyloxml.xsd schema failed."
272
- end
273
- end
274
- private :_validate
275
-
276
- # (private) It seems that LibXML::XML::Reader reads from the network
277
- # even if LibXML::XML::Parser::Options::NONET is set.
278
- # So, for URI-like filename, '://' is replaced with ':/'.
279
- def _secure_filename(filename)
280
- # for safety, URI-like filename is checked.
281
- if /\A[a-zA-Z]+\:\/\// =~ filename then
282
- # for example, "http://a/b" is changed to "http:/a/b".
283
- filename = filename.sub(/\:\/\//, ':/')
284
- end
285
- filename
286
- end
287
- private :_secure_filename
288
-
289
- # (private) loops through until reaches phylogeny stuff
290
- def _skip_leader
291
- #loops through until reaches phylogeny stuff
292
- # Have to leave this way, if accepting strings, instead of files
293
- @reader.read until is_element?('phylogeny')
294
- nil
295
- end
296
- private :_skip_leader
297
-
298
- # Initializes LibXML::Reader and reads the PhyloXML-formatted string
299
- # until it reaches the first phylogeny element.
300
- #
301
- # Create a new Bio::PhyloXML::Parser object.
302
- #
303
- # str = File.read("./phyloxml_examples.xml")
304
- # p = Bio::PhyloXML::Parser.new(str)
305
- #
306
- #
307
- # Deprecated usage: Reads data from a file. <em>str</em> is a filename.
308
- #
309
- # p = Bio::PhyloXML::Parser.new("./phyloxml_examples.xml")
310
- #
311
- # Taking filename is deprecated. Use Bio::PhyloXML::Parser.open(filename).
312
- #
313
- # ---
314
- # *Arguments*:
315
- # * (required) _str_: PhyloXML-formatted string
316
- # * (optional) _validate_: Whether to validate the file against schema or not. Default value is true.
317
- # *Returns*:: Bio::PhyloXML::Parser object
318
- def initialize(str, validate=true)
319
-
320
- @other = []
321
-
322
- return unless str
323
-
324
- # For compatibility, if filename-like string is given,
325
- # treat it as a filename.
326
- if /[\<\>\r\n]/ !~ str and File.exist?(str) then
327
- # assume that str is filename
328
- warn "Bio::PhyloXML::Parser.new(filename) is deprecated. Use Bio::PhyloXML::Parser.open(filename)."
329
- filename = _secure_filename(str)
330
- _validate(:file, filename) if validate
331
- @reader = XML::Reader.file(filename)
332
- _skip_leader
333
- return
334
- end
335
-
336
- # initialize for string
337
- @reader = XML::Reader.string(str,
338
- { :options =>
339
- LibXML::XML::Parser::Options::NONET })
340
- _skip_leader
341
- end
342
-
343
-
344
- # Iterate through all trees in the file.
345
- #
346
- # phyloxml = Bio::PhyloXML::Parser.open('example.xml')
347
- # phyloxml.each do |tree|
348
- # puts tree.name
349
- # end
350
- #
351
- def each
352
- while tree = next_tree
353
- yield tree
354
- end
355
- end
356
-
357
- # Access the specified tree in the file. It parses trees until the specified
358
- # tree is reached.
359
- #
360
- # # Get 3rd tree in the file (starts counting from 0).
361
- # parser = PhyloXML::Parser.open('phyloxml_examples.xml')
362
- # tree = parser[2]
363
- #
364
- def [](i)
365
- tree = nil
366
- (i+1).times do
367
- tree = self.next_tree
368
- end
369
- return tree
370
- end
371
-
372
- # Parse and return the next phylogeny tree. If there are no more phylogeny
373
- # elements, nil is returned. If there is something else besides phylogeny
374
- # elements, it is saved in the PhyloXML::Parser#other.
375
- #
376
- # p = Bio::PhyloXML::Parser.open("./phyloxml_examples.xml")
377
- # tree = p.next_tree
378
- #
379
- # ---
380
- # *Returns*:: Bio::PhyloXML::Tree
381
- def next_tree()
382
-
383
- if not is_element?('phylogeny')
384
- if @reader.node_type == XML::Reader::TYPE_END_ELEMENT
385
- if is_end_element?('phyloxml')
386
- return nil
387
- else
388
- @reader.read
389
- @reader.read
390
- if is_end_element?('phyloxml')
391
- return nil
392
- end
393
- end
394
- end
395
- # phyloxml can hold only phylogeny and "other" elements. If this is not
396
- # phylogeny element then it is other. Also, "other" always comes after
397
- # all phylogenies
398
- @other << parse_other
399
- #return nil for tree, since this is not valid phyloxml tree.
400
- return nil
401
- end
402
-
403
- tree = Bio::PhyloXML::Tree.new
404
-
405
- # keep track of current node in clades array/stack. Current node is the
406
- # last element in the clades array
407
- clades = []
408
- clades.push tree
409
-
410
- #keep track of current edge to be able to parse branch_length tag
411
- current_edge = nil
412
-
413
- # we are going to parse clade iteratively by pointing (and changing) to
414
- # the current node in the tree. Since the property element is both in
415
- # clade and in the phylogeny, we need some boolean to know if we are
416
- # parsing the clade (there can be only max 1 clade in phylogeny) or
417
- # parsing phylogeny
418
- parsing_clade = false
419
-
420
- while not is_end_element?('phylogeny') do
421
- break if is_end_element?('phyloxml')
422
-
423
- # parse phylogeny elements, except clade
424
- if not parsing_clade
425
-
426
- if is_element?('phylogeny')
427
- @reader["rooted"] == "true" ? tree.rooted = true : tree.rooted = false
428
- @reader["rerootable"] == "true" ? tree.rerootable = true : tree.rerootable = false
429
- parse_attributes(tree, ["branch_length_unit", 'type'])
430
- end
431
-
432
- parse_simple_elements(tree, [ "name", 'description', "date"])
433
-
434
- if is_element?('confidence')
435
- tree.confidences << parse_confidence
436
- end
437
-
438
- end
439
-
440
- if @reader.node_type == XML::Reader::TYPE_ELEMENT
441
- case @reader.name
442
- when 'clade'
443
- #parse clade element
444
-
445
- parsing_clade = true
446
-
447
- node= Bio::PhyloXML::Node.new
448
-
449
- branch_length = @reader['branch_length']
450
-
451
- parse_attributes(node, ["id_source"])
452
-
453
- #add new node to the tree
454
- tree.add_node(node)
455
- # The first clade will always be root since by xsd schema phyloxml can
456
- # have 0 to 1 clades in it.
457
- if tree.root == nil
458
- tree.root = node
459
- else
460
- current_edge = tree.add_edge(clades[-1], node,
461
- Bio::Tree::Edge.new(branch_length))
462
- end
463
- clades.push node
464
- #end if clade element
465
- else
466
- parse_clade_elements(clades[-1], current_edge) if parsing_clade
467
- end
468
- end
469
-
470
- #end clade element, go one parent up
471
- if is_end_element?('clade')
472
-
473
- #if we have reached the closing tag of the top-most clade, then our
474
- # current node should point to the root. If that's the case, we are done
475
- # parsing the clade element
476
- if clades[-1] == tree.root
477
- parsing_clade = false
478
- else
479
- # set current node (clades[-1]) to the previous clade in the array
480
- clades.pop
481
- end
482
- end
483
-
484
- #parsing phylogeny elements
485
- if not parsing_clade
486
-
487
- if @reader.node_type == XML::Reader::TYPE_ELEMENT
488
- case @reader.name
489
- when 'property'
490
- tree.properties << parse_property
491
-
492
- when 'clade_relation'
493
- clade_relation = CladeRelation.new
494
- parse_attributes(clade_relation, ["id_ref_0", "id_ref_1", "distance", "type"])
495
-
496
- #@ add unit test for this
497
- if not @reader.empty_element?
498
- @reader.read
499
- if is_element?('confidence')
500
- clade_relation.confidence = parse_confidence
501
- end
502
- end
503
- tree.clade_relations << clade_relation
504
-
505
- when 'sequence_relation'
506
- sequence_relation = SequenceRelation.new
507
- parse_attributes(sequence_relation, ["id_ref_0", "id_ref_1", "distance", "type"])
508
- if not @reader.empty_element?
509
- @reader.read
510
- if is_element?('confidence')
511
- sequence_relation.confidence = parse_confidence
512
- end
513
- end
514
- tree.sequence_relations << sequence_relation
515
- when 'phylogeny'
516
- #do nothing
517
- else
518
- tree.other << parse_other
519
- #puts "Not recognized element. #{@reader.name}"
520
- end
521
- end
522
- end
523
- # go to next element
524
- @reader.read
525
- end #end while not </phylogeny>
526
- #move on to the next tag after /phylogeny which is text, since phylogeny
527
- #end tag is empty element, which value is nil, therefore need to move to
528
- #the next meaningful element (therefore @reader.read twice)
529
- @reader.read
530
- @reader.read
531
-
532
- return tree
533
- end
534
-
535
- # return tree of specified name.
536
- # @todo Implement this method.
537
- # def get_tree_by_name(name)
538
-
539
- # while not is_end_element?('phyloxml')
540
- # if is_element?('phylogeny')
541
- # @reader.read
542
- # @reader.read
543
- #
544
- # if is_element?('name')
545
- # @reader.read
546
- # if @reader.value == name
547
- # puts "equasl"
548
- # tree = next_tree
549
- # puts tree
550
- # end
551
- # end
552
- # end
553
- # @reader.read
554
- # end
555
- #
556
- # end
557
-
558
-
559
- private
560
-
561
- ####
562
- # Utility methods
563
- ###
564
-
565
- def is_element?(str)
566
- @reader.node_type == XML::Reader::TYPE_ELEMENT and @reader.name == str ? true : false
567
- end
568
-
569
- def is_end_element?(str)
570
- @reader.node_type==XML::Reader::TYPE_END_ELEMENT and @reader.name == str ? true : false
571
- end
572
-
573
- def has_reached_end_element?(str)
574
- if not(is_end_element?(str))
575
- raise "Warning: Should have reached </#{str}> element here"
576
- end
577
- end
578
-
579
- # Parses a simple XML element. for example <speciations>1</speciations>
580
- # It reads in the value and assigns it to object.speciation = 1
581
- # Also checks that the end tag (</speciations>) has been reached and raises
582
- # if not
583
- def parse_simple_element(object, name)
584
- if is_element?(name)
585
- @reader.read
586
- object.send("#{name}=", @reader.value)
587
- @reader.read
588
- has_reached_end_element?(name)
589
- end
590
- end
591
-
592
- def parse_simple_elements(object, elements)
593
- elements.each do |elmt|
594
- parse_simple_element(object, elmt)
595
- end
596
- end
597
-
598
- #Parses list of attributes
599
- #use for the code like: clade_relation.type = @reader["type"]
600
- def parse_attributes(object, arr_of_attrs)
601
- arr_of_attrs.each do |attr|
602
- object.send("#{attr}=", @reader[attr])
603
- end
604
- end
605
-
606
- def parse_clade_elements(current_node, current_edge)
607
- #no loop inside, loop is already outside
608
-
609
- if @reader.node_type == XML::Reader::TYPE_ELEMENT
610
- case @reader.name
611
- when 'branch_length'
612
- # @todo add unit test for this. current_edge is nil, if the root clade
613
- # has branch_length attribute.
614
- @reader.read
615
- branch_length = @reader.value
616
- current_edge.distance = branch_length.to_f if current_edge != nil
617
- @reader.read
618
- when 'width'
619
- @reader.read
620
- current_node.width = @reader.value
621
- @reader.read
622
- when 'name'
623
- @reader.read
624
- current_node.name = @reader.value
625
- @reader.read
626
- when 'events'
627
- current_node.events = parse_events
628
- when 'confidence'
629
- current_node.confidences << parse_confidence
630
- when 'sequence'
631
- current_node.sequences << parse_sequence
632
- when 'property'
633
- current_node.properties << parse_property
634
- when 'taxonomy'
635
- current_node.taxonomies << parse_taxonomy
636
- when 'distribution'
637
- current_node.distributions << parse_distribution
638
- when 'node_id'
639
- id = Id.new
640
- id.type = @reader["type"]
641
- @reader.read
642
- id.value = @reader.value
643
- @reader.read
644
- #has_reached_end_element?('node_id')
645
- #@todo write unit test for this. There is no example of this in the example files
646
- current_node.id = id
647
- when 'color'
648
- color = BranchColor.new
649
- parse_simple_element(color, 'red')
650
- parse_simple_element(color, 'green')
651
- parse_simple_element(color, 'blue')
652
- current_node.color = color
653
- #@todo add unit test for this
654
- when 'date'
655
- date = Date.new
656
- date.unit = @reader["unit"]
657
- #move to the next token, which is always empty, since date tag does not
658
- # have text associated with it
659
- @reader.read
660
- @reader.read #now the token is the first tag under date tag
661
- while not(is_end_element?('date'))
662
- parse_simple_element(date, 'desc')
663
- parse_simple_element(date, 'value')
664
- parse_simple_element(date, 'minimum')
665
- parse_simple_element(date, 'maximum')
666
- @reader.read
667
- end
668
- current_node.date = date
669
- when 'reference'
670
- reference = Reference.new()
671
- reference.doi = @reader['doi']
672
- if not @reader.empty_element?
673
- while not is_end_element?('reference')
674
- parse_simple_element(reference, 'desc')
675
- @reader.read
676
- end
677
- end
678
- current_node.references << reference
679
- when 'binary_characters'
680
- current_node.binary_characters = parse_binary_characters
681
- when 'clade'
682
- #do nothing
683
- else
684
- current_node.other << parse_other
685
- #puts "No match found in parse_clade_elements.(#{@reader.name})"
686
- end
687
-
688
- end
689
-
690
- end #parse_clade_elements
691
-
692
- def parse_events()
693
- events = PhyloXML::Events.new
694
- @reader.read #go to next element
695
- while not(is_end_element?('events')) do
696
- parse_simple_elements(events, ['type', 'duplications',
697
- 'speciations', 'losses'])
698
- if is_element?('confidence')
699
- events.confidence = parse_confidence
700
- #@todo could add unit test for this (example file does not have this case)
701
- end
702
- @reader.read
703
- end
704
- return events
705
- end #parse_events
706
-
707
- def parse_taxonomy
708
- taxonomy = PhyloXML::Taxonomy.new
709
- parse_attributes(taxonomy, ["id_source"])
710
- @reader.read
711
- while not(is_end_element?('taxonomy')) do
712
-
713
- if @reader.node_type == XML::Reader::TYPE_ELEMENT
714
- case @reader.name
715
- when 'code'
716
- @reader.read
717
- taxonomy.code = @reader.value
718
- @reader.read
719
- when 'scientific_name'
720
- @reader.read
721
- taxonomy.scientific_name = @reader.value
722
- @reader.read
723
- when 'rank'
724
- @reader.read
725
- taxonomy.rank = @reader.value
726
- @reader.read
727
- when 'authority'
728
- @reader.read
729
- taxonomy.authority = @reader.value
730
- @reader.read
731
- when 'id'
732
- taxonomy.taxonomy_id = parse_id('id')
733
- when 'common_name'
734
- @reader.read
735
- taxonomy.common_names << @reader.value
736
- @reader.read
737
- #has_reached_end_element?('common_name')
738
- when 'synonym'
739
- @reader.read
740
- taxonomy.synonyms << @reader.value
741
- @reader.read
742
- #has_reached_end_element?('synonym')
743
- when 'uri'
744
- taxonomy.uri = parse_uri
745
- else
746
- taxonomy.other << parse_other
747
- end
748
- end
749
-
750
- @reader.read #move to next tag in the loop
751
- end
752
- return taxonomy
753
- end #parse_taxonomy
754
-
755
- private
756
-
757
- def parse_sequence
758
- sequence = Sequence.new
759
- parse_attributes(sequence, ["type", "id_source", "id_ref"])
760
-
761
- @reader.read
762
- while not(is_end_element?('sequence'))
763
-
764
- if @reader.node_type == XML::Reader::TYPE_ELEMENT
765
- case @reader.name
766
- when 'symbol'
767
- @reader.read
768
- sequence.symbol = @reader.value
769
- @reader.read
770
- when 'name'
771
- @reader.read
772
- sequence.name = @reader.value
773
- @reader.read
774
- when 'location'
775
- @reader.read
776
- sequence.location = @reader.value
777
- @reader.read
778
- when 'mol_seq'
779
- sequence.is_aligned = @reader["is_aligned"]
780
- @reader.read
781
- sequence.mol_seq = @reader.value
782
- @reader.read
783
- has_reached_end_element?('mol_seq')
784
- when 'accession'
785
- sequence.accession = Accession.new
786
- sequence.accession.source = @reader["source"]
787
- @reader.read
788
- sequence.accession.value = @reader.value
789
- @reader.read
790
- has_reached_end_element?('accession')
791
- when 'uri'
792
- sequence.uri = parse_uri
793
- when 'annotation'
794
- sequence.annotations << parse_annotation
795
- when 'domain_architecture'
796
- sequence.domain_architecture = DomainArchitecture.new
797
- sequence.domain_architecture.length = @reader["length"]
798
- @reader.read
799
- @reader.read
800
- while not(is_end_element?('domain_architecture'))
801
- sequence.domain_architecture.domains << parse_domain
802
- @reader.read #go to next domain element
803
- end
804
- else
805
- sequence.other << parse_other
806
- #@todo add unit test
807
- end
808
- end
809
-
810
- @reader.read
811
- end
812
- return sequence
813
- end #parse_sequence
814
-
815
- def parse_uri
816
- uri = Uri.new
817
- parse_attributes(uri, ["desc", "type"])
818
- parse_simple_element(uri, 'uri')
819
- return uri
820
- end
821
-
822
# Parses an <annotation> element into an Annotation.
#
# Attribute names 'ref', 'source', 'evidence' and 'type' are passed to
# parse_attributes. For a self-closing element nothing else is read.
# Otherwise nodes are visited until the reader is on the annotation's end
# element, dispatching on <desc>, <confidence>, <property> and <uri>.
#
# Returns the Annotation.
def parse_annotation
  note = Annotation.new

  parse_attributes(note, %w[ref source evidence type])

  return note if @reader.empty_element?

  until is_end_element?('annotation')
    # These are deliberately independent checks rather than an elsif chain:
    # each sub-parser may move the reader, and the following test is made
    # against wherever the reader was left.
    if is_element?('desc')
      parse_simple_element(note, 'desc')
    end

    if is_element?('confidence')
      note.confidence = parse_confidence
    end

    if is_element?('property')
      note.properties << parse_property
    end

    note.uri = parse_uri if is_element?('uri')

    @reader.read
  end

  note
end
845
-
846
# Parses a <property> element into a Property.
#
# Attribute names "ref", "unit", "datatype", "applies_to" and "id_ref" are
# passed to parse_attributes; the node following the opening tag supplies
# the value. The reader is then advanced once more and
# has_reached_end_element?('property') is invoked as a position check.
#
# Returns the Property.
def parse_property
  Property.new.tap do |prop|
    parse_attributes(prop, %w[ref unit datatype applies_to id_ref])
    @reader.read                 # step from the opening tag onto its content
    prop.value = @reader.value
    @reader.read                 # step onto what should be the closing tag
    has_reached_end_element?('property')
  end
end #parse_property
855
-
856
# Parses a <confidence> element.
#
# The "type" attribute is read from the opening tag, and the value of the
# next node is converted with to_f. After one more read,
# has_reached_end_element?('confidence') is invoked as a position check.
#
# Returns Confidence.new(type, value).
def parse_confidence
  kind = @reader["type"]
  @reader.read
  score = @reader.value.to_f
  @reader.read
  has_reached_end_element?('confidence')
  Confidence.new(kind, score)
end #parse_confidence
864
-
865
# Parses a <distribution> element into a Distribution.
#
# After stepping past the opening tag, every node up to the distribution's
# end element is visited: parse_simple_element is invoked for 'desc' on each
# pass (it is called unconditionally here), and <point> / <polygon> elements
# are collected through parse_point / parse_polygon.
#
# Returns the Distribution.
def parse_distribution
  dist = Distribution.new
  @reader.read

  until is_end_element?('distribution')
    parse_simple_element(dist, 'desc')

    if is_element?('point')
      dist.points << parse_point
    end
    if is_element?('polygon')
      dist.polygons << parse_polygon
    end

    @reader.read
  end

  dist
end #parse_distribution
879
-
880
# Parses a <point> element into a Point.
#
# The "geodetic_datum" and "alt_unit" attributes are copied from the opening
# tag. Inside the element, 'lat' and 'long' are delegated to
# parse_simple_elements, while <alt> is handled inline and stored after to_f.
#
# Returns the Point.
def parse_point
  pt = Point.new

  pt.geodetic_datum = @reader["geodetic_datum"]
  pt.alt_unit = @reader["alt_unit"]

  @reader.read
  until is_end_element?('point')
    parse_simple_elements(pt, %w[lat long])

    if is_element?('alt')
      @reader.read                       # onto the altitude text
      pt.alt = @reader.value.to_f
      @reader.read                       # onto what should be </alt>
      has_reached_end_element?('alt')
    end

    # move on to the next node inside <point>
    @reader.read
  end

  pt
end #parse_point
902
-
903
# Parses a <polygon> element into a Polygon.
#
# Steps past the opening tag, then visits nodes until the reader is on the
# polygon's end element, appending the result of parse_point for every
# <point> element encountered.
#
# A polygon with fewer than three points is still returned, but a notice is
# emitted. The notice is sent through Kernel#warn (standard error) rather
# than puts, so that a library diagnostic never mixes into the standard
# output of the calling program.
#
# Returns the Polygon.
def parse_polygon
  polygon = Polygon.new
  @reader.read
  until is_end_element?('polygon')
    polygon.points << parse_point if is_element?('point')
    @reader.read
  end

  #@todo should check for it at all? Probably not if xml is valid.
  warn "Warning: <polygon> should have at least 3 points" if polygon.points.length < 3
  polygon
end #parse_polygon
917
-
918
# Parses an identifier element into an Id.
#
# tag_name - name of the element being parsed; it is only used for the
#            has_reached_end_element? position check at the end.
#
# The "provider" attribute comes from the opening tag and the value from the
# node that follows it.
#
# Returns the Id.
def parse_id(tag_name)
  ident = Id.new
  ident.provider = @reader["provider"]
  @reader.read
  ident.value = @reader.value
  @reader.read #@todo shouldn't there be another read?
  has_reached_end_element?(tag_name)
  ident
end #parse_id
927
-
928
# Parses a <domain> element into a ProteinDomain.
#
# Attribute names "from", "to", "confidence" and "id" are passed to
# parse_attributes; the node after the opening tag supplies the value.
# Unlike the sibling parsers, this one performs an additional read after the
# end-element check, so it returns with the reader one node past </domain>.
#
# Returns the ProteinDomain.
def parse_domain
  ProteinDomain.new.tap do |dom|
    parse_attributes(dom, %w[from to confidence id])
    @reader.read
    dom.value = @reader.value
    @reader.read
    has_reached_end_element?('domain')
    @reader.read # leave the reader on the node following </domain>
  end
end
938
-
939
# Parses a <binary_characters> element into a PhyloXML::BinaryCharacters.
#
# The "type" attribute is stored as bc_type, and the four *_count attribute
# names are passed to parse_attributes. For a self-closing element nothing
# further is read. Otherwise each node up to the end element is offered to
# parse_bc for 'lost', 'gained', 'absent' and 'present', in that order.
#
# Returns the BinaryCharacters object.
def parse_binary_characters
  chars = PhyloXML::BinaryCharacters.new
  chars.bc_type = @reader['type']

  parse_attributes(chars, %w[gained_count absent_count lost_count present_count])
  return chars if @reader.empty_element?

  @reader.read
  until is_end_element?('binary_characters')
    %w[lost gained absent present].each { |state| parse_bc(chars, state) }

    @reader.read
  end

  chars
end #parse_binary_characters
958
-
959
# Collects the <bc> children of one binary-character group.
#
# object  - receiver that responds to a method named after +element+ and
#           returns a collection supporting <<.
# element - group name; the callers in this file pass 'lost', 'gained',
#           'absent' or 'present'.
#
# Does nothing unless the reader is currently on an opening +element+ tag.
# Otherwise every <bc> value found before the group's end element is
# appended to object.send(element).
def parse_bc(object, element)
  return unless is_element?(element)

  @reader.read
  until is_end_element?(element)
    if is_element?('bc')
      @reader.read                          # onto the <bc> text
      object.send(element) << @reader.value
      @reader.read                          # onto what should be </bc>
      has_reached_end_element?('bc')
    end
    @reader.read
  end
end #parse_bc
973
-
974
# Parses an element that no dedicated parser handles into a generic
# PhyloXML::Other node, recursing into child elements.
#
# The element name and all attributes are recorded. Child elements become
# entries in +children+ and a text node becomes +value+.
#
# Two situations are handled explicitly:
#
# * Self-closing elements (<foo/>). They have no end tag, so scanning for one
#   would run on into the content that follows. The flag is sampled before
#   the attribute walk because the walk moves the reader off the element
#   node; afterwards the reader is moved back onto the element and the method
#   returns without reading further.
# * A child with the same name as its parent. The end-tag test is made only
#   after a fresh read, never directly after a recursive call, so the child's
#   end tag is not mistaken for the parent's.
#
# Returns the populated PhyloXML::Other. For an element with content the
# reader is left on its end tag; for a self-closing element it is left on the
# element itself.
def parse_other
  other_obj = PhyloXML::Other.new
  other_obj.element_name = @reader.name
  self_closing = @reader.empty_element?

  #parse attributes
  code = @reader.move_to_first_attribute
  while code == 1
    other_obj.attributes[@reader.name] = @reader.value
    code = @reader.move_to_next_attribute
  end

  if self_closing
    @reader.move_to_element
    return other_obj
  end

  @reader.read
  until is_end_element?(other_obj.element_name)
    if @reader.node_type == XML::Reader::TYPE_ELEMENT
      other_obj.children << parse_other #recursive call to parse children
    elsif @reader.node_type == XML::Reader::TYPE_TEXT
      other_obj.value = @reader.value
    end
    @reader.read
  end
  #just a check
  has_reached_end_element?(other_obj.element_name)
  other_obj
end #parse_other
996
-
997
- end #class phyloxmlParser
998
-
999
- end #module PhyloXML
1000
-
1001
- end #module Bio