bio 1.5.2 → 1.6.0.pre.20181210

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +12 -11
  3. data/ChangeLog +14 -3106
  4. data/{gemfiles/Gemfile.travis-ruby2.2 → Gemfile} +0 -1
  5. data/KNOWN_ISSUES.rdoc +0 -5
  6. data/README.rdoc +11 -18
  7. data/RELEASE_NOTES.rdoc +34 -291
  8. data/Rakefile +13 -9
  9. data/appveyor.yml +21 -0
  10. data/bioruby.gemspec +7 -78
  11. data/bioruby.gemspec.erb +8 -27
  12. data/doc/ChangeLog-1.5.0 +2919 -0
  13. data/doc/RELEASE_NOTES-1.5.0.rdoc +285 -0
  14. data/doc/Tutorial.rd +6 -108
  15. data/doc/Tutorial.rd.html +19 -98
  16. data/gemfiles/Gemfile.travis-jruby1.8 +3 -5
  17. data/gemfiles/Gemfile.travis-jruby1.9 +0 -3
  18. data/gemfiles/Gemfile.travis-rbx +0 -1
  19. data/gemfiles/Gemfile.travis-ruby1.8 +4 -4
  20. data/gemfiles/Gemfile.travis-ruby1.9 +0 -1
  21. data/gemfiles/prepare-gemspec.rb +4 -0
  22. data/lib/bio.rb +0 -10
  23. data/lib/bio/data/codontable.rb +99 -3
  24. data/lib/bio/io/togows.rb +5 -5
  25. data/lib/bio/version.rb +6 -8
  26. data/sample/test_restriction_enzyme_long.rb +1 -1
  27. data/test/unit/bio/data/test_codontable.rb +3 -0
  28. metadata +11 -77
  29. data/bin/bioruby +0 -47
  30. data/bin/br_biofetch.rb +0 -71
  31. data/bin/br_bioflat.rb +0 -293
  32. data/bin/br_biogetseq.rb +0 -45
  33. data/bin/br_pmfetch.rb +0 -422
  34. data/lib/bio/db/biosql/biosql_to_biosequence.rb +0 -78
  35. data/lib/bio/db/biosql/sequence.rb +0 -444
  36. data/lib/bio/db/phyloxml/phyloxml.xsd +0 -582
  37. data/lib/bio/db/phyloxml/phyloxml_elements.rb +0 -1197
  38. data/lib/bio/db/phyloxml/phyloxml_parser.rb +0 -1001
  39. data/lib/bio/db/phyloxml/phyloxml_writer.rb +0 -227
  40. data/lib/bio/io/biosql/ar-biosql.rb +0 -257
  41. data/lib/bio/io/biosql/biosql.rb +0 -39
  42. data/lib/bio/io/biosql/config/database.yml +0 -21
  43. data/lib/bio/io/sql.rb +0 -79
  44. data/lib/bio/shell.rb +0 -44
  45. data/lib/bio/shell/core.rb +0 -578
  46. data/lib/bio/shell/demo.rb +0 -146
  47. data/lib/bio/shell/interface.rb +0 -217
  48. data/lib/bio/shell/irb.rb +0 -94
  49. data/lib/bio/shell/object.rb +0 -71
  50. data/lib/bio/shell/plugin/blast.rb +0 -42
  51. data/lib/bio/shell/plugin/codon.rb +0 -218
  52. data/lib/bio/shell/plugin/das.rb +0 -58
  53. data/lib/bio/shell/plugin/emboss.rb +0 -23
  54. data/lib/bio/shell/plugin/entry.rb +0 -137
  55. data/lib/bio/shell/plugin/flatfile.rb +0 -101
  56. data/lib/bio/shell/plugin/midi.rb +0 -430
  57. data/lib/bio/shell/plugin/ncbirest.rb +0 -68
  58. data/lib/bio/shell/plugin/obda.rb +0 -45
  59. data/lib/bio/shell/plugin/psort.rb +0 -56
  60. data/lib/bio/shell/plugin/seq.rb +0 -248
  61. data/lib/bio/shell/plugin/togows.rb +0 -40
  62. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/bioruby_generator.rb +0 -29
  63. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_classes.rhtml +0 -4
  64. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_log.rhtml +0 -27
  65. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_methods.rhtml +0 -11
  66. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_modules.rhtml +0 -4
  67. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_variables.rhtml +0 -7
  68. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-bg.gif +0 -0
  69. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-gem.png +0 -0
  70. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-link.gif +0 -0
  71. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css +0 -368
  72. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.rhtml +0 -47
  73. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_controller.rb +0 -144
  74. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_helper.rb +0 -47
  75. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/commands.rhtml +0 -8
  76. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/history.rhtml +0 -10
  77. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/index.rhtml +0 -26
  78. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/spinner.gif +0 -0
  79. data/lib/bio/shell/script.rb +0 -25
  80. data/lib/bio/shell/setup.rb +0 -108
  81. data/lib/bio/shell/web.rb +0 -102
  82. data/sample/test_phyloxml_big.rb +0 -205
  83. data/test/data/phyloxml/apaf.xml +0 -666
  84. data/test/data/phyloxml/bcl_2.xml +0 -2097
  85. data/test/data/phyloxml/made_up.xml +0 -144
  86. data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +0 -65
  87. data/test/data/phyloxml/phyloxml_examples.xml +0 -415
  88. data/test/unit/bio/db/biosql/tc_biosql.rb +0 -114
  89. data/test/unit/bio/db/biosql/ts_suite_biosql.rb +0 -8
  90. data/test/unit/bio/db/test_phyloxml.rb +0 -821
  91. data/test/unit/bio/db/test_phyloxml_writer.rb +0 -334
  92. data/test/unit/bio/shell/plugin/test_seq.rb +0 -187
  93. data/test/unit/bio/test_shell.rb +0 -20
@@ -1,1001 +0,0 @@
1
- #
2
- # = bio/db/phyloxml_parser.rb - PhyloXML parser
3
- #
4
- # Copyright:: Copyright (C) 2009
5
- # Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
6
- # License:: The Ruby License
7
- #
8
- #
9
- # == Description
10
- #
11
- # This file contains the parser for PhyloXML.
12
- #
13
- # == Requirements
14
- #
15
- # Libxml2 XML parser is required. Install libxml-ruby bindings from
16
- # http://libxml.rubyforge.org or
17
- #
18
- # gem install -r libxml-ruby
19
- #
20
- # == References
21
- #
22
- # * http://www.phyloxml.org
23
- #
24
- # * https://www.nescent.org/wg_phyloinformatics/PhyloSoC:PhyloXML_support_in_BioRuby
25
-
26
-
27
- require 'uri'
28
- require 'libxml'
29
-
30
- require 'bio/tree'
31
- require 'bio/db/phyloxml/phyloxml_elements'
32
-
33
-
34
- module Bio
35
-
36
- module PhyloXML
37
-
38
-
39
-
40
-
41
- # == Description
42
- #
43
- # Bio::PhyloXML::Parser is for parsing phyloXML format files.
44
- #
45
- # == Requirements
46
- #
47
- # Libxml2 XML parser is required. Install libxml-ruby bindings from
48
- # http://libxml.rubyforge.org or
49
- #
50
- # gem install -r libxml-ruby
51
- #
52
- # == Usage
53
- #
54
- # require 'bio'
55
- #
56
- # # Create new phyloxml parser
57
- # phyloxml = Bio::PhyloXML::Parser.open('example.xml')
58
- #
59
- # # Print the names of all trees in the file
60
- # phyloxml.each do |tree|
61
- # puts tree.name
62
- # end
63
- #
64
- #
65
- # == References
66
- #
67
- # http://www.phyloxml.org/documentation/version_100/phyloxml.xsd.html
68
- #
69
- class Parser
70
-
71
- include LibXML
72
-
73
- # After parsing all the trees, if there is anything else in other xml format,
74
- # it is saved in this array of PhyloXML::Other objects
75
- attr_reader :other
76
-
77
- # Initializes LibXML::Reader and reads the file until it reaches the first
78
- # phylogeny element.
79
- #
80
- # Example: Create a new Bio::PhyloXML::Parser object.
81
- #
82
- # p = Bio::PhyloXML::Parser.open("./phyloxml_examples.xml")
83
- #
84
- # If the optional code block is given, Bio::PhyloXML object is passed to
85
- # the block as an argument. When the block terminates, the Bio::PhyloXML
86
- # object is automatically closed, and the open method returns the value
87
- # of the block.
88
- #
89
- # Example: Get the first tree in the file.
90
- #
91
- # tree = Bio::PhyloXML::Parser.open("example.xml") do |px|
92
- # px.next_tree
93
- # end
94
- #
95
- # ---
96
- # *Arguments*:
97
- # * (required) _filename_: Path to the file to parse.
98
- # * (optional) _validate_: Whether to validate the file against schema or not. Default value is true.
99
- # *Returns*:: (without block) Bio::PhyloXML::Parser object
100
- # *Returns*:: (with block) the value of the block
101
- def self.open(filename, validate=true)
102
- obj = new(nil, validate)
103
- obj.instance_eval {
104
- filename = _secure_filename(filename)
105
- _validate(:file, filename) if validate
106
- # XML::Parser::Options::NONET for security reason
107
- @reader = XML::Reader.file(filename,
108
- { :options =>
109
- LibXML::XML::Parser::Options::NONET })
110
- _skip_leader
111
- }
112
- if block_given? then
113
- begin
114
- ret = yield obj
115
- ensure
116
- obj.close if obj and !obj.closed?
117
- end
118
- ret
119
- else
120
- obj
121
- end
122
- end
123
-
124
- # Initializes LibXML::Reader and reads the file until it reaches the first
125
- # phylogeny element.
126
- #
127
- # Create a new Bio::PhyloXML::Parser object.
128
- #
129
- # p = Bio::PhyloXML::Parser.open_uri("http://www.phyloxml.org/examples/apaf.xml")
130
- #
131
- # If the optional code block is given, Bio::PhyloXML object is passed to
132
- # the block as an argument. When the block terminates, the Bio::PhyloXML
133
- # object is automatically closed, and the open_uri method returns the
134
- # value of the block.
135
- #
136
- # ---
137
- # *Arguments*:
138
- # * (required) _uri_: (URI or String) URI to the data to parse
139
- # * (optional) _validate_: For URI reader, the "validate" option is ignored and no validation is executed.
140
- # *Returns*:: (without block) Bio::PhyloXML::Parser object
141
- # *Returns*:: (with block) the value of the block
142
- def self.open_uri(uri, validate=true)
143
- case uri
144
- when URI
145
- uri = uri.to_s
146
- else
147
- # raises error if not a String
148
- uri = uri.to_str
149
- # raises error if invalid URI
150
- URI.parse(uri)
151
- end
152
-
153
- obj = new(nil, validate)
154
- obj.instance_eval {
155
- @reader = XML::Reader.file(uri)
156
- _skip_leader
157
- }
158
- if block_given? then
159
- begin
160
- ret = yield obj
161
- ensure
162
- obj.close if obj and !obj.closed?
163
- end
164
- ret
165
- else
166
- obj
167
- end
168
- end
169
-
170
- # Special class for closed PhyloXML::Parser object.
171
- # It raises error for any methods except essential methods.
172
- #
173
- # Bio::PhyloXML internal use only.
174
- class ClosedPhyloXMLParser #:nodoc:
175
- def method_missing(*arg)
176
- raise LibXML::XML::Error, 'closed PhyloXML::Parser object'
177
- end
178
- end #class ClosedPhyloXMLParser
179
-
180
- # Closes the LibXML::Reader inside the object.
181
- # It also closes the opened file if it is created by using
182
- # Bio::PhyloXML::Parser.open method.
183
- #
184
- # When closed object is closed again, or closed object is used,
185
- # it raises LibXML::XML::Error.
186
- # ---
187
- # *Returns*:: nil
188
- def close
189
- @reader.close
190
- @reader = ClosedPhyloXMLParser.new
191
- nil
192
- end
193
-
194
- # If the object is closed by using the close method or equivalent,
195
- # returns true. Otherwise, returns false.
196
- # ---
197
- # *Returns*:: true or false
198
- def closed?
199
- if @reader.kind_of?(ClosedPhyloXMLParser) then
200
- true
201
- else
202
- false
203
- end
204
- end
205
-
206
- # Initializes LibXML::Reader and reads from the IO until it reaches
207
- # the first phylogeny element.
208
- #
209
- # Create a new Bio::PhyloXML::Parser object.
210
- #
211
- # p = Bio::PhyloXML::Parser.for_io($stdin)
212
- #
213
- # ---
214
- # *Arguments*:
215
- # * (required) _io_: IO object
216
- # * (optional) _validate_: For IO reader, the "validate" option is ignored and no validation is executed.
217
- # *Returns*:: Bio::PhyloXML::Parser object
218
- def self.for_io(io, validate=true)
219
- obj = new(nil, validate)
220
- obj.instance_eval {
221
- @reader = XML::Reader.io(io,
222
- { :options =>
223
- LibXML::XML::Parser::Options::NONET })
224
- _skip_leader
225
- }
226
- obj
227
- end
228
-
229
- # (private) returns PhyloXML schema
230
- def _schema
231
- XML::Schema.document(XML::Document.file(File.join(File.dirname(__FILE__),'phyloxml.xsd')))
232
- end
233
- private :_schema
234
-
235
- # (private) do validation
236
- # ---
237
- # *Arguments*:
238
- # * (required) <em>data_type</em>: :file for filename, :string for string
239
- # * (required) _arg_: filename or string
240
- # *Returns*:: (undefined)
241
- def _validate(data_type, arg)
242
- options = { :options =>
243
- (LibXML::XML::Parser::Options::NOERROR | # no error messages
244
- LibXML::XML::Parser::Options::NOWARNING | # no warning messages
245
- LibXML::XML::Parser::Options::NONET) # no network access
246
- }
247
- case data_type
248
- when :file
249
- # No validation when special file e.g. FIFO (named pipe)
250
- return unless File.file?(arg)
251
- xml_instance = XML::Document.file(arg, options)
252
- when :string
253
- xml_instance = XML::Document.string(arg, options)
254
- else
255
- # no validation for unknown data type
256
- return
257
- end
258
-
259
- schema = _schema
260
- begin
261
- flag = xml_instance.validate_schema(schema) do |msg, _|
262
- # The document of libxml-ruby says that the block is called
263
- # when validation failed, but it seems it is never called
264
- # even when validation failed!
265
- raise "Validation of the XML document against phyloxml.xsd schema failed. #{msg}"
266
- end
267
- rescue LibXML::XML::Error => evar
268
- raise "Validation of the XML document against phyloxml.xsd schema failed, or XML error occurred. #{evar.message}"
269
- end
270
- unless flag then
271
- raise "Validation of the XML document against phyloxml.xsd schema failed."
272
- end
273
- end
274
- private :_validate
275
-
276
- # (private) It seems that LibXML::XML::Reader reads from the network
277
- # even if LibXML::XML::Parser::Options::NONET is set.
278
- # So, for URI-like filename, '://' is replaced with ':/'.
279
- def _secure_filename(filename)
280
- # for safety, URI-like filename is checked.
281
- if /\A[a-zA-Z]+\:\/\// =~ filename then
282
- # for example, "http://a/b" is changed to "http:/a/b".
283
- filename = filename.sub(/\:\/\//, ':/')
284
- end
285
- filename
286
- end
287
- private :_secure_filename
288
-
289
- # (private) loops through until reaches phylogeny stuff
290
- def _skip_leader
291
- #loops through until reaches phylogeny stuff
292
- # Have to leave this way, if accepting strings, instead of files
293
- @reader.read until is_element?('phylogeny')
294
- nil
295
- end
296
- private :_skip_leader
297
-
298
- # Initializes LibXML::Reader and reads the PhyloXML-formatted string
299
- # until it reaches the first phylogeny element.
300
- #
301
- # Create a new Bio::PhyloXML::Parser object.
302
- #
303
- # str = File.read("./phyloxml_examples.xml")
304
- # p = Bio::PhyloXML::Parser.new(str)
305
- #
306
- #
307
- # Deprecated usage: Reads data from a file. <em>str</em> is a filename.
308
- #
309
- # p = Bio::PhyloXML::Parser.new("./phyloxml_examples.xml")
310
- #
311
- # Taking filename is deprecated. Use Bio::PhyloXML::Parser.open(filename).
312
- #
313
- # ---
314
- # *Arguments*:
315
- # * (required) _str_: PhyloXML-formatted string
316
- # * (optional) _validate_: Whether to validate the file against schema or not. Default value is true.
317
- # *Returns*:: Bio::PhyloXML::Parser object
318
- def initialize(str, validate=true)
319
-
320
- @other = []
321
-
322
- return unless str
323
-
324
- # For compatibility, if filename-like string is given,
325
- # treat it as a filename.
326
- if /[\<\>\r\n]/ !~ str and File.exist?(str) then
327
- # assume that str is filename
328
- warn "Bio::PhyloXML::Parser.new(filename) is deprecated. Use Bio::PhyloXML::Parser.open(filename)."
329
- filename = _secure_filename(str)
330
- _validate(:file, filename) if validate
331
- @reader = XML::Reader.file(filename)
332
- _skip_leader
333
- return
334
- end
335
-
336
- # initialize for string
337
- @reader = XML::Reader.string(str,
338
- { :options =>
339
- LibXML::XML::Parser::Options::NONET })
340
- _skip_leader
341
- end
342
-
343
-
344
- # Iterate through all trees in the file.
345
- #
346
- # phyloxml = Bio::PhyloXML::Parser.open('example.xml')
347
- # phyloxml.each do |tree|
348
- # puts tree.name
349
- # end
350
- #
351
- def each
352
- while tree = next_tree
353
- yield tree
354
- end
355
- end
356
-
357
- # Access the specified tree in the file. It parses trees until the specified
358
- # tree is reached.
359
- #
360
- # # Get 3rd tree in the file (starts counting from 0).
361
- # parser = PhyloXML::Parser.open('phyloxml_examples.xml')
362
- # tree = parser[2]
363
- #
364
- def [](i)
365
- tree = nil
366
- (i+1).times do
367
- tree = self.next_tree
368
- end
369
- return tree
370
- end
371
-
372
- # Parse and return the next phylogeny tree. If there are no more phylogeny
373
- # elements, nil is returned. If there is something else besides phylogeny
374
- # elements, it is saved in the PhyloXML::Parser#other.
375
- #
376
- # p = Bio::PhyloXML::Parser.open("./phyloxml_examples.xml")
377
- # tree = p.next_tree
378
- #
379
- # ---
380
- # *Returns*:: Bio::PhyloXML::Tree
381
- def next_tree()
382
-
383
- if not is_element?('phylogeny')
384
- if @reader.node_type == XML::Reader::TYPE_END_ELEMENT
385
- if is_end_element?('phyloxml')
386
- return nil
387
- else
388
- @reader.read
389
- @reader.read
390
- if is_end_element?('phyloxml')
391
- return nil
392
- end
393
- end
394
- end
395
- # phyloxml can hold only phylogeny and "other" elements. If this is not
396
- # phylogeny element then it is other. Also, "other" always comes after
397
- # all phylogenies
398
- @other << parse_other
399
- #return nil for tree, since this is not valid phyloxml tree.
400
- return nil
401
- end
402
-
403
- tree = Bio::PhyloXML::Tree.new
404
-
405
- # keep track of current node in clades array/stack. Current node is the
406
- # last element in the clades array
407
- clades = []
408
- clades.push tree
409
-
410
- #keep track of current edge to be able to parse branch_length tag
411
- current_edge = nil
412
-
413
- # we are going to parse clade iteratively by pointing (and changing) to
414
- # the current node in the tree. Since the property element is both in
415
- # clade and in the phylogeny, we need some boolean to know if we are
416
- # parsing the clade (there can be only max 1 clade in phylogeny) or
417
- # parsing phylogeny
418
- parsing_clade = false
419
-
420
- while not is_end_element?('phylogeny') do
421
- break if is_end_element?('phyloxml')
422
-
423
- # parse phylogeny elements, except clade
424
- if not parsing_clade
425
-
426
- if is_element?('phylogeny')
427
- @reader["rooted"] == "true" ? tree.rooted = true : tree.rooted = false
428
- @reader["rerootable"] == "true" ? tree.rerootable = true : tree.rerootable = false
429
- parse_attributes(tree, ["branch_length_unit", 'type'])
430
- end
431
-
432
- parse_simple_elements(tree, [ "name", 'description', "date"])
433
-
434
- if is_element?('confidence')
435
- tree.confidences << parse_confidence
436
- end
437
-
438
- end
439
-
440
- if @reader.node_type == XML::Reader::TYPE_ELEMENT
441
- case @reader.name
442
- when 'clade'
443
- #parse clade element
444
-
445
- parsing_clade = true
446
-
447
- node= Bio::PhyloXML::Node.new
448
-
449
- branch_length = @reader['branch_length']
450
-
451
- parse_attributes(node, ["id_source"])
452
-
453
- #add new node to the tree
454
- tree.add_node(node)
455
- # The first clade will always be root since by xsd schema phyloxml can
456
- # have 0 to 1 clades in it.
457
- if tree.root == nil
458
- tree.root = node
459
- else
460
- current_edge = tree.add_edge(clades[-1], node,
461
- Bio::Tree::Edge.new(branch_length))
462
- end
463
- clades.push node
464
- #end if clade element
465
- else
466
- parse_clade_elements(clades[-1], current_edge) if parsing_clade
467
- end
468
- end
469
-
470
- #end clade element, go one parent up
471
- if is_end_element?('clade')
472
-
473
- #if we have reached the closing tag of the top-most clade, then our
474
- # current node should point to the root. If that's the case, we are done
475
- # parsing the clade element
476
- if clades[-1] == tree.root
477
- parsing_clade = false
478
- else
479
- # set current node (clades[-1]) to the previous clade in the array
480
- clades.pop
481
- end
482
- end
483
-
484
- #parsing phylogeny elements
485
- if not parsing_clade
486
-
487
- if @reader.node_type == XML::Reader::TYPE_ELEMENT
488
- case @reader.name
489
- when 'property'
490
- tree.properties << parse_property
491
-
492
- when 'clade_relation'
493
- clade_relation = CladeRelation.new
494
- parse_attributes(clade_relation, ["id_ref_0", "id_ref_1", "distance", "type"])
495
-
496
- #@ add unit test for this
497
- if not @reader.empty_element?
498
- @reader.read
499
- if is_element?('confidence')
500
- clade_relation.confidence = parse_confidence
501
- end
502
- end
503
- tree.clade_relations << clade_relation
504
-
505
- when 'sequence_relation'
506
- sequence_relation = SequenceRelation.new
507
- parse_attributes(sequence_relation, ["id_ref_0", "id_ref_1", "distance", "type"])
508
- if not @reader.empty_element?
509
- @reader.read
510
- if is_element?('confidence')
511
- sequence_relation.confidence = parse_confidence
512
- end
513
- end
514
- tree.sequence_relations << sequence_relation
515
- when 'phylogeny'
516
- #do nothing
517
- else
518
- tree.other << parse_other
519
- #puts "Not recognized element. #{@reader.name}"
520
- end
521
- end
522
- end
523
- # go to next element
524
- @reader.read
525
- end #end while not </phylogeny>
526
- #move on to the next tag after /phylogeny which is text, since phylogeny
527
- #end tag is empty element, which value is nil, therefore need to move to
528
- #the next meaningful element (therefore @reader.read twice)
529
- @reader.read
530
- @reader.read
531
-
532
- return tree
533
- end
534
-
535
- # return tree of specified name.
536
- # @todo Implement this method.
537
- # def get_tree_by_name(name)
538
-
539
- # while not is_end_element?('phyloxml')
540
- # if is_element?('phylogeny')
541
- # @reader.read
542
- # @reader.read
543
- #
544
- # if is_element?('name')
545
- # @reader.read
546
- # if @reader.value == name
547
- # puts "equasl"
548
- # tree = next_tree
549
- # puts tree
550
- # end
551
- # end
552
- # end
553
- # @reader.read
554
- # end
555
- #
556
- # end
557
-
558
-
559
- private
560
-
561
- ####
562
- # Utility methods
563
- ###
564
-
565
- def is_element?(str)
566
- @reader.node_type == XML::Reader::TYPE_ELEMENT and @reader.name == str ? true : false
567
- end
568
-
569
- def is_end_element?(str)
570
- @reader.node_type==XML::Reader::TYPE_END_ELEMENT and @reader.name == str ? true : false
571
- end
572
-
573
- def has_reached_end_element?(str)
574
- if not(is_end_element?(str))
575
- raise "Warning: Should have reached </#{str}> element here"
576
- end
577
- end
578
-
579
- # Parses a simple XML element. for example <speciations>1</speciations>
580
- # It reads in the value and assigns it to object.speciation = 1
581
- # Also checks if it has reached the end tag (</speciations>) and gives warning
582
- # if not
583
- def parse_simple_element(object, name)
584
- if is_element?(name)
585
- @reader.read
586
- object.send("#{name}=", @reader.value)
587
- @reader.read
588
- has_reached_end_element?(name)
589
- end
590
- end
591
-
592
- def parse_simple_elements(object, elements)
593
- elements.each do |elmt|
594
- parse_simple_element(object, elmt)
595
- end
596
- end
597
-
598
- #Parses list of attributes
599
- #use for the code like: clade_relation.type = @reader["type"]
600
- def parse_attributes(object, arr_of_attrs)
601
- arr_of_attrs.each do |attr|
602
- object.send("#{attr}=", @reader[attr])
603
- end
604
- end
605
-
606
- def parse_clade_elements(current_node, current_edge)
607
- #no loop inside, loop is already outside
608
-
609
- if @reader.node_type == XML::Reader::TYPE_ELEMENT
610
- case @reader.name
611
- when 'branch_length'
612
- # @todo add unit test for this. current_edge is nil, if the root clade
613
- # has branch_length attribute.
614
- @reader.read
615
- branch_length = @reader.value
616
- current_edge.distance = branch_length.to_f if current_edge != nil
617
- @reader.read
618
- when 'width'
619
- @reader.read
620
- current_node.width = @reader.value
621
- @reader.read
622
- when 'name'
623
- @reader.read
624
- current_node.name = @reader.value
625
- @reader.read
626
- when 'events'
627
- current_node.events = parse_events
628
- when 'confidence'
629
- current_node.confidences << parse_confidence
630
- when 'sequence'
631
- current_node.sequences << parse_sequence
632
- when 'property'
633
- current_node.properties << parse_property
634
- when 'taxonomy'
635
- current_node.taxonomies << parse_taxonomy
636
- when 'distribution'
637
- current_node.distributions << parse_distribution
638
- when 'node_id'
639
- id = Id.new
640
- id.type = @reader["type"]
641
- @reader.read
642
- id.value = @reader.value
643
- @reader.read
644
- #has_reached_end_element?('node_id')
645
- #@todo write unit test for this. There is no example of this in the example files
646
- current_node.id = id
647
- when 'color'
648
- color = BranchColor.new
649
- parse_simple_element(color, 'red')
650
- parse_simple_element(color, 'green')
651
- parse_simple_element(color, 'blue')
652
- current_node.color = color
653
- #@todo add unit test for this
654
- when 'date'
655
- date = Date.new
656
- date.unit = @reader["unit"]
657
- #move to the next token, which is always empty, since date tag does not
658
- # have text associated with it
659
- @reader.read
660
- @reader.read #now the token is the first tag under date tag
661
- while not(is_end_element?('date'))
662
- parse_simple_element(date, 'desc')
663
- parse_simple_element(date, 'value')
664
- parse_simple_element(date, 'minimum')
665
- parse_simple_element(date, 'maximum')
666
- @reader.read
667
- end
668
- current_node.date = date
669
- when 'reference'
670
- reference = Reference.new()
671
- reference.doi = @reader['doi']
672
- if not @reader.empty_element?
673
- while not is_end_element?('reference')
674
- parse_simple_element(reference, 'desc')
675
- @reader.read
676
- end
677
- end
678
- current_node.references << reference
679
- when 'binary_characters'
680
- current_node.binary_characters = parse_binary_characters
681
- when 'clade'
682
- #do nothing
683
- else
684
- current_node.other << parse_other
685
- #puts "No match found in parse_clade_elements.(#{@reader.name})"
686
- end
687
-
688
- end
689
-
690
- end #parse_clade_elements
691
-
692
- def parse_events()
693
- events = PhyloXML::Events.new
694
- @reader.read #go to next element
695
- while not(is_end_element?('events')) do
696
- parse_simple_elements(events, ['type', 'duplications',
697
- 'speciations', 'losses'])
698
- if is_element?('confidence')
699
- events.confidence = parse_confidence
700
- #@todo could add unit test for this (example file does not have this case)
701
- end
702
- @reader.read
703
- end
704
- return events
705
- end #parse_events
706
-
707
- def parse_taxonomy
708
- taxonomy = PhyloXML::Taxonomy.new
709
- parse_attributes(taxonomy, ["id_source"])
710
- @reader.read
711
- while not(is_end_element?('taxonomy')) do
712
-
713
- if @reader.node_type == XML::Reader::TYPE_ELEMENT
714
- case @reader.name
715
- when 'code'
716
- @reader.read
717
- taxonomy.code = @reader.value
718
- @reader.read
719
- when 'scientific_name'
720
- @reader.read
721
- taxonomy.scientific_name = @reader.value
722
- @reader.read
723
- when 'rank'
724
- @reader.read
725
- taxonomy.rank = @reader.value
726
- @reader.read
727
- when 'authority'
728
- @reader.read
729
- taxonomy.authority = @reader.value
730
- @reader.read
731
- when 'id'
732
- taxonomy.taxonomy_id = parse_id('id')
733
- when 'common_name'
734
- @reader.read
735
- taxonomy.common_names << @reader.value
736
- @reader.read
737
- #has_reached_end_element?('common_name')
738
- when 'synonym'
739
- @reader.read
740
- taxonomy.synonyms << @reader.value
741
- @reader.read
742
- #has_reached_end_element?('synonym')
743
- when 'uri'
744
- taxonomy.uri = parse_uri
745
- else
746
- taxonomy.other << parse_other
747
- end
748
- end
749
-
750
- @reader.read #move to next tag in the loop
751
- end
752
- return taxonomy
753
- end #parse_taxonomy
754
-
755
- private
756
-
757
- def parse_sequence
758
- sequence = Sequence.new
759
- parse_attributes(sequence, ["type", "id_source", "id_ref"])
760
-
761
- @reader.read
762
- while not(is_end_element?('sequence'))
763
-
764
- if @reader.node_type == XML::Reader::TYPE_ELEMENT
765
- case @reader.name
766
- when 'symbol'
767
- @reader.read
768
- sequence.symbol = @reader.value
769
- @reader.read
770
- when 'name'
771
- @reader.read
772
- sequence.name = @reader.value
773
- @reader.read
774
- when 'location'
775
- @reader.read
776
- sequence.location = @reader.value
777
- @reader.read
778
- when 'mol_seq'
779
- sequence.is_aligned = @reader["is_aligned"]
780
- @reader.read
781
- sequence.mol_seq = @reader.value
782
- @reader.read
783
- has_reached_end_element?('mol_seq')
784
- when 'accession'
785
- sequence.accession = Accession.new
786
- sequence.accession.source = @reader["source"]
787
- @reader.read
788
- sequence.accession.value = @reader.value
789
- @reader.read
790
- has_reached_end_element?('accession')
791
- when 'uri'
792
- sequence.uri = parse_uri
793
- when 'annotation'
794
- sequence.annotations << parse_annotation
795
- when 'domain_architecture'
796
- sequence.domain_architecture = DomainArchitecture.new
797
- sequence.domain_architecture.length = @reader["length"]
798
- @reader.read
799
- @reader.read
800
- while not(is_end_element?('domain_architecture'))
801
- sequence.domain_architecture.domains << parse_domain
802
- @reader.read #go to next domain element
803
- end
804
- else
805
- sequence.other << parse_other
806
- #@todo add unit test
807
- end
808
- end
809
-
810
- @reader.read
811
- end
812
- return sequence
813
- end #parse_sequence
814
-
815
- def parse_uri
816
- uri = Uri.new
817
- parse_attributes(uri, ["desc", "type"])
818
- parse_simple_element(uri, 'uri')
819
- return uri
820
- end
821
-
822
- def parse_annotation
823
- annotation = Annotation.new
824
-
825
- parse_attributes(annotation, ['ref', 'source', 'evidence', 'type'])
826
-
827
- if not @reader.empty_element?
828
- while not(is_end_element?('annotation'))
829
- parse_simple_element(annotation, 'desc') if is_element?('desc')
830
-
831
- annotation.confidence = parse_confidence if is_element?('confidence')
832
-
833
- annotation.properties << parse_property if is_element?('property')
834
-
835
- if is_element?('uri')
836
- annotation.uri = parse_uri
837
- end
838
-
839
- @reader.read
840
- end
841
-
842
- end
843
- return annotation
844
- end
845
-
846
- def parse_property
847
- property = Property.new
848
- parse_attributes(property, ["ref", "unit", "datatype", "applies_to", "id_ref"])
849
- @reader.read
850
- property.value = @reader.value
851
- @reader.read
852
- has_reached_end_element?('property')
853
- return property
854
- end #parse_property
855
-
856
- def parse_confidence
857
- type = @reader["type"]
858
- @reader.read
859
- value = @reader.value.to_f
860
- @reader.read
861
- has_reached_end_element?('confidence')
862
- return Confidence.new(type, value)
863
- end #parse_confidence
864
-
865
- def parse_distribution
866
- distribution = Distribution.new
867
- @reader.read
868
- while not(is_end_element?('distribution')) do
869
-
870
- parse_simple_element(distribution, 'desc')
871
-
872
- distribution.points << parse_point if is_element?('point')
873
- distribution.polygons << parse_polygon if is_element?('polygon')
874
-
875
- @reader.read
876
- end
877
- return distribution
878
- end #parse_distribution
879
-
880
- def parse_point
881
- point = Point.new
882
-
883
- point.geodetic_datum = @reader["geodetic_datum"]
884
- point.alt_unit = @reader["alt_unit"]
885
-
886
- @reader.read
887
- while not(is_end_element?('point')) do
888
-
889
- parse_simple_elements(point, ['lat', 'long'] )
890
-
891
- if is_element?('alt')
892
- @reader.read
893
- point.alt = @reader.value.to_f
894
- @reader.read
895
- has_reached_end_element?('alt')
896
- end
897
- #advance reader
898
- @reader.read
899
- end
900
- return point
901
- end #parse_point
902
-
903
- def parse_polygon
904
- polygon = Polygon.new
905
- @reader.read
906
- while not(is_end_element?('polygon')) do
907
- polygon.points << parse_point if is_element?('point')
908
- @reader.read
909
- end
910
-
911
- #@todo should check for it at all? Probably not if xml is valid.
912
- if polygon.points.length <3
913
- puts "Warning: <polygon> should have at least 3 points"
914
- end
915
- return polygon
916
- end #parse_polygon
917
-
918
- def parse_id(tag_name)
919
- id = Id.new
920
- id.provider = @reader["provider"]
921
- @reader.read
922
- id.value = @reader.value
923
- @reader.read #@todo shouldn't there be another read?
924
- has_reached_end_element?(tag_name)
925
- return id
926
- end #parse_id
927
-
928
- def parse_domain
929
- domain = ProteinDomain.new
930
- parse_attributes(domain, ["from", "to", "confidence", "id"])
931
- @reader.read
932
- domain.value = @reader.value
933
- @reader.read
934
- has_reached_end_element?('domain')
935
- @reader.read
936
- return domain
937
- end
938
-
939
- def parse_binary_characters
940
- b = PhyloXML::BinaryCharacters.new
941
- b.bc_type = @reader['type']
942
-
943
- parse_attributes(b, ['gained_count', 'absent_count', 'lost_count', 'present_count'])
944
- if not @reader.empty_element?
945
- @reader.read
946
- while not is_end_element?('binary_characters')
947
-
948
- parse_bc(b, 'lost')
949
- parse_bc(b, 'gained')
950
- parse_bc(b, 'absent')
951
- parse_bc(b, 'present')
952
-
953
- @reader.read
954
- end
955
- end
956
- return b
957
- end #parse_binary_characters
958
-
959
- def parse_bc(object, element)
960
- if is_element?(element)
961
- @reader.read
962
- while not is_end_element?(element)
963
- if is_element?('bc')
964
- @reader.read
965
- object.send(element) << @reader.value
966
- @reader.read
967
- has_reached_end_element?('bc')
968
- end
969
- @reader.read
970
- end
971
- end
972
- end #parse_bc
973
-
974
- def parse_other
975
- other_obj = PhyloXML::Other.new
976
- other_obj.element_name = @reader.name
977
- #parse attributes
978
- code = @reader.move_to_first_attribute
979
- while code ==1
980
- other_obj.attributes[@reader.name] = @reader.value
981
- code = @reader.move_to_next_attribute
982
- end
983
-
984
- while not is_end_element?(other_obj.element_name) do
985
- @reader.read
986
- if @reader.node_type == XML::Reader::TYPE_ELEMENT
987
- other_obj.children << parse_other #recursice call to parse children
988
- elsif @reader.node_type == XML::Reader::TYPE_TEXT
989
- other_obj.value = @reader.value
990
- end
991
- end
992
- #just a check
993
- has_reached_end_element?(other_obj.element_name)
994
- return other_obj
995
- end #parse_other
996
-
997
- end #class phyloxmlParser
998
-
999
- end #module PhyloXML
1000
-
1001
- end #module Bio