bio-nexml 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. data/Gemfile +15 -0
  2. data/Gemfile.lock +24 -0
  3. data/LICENSE.txt +20 -0
  4. data/README.rdoc +47 -0
  5. data/Rakefile +55 -0
  6. data/TODO.txt +6 -0
  7. data/VERSION +1 -0
  8. data/bio-nexml.gemspec +126 -0
  9. data/extconf.rb +2 -0
  10. data/lib/bio-nexml.rb +0 -0
  11. data/lib/bio.rb +321 -0
  12. data/lib/bio/db/nexml.rb +109 -0
  13. data/lib/bio/db/nexml/mapper.rb +113 -0
  14. data/lib/bio/db/nexml/mapper/framework.rb +157 -0
  15. data/lib/bio/db/nexml/mapper/inflection.rb +99 -0
  16. data/lib/bio/db/nexml/mapper/repository.rb +59 -0
  17. data/lib/bio/db/nexml/matrix.rb +1046 -0
  18. data/lib/bio/db/nexml/parser.rb +622 -0
  19. data/lib/bio/db/nexml/schema/README.txt +21 -0
  20. data/lib/bio/db/nexml/schema/abstract.xsd +159 -0
  21. data/lib/bio/db/nexml/schema/characters/README.txt +1 -0
  22. data/lib/bio/db/nexml/schema/characters/abstractcharacters.xsd +361 -0
  23. data/lib/bio/db/nexml/schema/characters/characters.xsd +22 -0
  24. data/lib/bio/db/nexml/schema/characters/continuous.xsd +190 -0
  25. data/lib/bio/db/nexml/schema/characters/dna.xsd +282 -0
  26. data/lib/bio/db/nexml/schema/characters/protein.xsd +280 -0
  27. data/lib/bio/db/nexml/schema/characters/restriction.xsd +239 -0
  28. data/lib/bio/db/nexml/schema/characters/rna.xsd +283 -0
  29. data/lib/bio/db/nexml/schema/characters/standard.xsd +261 -0
  30. data/lib/bio/db/nexml/schema/external/sawsdl.xsd +21 -0
  31. data/lib/bio/db/nexml/schema/external/xhtml-datatypes-1.xsd +177 -0
  32. data/lib/bio/db/nexml/schema/external/xlink.xsd +75 -0
  33. data/lib/bio/db/nexml/schema/external/xml.xsd +145 -0
  34. data/lib/bio/db/nexml/schema/meta/README.txt +2 -0
  35. data/lib/bio/db/nexml/schema/meta/annotations.xsd +100 -0
  36. data/lib/bio/db/nexml/schema/meta/meta.xsd +294 -0
  37. data/lib/bio/db/nexml/schema/nexml.xsd +104 -0
  38. data/lib/bio/db/nexml/schema/taxa/README.txt +2 -0
  39. data/lib/bio/db/nexml/schema/taxa/taxa.xsd +39 -0
  40. data/lib/bio/db/nexml/schema/trees/README.txt +2 -0
  41. data/lib/bio/db/nexml/schema/trees/abstracttrees.xsd +135 -0
  42. data/lib/bio/db/nexml/schema/trees/network.xsd +113 -0
  43. data/lib/bio/db/nexml/schema/trees/tree.xsd +149 -0
  44. data/lib/bio/db/nexml/schema/trees/trees.xsd +36 -0
  45. data/lib/bio/db/nexml/taxa.rb +147 -0
  46. data/lib/bio/db/nexml/trees.rb +663 -0
  47. data/lib/bio/db/nexml/writer.rb +265 -0
  48. data/test/data/nexml/test.xml +69 -0
  49. data/test/test_bio-nexml.rb +17 -0
  50. data/test/unit/bio/db/nexml/tc_factory.rb +119 -0
  51. data/test/unit/bio/db/nexml/tc_mapper.rb +78 -0
  52. data/test/unit/bio/db/nexml/tc_matrix.rb +551 -0
  53. data/test/unit/bio/db/nexml/tc_parser.rb +21 -0
  54. data/test/unit/bio/db/nexml/tc_taxa.rb +118 -0
  55. data/test/unit/bio/db/nexml/tc_trees.rb +370 -0
  56. data/test/unit/bio/db/nexml/tc_writer.rb +633 -0
  57. metadata +253 -0
@@ -0,0 +1,622 @@
1
+ #require "ruby-debug"
2
+ #Debugger.start
3
+
4
+ module Bio
5
+ module NeXML
6
+ include LibXML
7
+
8
+ #def self.parse( nexml, validate = false )
9
+ #Parser.new( nexml, validate ).parse
10
+ #end
11
+
12
+ class Parser
13
+
14
# Build a parser for one NeXML document.
#
# nexml    - the document source; passed to #read, which chooses the
#            XML::Reader constructor to use.
# validate - when truthy, #validate_nexml is invoked right after the reader
#            has been created (default: false).
def initialize(nexml, validate = false)
  # id => object lookup used to resolve references between elements
  @cache = Hash.new

  # libxml cursor over the document
  @reader = read(nexml)

  if validate
    validate_nexml
  end
end
25
+
26
# Factory method: walk the document and return the resulting
# Bio::NeXML::Nexml object.
#
# The result is kept in @nexml, so a repeated call returns the same object
# without reading from the cursor again. The reader is left open; use
# #close to release it.
def parse
  return @nexml if @nexml

  # position the cursor on the root 'nexml' element and read its attributes
  skip_leader
  @nexml = NeXML::Nexml.new(attribute('version'), attribute('generator'))

  # only the three container elements are handled at this level; every
  # other node is skipped
  while next_node
    name = local_name
    if name == "otus"
      @nexml.add_otus(parse_otus)
    elsif name == "trees"
      @nexml.add_trees(parse_trees)
    elsif name == "characters"
      @nexml.add_characters(parse_characters)
    end
  end

  @nexml
end
60
+
61
# Close the associated XML::Reader object.
# Returns whatever the reader's own #close returns.
def close
  reader = @reader
  reader.close
end
66
+
67
+ private
68
+
69
# Id-keyed store for parsed objects (otus, otu, states, state, char, node).
#
# Called without an argument it returns the underlying hash, so callers
# can look an object up with cache[ id ]. Called with an object it stores
# that object under object.id and returns the object.
def cache(object = nil)
  if object
    @cache[object.id] = object
  else
    @cache
  end
end
74
+
75
# Determine whether 'nexml' is a file name, an IO or an XML string and
# return the matching XML::Reader object.
#
# nexml - a String ending in ".xml" (treated as a file path), an IO, or any
#         other String (treated as the document text itself).
#
# Raises ArgumentError for any other kind of input instead of returning nil
# and failing later on the first use of the reader.
def read(nexml)
  case nexml
  when /\.xml\z/
    # \z anchors at the very end of the string. With '$' a multi-line XML
    # document that merely has a line ending in ".xml" was mistaken for a
    # file name.
    XML::Reader.file(nexml, :options => parse_opts)
  when IO
    XML::Reader.io(nexml, :options => parse_opts)
  when String
    XML::Reader.string(nexml, :options => parse_opts)
  else
    raise ArgumentError,
          "cannot read NeXML from #{nexml.class}: expected a file name, an IO or an XML string"
  end
end
87
+
88
# Advance the cursor until it sits on the root 'nexml' element.
#
# Raises ArgumentError when the reader runs out of nodes first. Previously
# this looped forever, because a reader at end of input keeps returning a
# falsy value from #read while the name never becomes "nexml".
def skip_leader
  until local_name == "nexml"
    raise ArgumentError, "no 'nexml' element found in document" unless @reader.read
  end
end
91
+
92
# Local name of the node under the cursor, as reported by the reader.
def local_name
  cursor = @reader
  cursor.local_name
end
95
+
96
# Value of the node under the cursor, as reported by the reader
# (used by #parse_seq to read the text of a 'seq' element).
def value
  cursor = @reader
  cursor.value
end
99
+
100
# Look up the attribute called 'name' on the node under the cursor by
# indexing the reader with that name.
def attribute(name)
  cursor = @reader
  cursor[name]
end
103
+
104
# Move the cursor forward to the next node the parser cares about: an
# element start, an element end or a text node. Any other node type is
# skipped.
#
# Returns true when such a node was reached, false once the reader has no
# more nodes.
def next_node
  loop do
    return false unless @reader.read
    return true if element_start? || element_end? || text_node?
  end
end
110
+
111
# XML parsing options handed to the libxml reader, OR-ed into one value:
# 1. NOBLANKS - remove blank nodes
# 2. NOENT    - substitute entities
# 3. NONET    - forbid network access
def parse_opts
  flags = [
    XML::Parser::Options::NOBLANKS,
    XML::Parser::Options::NOENT,
    XML::Parser::Options::NONET
  ]
  flags.inject { |combined, flag| combined | flag }
end
120
+
121
# Switch on XSD validation for the reader, using the schema that lives in
# "schema/nexml.xsd" next to this file.
#
# Returns true when the reader reports that validation was activated,
# false otherwise.
#
# NOTE(review): the original only treated an integer 0 as success. Current
# libxml-ruby appears to return true/false from #schema_validate, which made
# this method always report failure. Both forms are accepted here; confirm
# against the libxml-ruby version in use.
def validate_nexml
  schema = File.join(File.dirname(__FILE__), "schema/nexml.xsd")
  status = @reader.schema_validate(schema)
  status == 0 || status == true
end
126
+
127
# True when the node under the cursor is the start of an element
# (reader node type TYPE_ELEMENT).
def element_start?
  kind = @reader.node_type
  kind == XML::Reader::TYPE_ELEMENT
end
131
+
132
# True when the node under the cursor is the end of an element
# (reader node type TYPE_END_ELEMENT).
def element_end?
  kind = @reader.node_type
  kind == XML::Reader::TYPE_END_ELEMENT
end
136
+
137
# True when the node under the cursor is a text node
# (reader node type TYPE_TEXT).
def text_node?
  kind = @reader.node_type
  kind == XML::Reader::TYPE_TEXT
end
140
+
141
# Ask the reader whether the element under the cursor is empty. The
# parse_* methods use this to return early instead of scanning for an end
# tag.
def empty_element?
  cursor = @reader
  cursor.empty_element?
end
144
+
145
# Expects the cursor to be on an 'otus' element.
#
# Builds a NeXML::Otus from the element's 'id' and 'label' attributes,
# caches it, appends every child 'otu' and stops at the closing 'otus' tag.
# Returns the 'otus' object.
def parse_otus
  otus = NeXML::Otus.new(attribute('id'), :label => attribute('label'))
  cache otus

  # according to the schema an 'otus' may have no child element
  unless empty_element?
    while next_node
      name = local_name
      # the end of the current 'otus' element has been reached
      break if name == "otus"
      otus << parse_otu if name == "otu"
    end
  end

  otus
end
173
+
174
# Expects the cursor to be on an 'otu' element.
#
# Builds a NeXML::Otu from the 'id' and 'label' attributes and caches it.
# Any child nodes are skipped up to the closing 'otu' tag.
# Returns the 'otu' object.
def parse_otu
  otu = NeXML::Otu.new(attribute('id'), :label => attribute('label'))
  cache otu

  # according to the schema an 'otu' may have no child element
  unless empty_element?
    while next_node
      # the end of the current 'otu' element has been reached
      break if local_name == 'otu'
    end
  end

  otu
end
198
+
199
# Expects the cursor to be on a 'trees' element.
#
# Builds a NeXML::Trees from the 'id' and 'label' attributes, linked to the
# cached object named by the 'otus' attribute, then appends every child
# 'tree' and 'network' until the closing 'trees' tag.
# Returns the 'trees' object.
def parse_trees
  otus = cache[attribute('otus')]

  id = attribute('id')
  label = attribute('label')

  trees = NeXML::Trees.new(id, :otus => otus, :label => label)

  # A self-closing element has no end tag to stop at. Without this guard
  # (which the otus/otu/node parsers already have) the loop below would keep
  # consuming the elements that follow.
  return trees if empty_element?

  while next_node
    case local_name
    when "tree"
      trees << parse_tree
    when "network"
      trees << parse_network
    when "trees"
      # the end of the current 'trees' element has been reached
      break
    end
  end

  trees
end
226
+
227
# Expects the cursor to be on a 'tree' element.
#
# The class to instantiate is the local part of the 'xsi:type' attribute,
# looked up in NeXML. Child 'node' and 'edge' elements are parsed and added
# to the tree; nodes reporting root? are also pushed onto tree.roots.
# Returns the 'tree' object.
def parse_tree
  id = attribute('id')
  label = attribute('label')

  # Strip whatever namespace prefix the QName carries (or none). The
  # original sliced off a fixed four characters, which only works for a
  # three-letter prefix such as "nex:".
  type = attribute('xsi:type').sub(/\A[^:]*:/, '')
  tree = NeXML.const_get(type).new(id, :label => label)

  # a self-closing element has no end tag for the loop below to stop at
  return tree if empty_element?

  while next_node
    case local_name
    when "node"
      node = parse_node
      tree.add_node node
      tree.roots << node if node.root?
    when "rootedge"
      # Parsed only to move the cursor past the element; the result is not
      # attached. XXX it looks like the super class(es) can only deal with
      # edges that have source and target.
      parse_rootedge
    when "edge"
      tree.add_edge parse_edge(type)
    when "tree"
      # the end of the current 'tree' element has been reached
      break
    end
  end

  tree
end
271
+
272
# Expects the cursor to be on a 'network' element.
#
# The class to instantiate is the local part of the 'xsi:type' attribute,
# looked up in NeXML. Child 'node' and 'edge' elements are parsed and added;
# a node reporting root? is assigned to network.root.
# Returns the 'network' object.
def parse_network
  id = attribute('id')
  label = attribute('label')

  # Strip whatever namespace prefix the QName carries (or none). The
  # original sliced off a fixed four characters, which only works for a
  # three-letter prefix such as "nex:".
  type = attribute('xsi:type').sub(/\A[^:]*:/, '')
  network = NeXML.const_get(type).new(id, :label => label)

  # a self-closing element has no end tag for the loop below to stop at
  return network if empty_element?

  while next_node
    case local_name
    when "node"
      node = parse_node
      network.add_node node
      network.root = node if node.root?
    when "edge"
      network.add_edge parse_edge(type)
    when "network"
      # the end of the current 'network' element has been reached
      break
    end
  end

  network
end
308
+
309
# Expects the cursor to be on a 'node' element.
#
# Builds a NeXML::Node from the 'id', 'label', 'root' and 'otu' attributes.
# When an 'otu' attribute is present the node is linked to the cached
# object with that id. The node itself is cached under its id.
# Returns the 'node' object.
def parse_node
  id = attribute('id')
  label = attribute('label')

  # Only "true" and "1" (the true forms of an XML Schema boolean) mark a
  # root. The original treated any present value as true, so root="false"
  # produced a root node.
  # NOTE(review): assumes 'root' is declared as xs:boolean in the NeXML
  # schema; confirm against the bundled schema files.
  root = %w[true 1].include?(attribute('root'))

  # is this node taxon linked?
  otu_id = attribute('otu')
  otu = cache[otu_id] if otu_id

  node = NeXML::Node.new(id, :otu => otu, :root => root, :label => label)
  cache node

  # according to the schema a 'node' may have no child element
  return node if empty_element?

  while next_node
    # the end of the current 'node' element has been reached
    break if local_name == 'node'
  end

  node
end
339
+
340
# Expects the cursor to be on an 'edge' element.
#
# type - class name of the enclosing tree or network (e.g. "FloatTree").
#        The edge class is derived by replacing the first "Tree" or
#        "Network" in it with "Edge" and looking the result up in NeXML.
#
# 'source' and 'target' are resolved through the cache; 'length' is passed
# on as read from the attribute.
# Returns the 'edge' object.
def parse_edge(type)
  id = attribute('id')
  source = cache[attribute('source')]
  target = cache[attribute('target')]
  length = attribute('length')

  # Non-destructive sub: the original used sub!, which rewrote the caller's
  # string in place (turning its "FloatTree" into "FloatEdge") and raised
  # when handed a frozen string.
  edge_type = type.sub(/Tree|Network/, "Edge")
  edge = NeXML.const_get(edge_type).new(id, :source => source, :target => target, :length => length)

  # according to the schema an 'edge' may have no child element
  return edge if empty_element?

  while next_node
    # the end of the current 'edge' element has been reached
    break if local_name == 'edge'
  end

  edge
end
366
+
367
# Expects the cursor to be on a 'rootedge' element.
#
# Builds a RootEdge from the 'id' and 'length' attributes, with its target
# resolved through the cache. Child nodes are skipped up to the closing
# 'rootedge' tag.
# Returns the 'rootedge' object.
def parse_rootedge
  id = attribute('id')
  target_node = cache[attribute('target')]
  branch_length = attribute('length')

  rootedge = RootEdge.new(id, :target => target_node, :length => branch_length)

  # according to the schema an 'edge' may have no child element
  unless empty_element?
    while next_node
      # the end of the current 'rootedge' element has been reached
      break if local_name == 'rootedge'
    end
  end

  rootedge
end
388
+
389
# Expects the cursor to be on a 'characters' element.
#
# The class to instantiate is the local part of the 'xsi:type' attribute,
# looked up in NeXML; the object is linked to the cached object named by the
# 'otus' attribute. Child 'format' and 'matrix' elements are parsed and
# added until the closing 'characters' tag.
# Returns the 'characters' object.
def parse_characters
  # get the taxon linkage
  otus = cache[attribute('otus')]

  id = attribute('id')
  label = attribute('label')

  # Strip whatever namespace prefix the QName carries (or none). The
  # original sliced off a fixed four characters, which only works for a
  # three-letter prefix such as "nex:".
  type = attribute('xsi:type').sub(/\A[^:]*:/, '')
  klass = NeXML.const_get(type)

  # "...Cells" types are parsed cell by cell; this flag selects the row
  # class in #parse_row
  verbose = type =~ /Cells/ ? true : false

  characters = klass.new(id, :otus => otus, :label => label)

  # a self-closing element has no end tag for the loop below to stop at
  return characters if empty_element?

  while next_node
    case local_name
    when 'format'
      characters.add_format(parse_format(type))
    when 'matrix'
      characters.add_matrix(parse_matrix(type, verbose))
    when 'characters'
      break
    end
  end

  characters
end
420
+
421
# Expects the cursor to be on a 'format' element.
#
# type - class name of the enclosing 'characters' element; handed on to
#        #parse_states and #parse_char.
#
# Adds every child 'states' and 'char' to a new Format until the closing
# 'format' tag.
# Returns the Format object.
def parse_format(type)
  format = Format.new

  # A self-closing element has no end tag to stop at. Without this guard
  # the loop below would keep consuming the elements that follow.
  return format if empty_element?

  while next_node
    case local_name
    when 'states'
      format.add_states(parse_states(type))
    when 'char'
      format.add_char(parse_char(type))
    when 'format'
      break
    end
  end

  format
end
441
+
442
# Expects the cursor to be on a 'states' element.
#
# type - class name of the enclosing 'characters' element; handed on to
#        #parse_state.
#
# Builds a States object from the 'id' and 'label' attributes and adds every
# child 'state', 'polymorphic_state_set' and 'uncertain_state_set'. The two
# set kinds are parsed like a state and then tagged with ambiguity
# :polymorphic or :uncertain. The States object is cached once its children
# have been read.
# Returns the States object.
def parse_states(type)
  id = attribute('id')
  label = attribute('label')
  states = States.new(id, :label => label)

  # A self-closing element has no end tag to stop at; skip the scan instead
  # of consuming the elements that follow.
  unless empty_element?
    while next_node
      # remember the name first: parse_state moves the cursor
      case name = local_name
      when 'state'
        states.add_state(parse_state(type))
      when 'polymorphic_state_set', 'uncertain_state_set'
        state = parse_state(type)
        state.ambiguity = (name == 'polymorphic_state_set' ? :polymorphic : :uncertain)
        states.add_state(state)
      when 'states'
        break
      end
    end
  end

  cache states

  states
end
469
+
470
# Expects the cursor to be on a 'state', 'polymorphic_state_set' or
# 'uncertain_state_set' element.
#
# type - accepted for symmetry with the other parse_* methods; not used.
#
# Builds a State from the 'id', 'symbol' and 'label' attributes, caches it,
# and adds the state referenced by every child 'member'.
# Returns the State object.
def parse_state(type)
  id = attribute('id')
  symbol = attribute('symbol')
  label = attribute('label')

  state = State.new(id, symbol, :label => label)

  cache state

  return state if empty_element?

  while next_node
    case local_name
    when 'state', 'polymorphic_state_set', 'uncertain_state_set'
      break
    when 'member'
      # parse_member does not consume the element, so a 'member' written
      # with an explicit end tag shows up here twice. Only the start counts;
      # otherwise the same member would be added two times.
      state.add_member(parse_member) if element_start?
    end
  end

  state
end
493
+
494
# Expects the cursor to be on a 'member' element.
# Returns the cached object whose id is the element's 'state' attribute
# (nil when nothing is cached under that id).
def parse_member
  cache[attribute('state')]
end
498
+
499
# Expects the cursor to be on a 'char' element.
#
# type - accepted for symmetry with the other parse_* methods; not used
#        (the original derived a "...Char" name from it and discarded it).
#
# Builds a Char from the 'id' and 'label' attributes, linked to the cached
# object named by the 'states' attribute. When the Char supports codon= and
# a 'codon' attribute is present, that value is assigned. The Char is cached
# under its id.
# Returns the Char object.
def parse_char(type)
  id = attribute('id')
  label = attribute('label')
  states = cache[attribute('states')]

  char = Char.new(id, states, :label => label)

  # the 'codon' attribute is only read when the object can take it
  if char.respond_to?(:codon=)
    codon = attribute('codon')
    char.codon = codon if codon
  end

  cache char

  return char if empty_element?

  while next_node
    break if local_name == 'char'
  end

  char
end
524
+
525
# Expects the cursor to be on a 'matrix' element.
#
# type    - class name of the enclosing 'characters' element, e.g.
#           "DnaSeqs"; its last character is dropped and "Matrix" appended
#           ("DnaSeqMatrix") before it is handed to #parse_row.
# verbose - passed through to #parse_row.
#
# Adds every child 'row' to a new Matrix until the closing 'matrix' tag.
# Returns the Matrix object.
def parse_matrix(type, verbose)
  row_type = "#{type[0..-2]}Matrix"

  matrix = Matrix.new

  # A self-closing element has no end tag to stop at. Without this guard
  # the loop below would keep consuming the elements that follow.
  return matrix if empty_element?

  while next_node
    case local_name
    when 'row'
      matrix.add_row(parse_row(row_type, verbose))
    when 'matrix'
      break
    end
  end

  matrix
end
543
+
544
# Expects the cursor to be on a 'row' element.
#
# type    - matrix type name as built by #parse_matrix; "Matrix" is
#           replaced by "Row" before it is handed to #parse_seq and
#           #parse_cell.
# verbose - truthy selects CellRow, otherwise SeqRow.
#
# Builds the row from the 'id' and 'label' attributes and adds every child
# 'seq' (add_sequence) and 'cell' (add_cell) until the closing 'row' tag.
# Returns the row object.
#
# NOTE(review): the original looked up the cached object for the 'otu'
# attribute but never used it, so the row is not linked to its otu. That
# dead lookup is removed here; confirm whether the row should receive it.
def parse_row(type, verbose)
  id = attribute('id')
  label = attribute('label')

  type = type.sub(/Matrix/, "Row")
  klass = verbose ? CellRow : SeqRow
  row = klass.new(id, :label => label)

  # A self-closing element has no end tag to stop at. Without this guard
  # the loop below would keep consuming the elements that follow.
  return row if empty_element?

  while next_node
    case local_name
    when 'seq'
      row.add_sequence(parse_seq(type))
    when 'cell'
      row.add_cell(parse_cell(type))
    when 'row'
      break
    end
  end

  row
end
568
+
569
# Expects the cursor to be on a 'seq' element.
#
# type - accepted for symmetry with the other parse_* methods; not used
#        (the original trimmed it into a local that was never read).
#
# Returns a Sequence whose value is the element's text node. When several
# text nodes are seen, the last one wins. An empty 'seq' gives a Sequence
# with no value assigned.
def parse_seq(type)
  seq = Sequence.new

  return seq if empty_element?

  while next_node
    case local_name
    when '#text'
      seq.value = value
    when 'seq'
      break
    end
  end

  seq
end
588
+
589
# Expects the cursor to be on a 'cell' element.
#
# type - row type name as built by #parse_row; its last three characters
#        are dropped before it is tested for "Continuous".
#
# The cell's char is the cached object named by the 'char' attribute. Its
# state is the raw 'state' attribute for Continuous types, and the cached
# object with that id for every other type.
# Returns the Cell object.
def parse_cell(type)
  cell_type = type[0..-4]

  cell = Cell.new

  char_id = attribute('char')
  state_id = attribute('state')

  char = cache[char_id]
  state = if cell_type =~ /Continuous/
            state_id
          else
            cache[state_id]
          end

  cell.state = state
  cell.char = char

  unless empty_element?
    while next_node
      break if local_name == 'cell'
    end
  end

  cell
end
614
+
615
+ end #end Parser class
616
+
617
+ end #end NeXML module
618
+
619
+ end #end Bio module
620
+
621
+ #n = Bio::NeXML.parse "examples/test.xml"
622
+ #Debugger.stop