bio-nexml 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. data/Gemfile +15 -0
  2. data/Gemfile.lock +24 -0
  3. data/LICENSE.txt +20 -0
  4. data/README.rdoc +47 -0
  5. data/Rakefile +55 -0
  6. data/TODO.txt +6 -0
  7. data/VERSION +1 -0
  8. data/bio-nexml.gemspec +126 -0
  9. data/extconf.rb +2 -0
  10. data/lib/bio-nexml.rb +0 -0
  11. data/lib/bio.rb +321 -0
  12. data/lib/bio/db/nexml.rb +109 -0
  13. data/lib/bio/db/nexml/mapper.rb +113 -0
  14. data/lib/bio/db/nexml/mapper/framework.rb +157 -0
  15. data/lib/bio/db/nexml/mapper/inflection.rb +99 -0
  16. data/lib/bio/db/nexml/mapper/repository.rb +59 -0
  17. data/lib/bio/db/nexml/matrix.rb +1046 -0
  18. data/lib/bio/db/nexml/parser.rb +622 -0
  19. data/lib/bio/db/nexml/schema/README.txt +21 -0
  20. data/lib/bio/db/nexml/schema/abstract.xsd +159 -0
  21. data/lib/bio/db/nexml/schema/characters/README.txt +1 -0
  22. data/lib/bio/db/nexml/schema/characters/abstractcharacters.xsd +361 -0
  23. data/lib/bio/db/nexml/schema/characters/characters.xsd +22 -0
  24. data/lib/bio/db/nexml/schema/characters/continuous.xsd +190 -0
  25. data/lib/bio/db/nexml/schema/characters/dna.xsd +282 -0
  26. data/lib/bio/db/nexml/schema/characters/protein.xsd +280 -0
  27. data/lib/bio/db/nexml/schema/characters/restriction.xsd +239 -0
  28. data/lib/bio/db/nexml/schema/characters/rna.xsd +283 -0
  29. data/lib/bio/db/nexml/schema/characters/standard.xsd +261 -0
  30. data/lib/bio/db/nexml/schema/external/sawsdl.xsd +21 -0
  31. data/lib/bio/db/nexml/schema/external/xhtml-datatypes-1.xsd +177 -0
  32. data/lib/bio/db/nexml/schema/external/xlink.xsd +75 -0
  33. data/lib/bio/db/nexml/schema/external/xml.xsd +145 -0
  34. data/lib/bio/db/nexml/schema/meta/README.txt +2 -0
  35. data/lib/bio/db/nexml/schema/meta/annotations.xsd +100 -0
  36. data/lib/bio/db/nexml/schema/meta/meta.xsd +294 -0
  37. data/lib/bio/db/nexml/schema/nexml.xsd +104 -0
  38. data/lib/bio/db/nexml/schema/taxa/README.txt +2 -0
  39. data/lib/bio/db/nexml/schema/taxa/taxa.xsd +39 -0
  40. data/lib/bio/db/nexml/schema/trees/README.txt +2 -0
  41. data/lib/bio/db/nexml/schema/trees/abstracttrees.xsd +135 -0
  42. data/lib/bio/db/nexml/schema/trees/network.xsd +113 -0
  43. data/lib/bio/db/nexml/schema/trees/tree.xsd +149 -0
  44. data/lib/bio/db/nexml/schema/trees/trees.xsd +36 -0
  45. data/lib/bio/db/nexml/taxa.rb +147 -0
  46. data/lib/bio/db/nexml/trees.rb +663 -0
  47. data/lib/bio/db/nexml/writer.rb +265 -0
  48. data/test/data/nexml/test.xml +69 -0
  49. data/test/test_bio-nexml.rb +17 -0
  50. data/test/unit/bio/db/nexml/tc_factory.rb +119 -0
  51. data/test/unit/bio/db/nexml/tc_mapper.rb +78 -0
  52. data/test/unit/bio/db/nexml/tc_matrix.rb +551 -0
  53. data/test/unit/bio/db/nexml/tc_parser.rb +21 -0
  54. data/test/unit/bio/db/nexml/tc_taxa.rb +118 -0
  55. data/test/unit/bio/db/nexml/tc_trees.rb +370 -0
  56. data/test/unit/bio/db/nexml/tc_writer.rb +633 -0
  57. metadata +253 -0
@@ -0,0 +1,59 @@
1
+ module Bio
2
+ module NeXML
3
+ module Mapper # :nodoc:
4
+
5
# HashRepository is a Hash-backed store for NeXML objects, keyed by each
# object's +id+. Iteration and membership work on the stored objects rather
# than on the keys.
class HashRepository < Hash

  # Keep handles on the stock Hash implementations before they are
  # overridden below.
  alias __delete__ delete
  alias __each__ each

  # Store +object+ under its id. Returns the repository itself so that
  # calls can be chained.
  def <<(object)
    store(object.id, object)
    self
  end
  alias append <<

  # Replace the whole content of the repository with +objects+.
  def objects=(objects)
    clear
    objects.each { |item| self << item }
  end

  # All stored objects, as an Array.
  def objects
    values
  end

  # Remove +object+ from the repository, looking it up by its id.
  def delete(object)
    __delete__(object.id)
  end

  # Iterate over each stored object (not over id/object pairs).
  def each(&block)
    each_value(&block)
  end

  # Iterate over the repository passing both the id and the object to
  # the block given.
  def each_with_id(&block)
    __each__(&block)
  end

  # True if the object stored under +object+'s id equals +object+.
  def include?(object)
    stored = self[object.id]
    stored == object
  end
end
49
+
50
# ArrayRepository is an Array-backed store for NeXML objects. Objects are
# kept in insertion order and are not indexed by id.
class ArrayRepository < Array
  # +append+ behaves exactly like Array#<<.
  alias_method :append, :<<

  # The repository is itself the list of stored objects.
  def objects
    self
  end
end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,1046 @@
1
+ module Bio
2
+ module NeXML
3
# State defines a possible observation through its 'symbol' attribute. A state may be
# ambiguous; an ambiguous state carries an ambiguity mapping that is either 'polymorphic'
# (resolved in an 'and' context) or 'uncertain' (resolved in an 'or' context).
#
#    state = Bio::NeXML::State.polymorphic( 'state1', :label => 'A label' )
#    state.id          #=> 'state1'
#    state.label       #=> 'A label'
#    state.ambiguous?  #=> true
#    state.ambiguity   #=> :polymorphic
class State
  include Enumerable
  include Mapper
  # Writer shared by all states; used by #to_xml to build nodes.
  @@writer = Bio::NeXML::Writer.new

  # A file level unique identifier.
  attr_accessor :id

  # Observation for this state.
  attr_reader :symbol

  # :polymorphic, :uncertain, or nil for an unambiguous state.
  attr_accessor :ambiguity

  # A human readable description of the state.
  attr_accessor :label

  # Each state is contained in a states element.
  belongs_to :states

  # Refer to the polymorphic or uncertain state that it belongs to.
  belongs_to :state_set, :update => :member

  # A polymorphic or uncertain state will have one or more members.
  has_n :members, :index => false, :update => :state_set

  has_n :cells, :index => false

  # Create a state with the given +id+. +symbol+ may be omitted, in which case a Hash
  # passed in its place is treated as +options+. If a block is given it is instance_eval'ed
  # when it takes no argument, otherwise it is called with the new state.
  def initialize(id, symbol = nil, options = {}, &block)
    @id = id
    if symbol.is_a?(Hash)
      options = symbol
    else
      self.symbol = symbol
    end
    properties(options) unless options.empty?
    if block_given?
      if block.arity < 1
        instance_eval(&block)
      else
        block.call(self)
      end
    end
  end

  # Set the observation symbol of this state.
  def symbol=(symbol)
    @symbol = symbol
  end

  # Takes a Bio::NeXML::State object and adds it to the ambiguity mapping of the state.
  # Returns <tt>self</tt>.
  def add_member(member); end if false # dummy for rdoc

  # True when an ambiguity kind has been set.
  def ambiguous?
    ambiguity ? true : false
  end

  def polymorphic?
    ambiguity == :polymorphic
  end

  def uncertain?
    ambiguity == :uncertain
  end

  # True if +member+ is part of this state's ambiguity mapping.
  def include?(member)
    has_member?(member)
  end

  # Number of member states.
  def count
    number_of_members
  end
  alias length count

  # Iterate over each member in <tt>self</tt> passing it to the block given. If no block is
  # provided, it returns an Enumerator.
  def each(&block)
    @members.each(&block)
  end

  # A state converts to a String as its symbol.
  def to_str
    symbol.to_s
  end
  alias to_s to_str

  # Build the XML node for this state. The tag name depends on the ambiguity kind, and
  # one "member" child is appended per member state.
  def to_xml
    tagname = case ambiguity
              when :polymorphic then "polymorphic_state_set"
              when :uncertain   then "uncertain_state_set"
              else "state"
              end
    node = @@writer.create_node(tagname, @@writer.attributes(self, :id, :label, :symbol))
    if count > 0
      each_member do |member|
        node << @@writer.create_node("member", :state => member.id)
      end
    end
    node
  end

  # Build a state and mark it as polymorphic.
  def self.polymorphic(id, symbol = nil, options = {}, &block)
    state = new(id, symbol, options, &block)
    state.ambiguity = :polymorphic
    state
  end

  # Build a state and mark it as uncertain.
  def self.uncertain(id, symbol = nil, options = {}, &block)
    state = new(id, symbol, options, &block)
    state.ambiguity = :uncertain
    state
  end
end # end class State
119
+
120
# A char specifies which states apply to matrix columns.
class Char
  include Mapper
  # Writer shared by all chars; used by #to_xml to build nodes.
  @@writer = Bio::NeXML::Writer.new

  # A file level unique identifier.
  attr_accessor :id

  # A human readable description.
  attr_accessor :label

  # Each char links to a states as a means of describing possible observations for that
  # particular column.
  belongs_to :format
  belongs_to :states

  has_n :cells, :index => false

  # Create a column definition with the given +id+.
  # * Arguments :
  # states( optional ) - the states object this char links to. A Hash passed here is
  # treated as +options+ instead.
  # options( optional ) - a Hash of properties, handed to +properties+ when non-empty.
  # block( optional ) - instance_eval'ed when it takes no argument, otherwise called
  # with the new char.
  #
  # The block is now captured explicitly: previously the body referenced +block+ without
  # declaring it, so passing a block raised NameError.
  def initialize(id, states = nil, options = {}, &block)
    @id = id
    unless states.nil?
      if states.is_a?(Hash)
        options = states
      else
        self.states = states
      end
    end
    properties(options) unless options.empty?
    if block_given?
      block.arity < 1 ? instance_eval(&block) : block.call(self)
    end
  end

  # Build the "char" XML node from the id, states, label and codon attributes.
  def to_xml
    @@writer.create_node("char", @@writer.attributes(self, :id, :states, :label, :codon))
  end
end
151
+
152
# A set of possible observation states, analogous to a "states" element
# (see #to_xml). It is enumerable over the states it holds.
class States
  include Enumerable
  include Mapper
  # Writer shared by all state sets; used by #to_xml to build nodes.
  @@writer = Bio::NeXML::Writer.new

  # A file level unique identifier.
  attr_accessor :id

  # A human readable description of the state set.
  attr_accessor :label

  belongs_to :format

  # Possible observation states.
  has_n :states

  # Matrix columns linked to this states.
  has_n :chars

  # Create a state set with the given +id+.
  # * Arguments :
  # options( optional ) - a Hash of properties, handed to +properties+ when non-empty.
  # block( optional ) - instance_eval'ed when it takes no argument, otherwise called
  # with the new object.
  #
  # The block is now captured explicitly: previously the body referenced +block+ without
  # declaring it, so passing a block raised NameError.
  def initialize(id, options = {}, &block)
    @id = id
    properties(options) unless options.empty?
    if block_given?
      block.arity < 1 ? instance_eval(&block) : block.call(self)
    end
  end

  # Create a Bio::NeXML::State with a generated id, add it to this set and return it.
  def create_state(symbol = nil, options = {})
    state = State.new(Bio::NeXML.generate_id(State), symbol, options)
    add_state(state)
    state
  end

  # The stubs below are never defined at runtime ( note the <tt>if false</tt> ); they only
  # exist so that rdoc picks up the dynamically generated methods.

  def add_state(state); end if false # dummy for rdoc

  def delete_state(state); end if false # dummy for rdoc

  def has_state?(state); end if false # dummy for rdoc

  def get_state_by_id(state); end if false # dummy for rdoc

  def each_state(state); end if false # dummy for rdoc

  def each_char(state); end if false # dummy for rdoc

  # True if +state+ is part of this set.
  def include?(state)
    has_state?(state)
  end

  # Return the first state whose symbol equals +symbol+, or <tt>nil</tt> if none matches.
  def get_state_by_symbol(symbol)
    each_state.find { |s| s.symbol == symbol }
  end

  # Iterate over each state in <tt>self</tt> passing it to the block given. If no block is
  # provided, it returns an Enumerator.
  def each(&block)
    @states.each(&block)
  end

  # Build the "states" XML node with one child node per state.
  def to_xml
    node = @@writer.create_node("states", @@writer.attributes(self, :id, :label))
    each_state do |state|
      node << state.to_xml
    end
    node
  end
end
218
+
219
# Format holds the state sets and the column definitions ( chars ) of a characters block.
# It is serialized as a "format" node (see #to_xml).
class Format
  # Writer shared by all formats; used by #to_xml to build nodes.
  @@writer = Bio::NeXML::Writer.new
  include Mapper

  # A format block must define set(s) of possible observation states.
  has_n :states, :singularize => false

  # A format will have one or more columns( chars => columns ).
  has_n :chars, :index => false

  # Because format elements don't have id attributes, we will use
  # object_id in this case.
  attr_accessor :id

  # Create a format.
  # * Arguments :
  # options( optional ) - a Hash of properties, handed to +properties+ when non-empty.
  # block( optional ) - instance_eval'ed when it takes no argument, otherwise called
  # with the new format.
  #
  # The block is now captured explicitly: previously the body referenced +block+ without
  # declaring it, so passing a block raised NameError.
  def initialize(options = {}, &block)
    @id = self.object_id
    properties(options) unless options.empty?
    if block_given?
      block.arity < 1 ? instance_eval(&block) : block.call(self)
    end
  end

  # Create a Bio::NeXML::States object with a generated id, add it to the format and
  # return it.
  def create_states(options = {})
    states = States.new(Bio::NeXML.generate_id(States), options)
    add_states states
    states
  end

  # Create a Bio::NeXML::Char object with a generated id, add it to the format and
  # return it.
  def create_char(states = nil, options = {})
    char = Char.new(Bio::NeXML.generate_id(Char), states, options)
    add_char char
    char
  end

  # The stubs below are never defined at runtime ( note the <tt>if false</tt> ); they only
  # exist so that rdoc picks up the dynamically generated methods. Each is listed once.

  # Add a state set( Bio::NeXML::States object ) to the format.
  def add_states(states)
    # dummy for rdoc
  end if false

  # Returns true if the given state set( Bio::NeXML::States object ) is
  # defined for the format block.
  def has_states?(states)
    # dummy for rdoc
  end if false

  # Remove a state set from the format.
  # * Arguments :
  # states( required ) - a Bio::NeXML::States object.
  # * Returns : the deleted object.
  #    format.delete_states( states )
  #    format.states #=> [ .. .. ]
  #    states.format #=> nil
  def delete_states(states)
    # dummy for rdoc
  end if false

  # Fetch a state set( Bio::NeXML::States object ) by id. Returns <tt>nil</tt> if none found.
  def get_states_by_id(id)
    # dummy for rdoc
  end if false

  # Returns the number of state sets defined for the format.
  def number_of_states
    # dummy for rdoc
  end if false

  # Add a column definition to the format.
  # * Arguments :
  # char( required ) - a Bio::NeXML::Char object.
  # * Returns : <tt>self</tt>.
  #    format.add_char( char )
  #    format.chars #=> [ .. char .. ]
  #    char.format #=> format
  def add_char(char)
    # dummy for rdoc
  end if false

  # Remove a column definition from the format.
  # * Arguments :
  # char( required ) - a Bio::NeXML::Char object.
  # * Returns : the deleted object.
  #    format.delete_char( char )
  #    format.chars #=> [ .. .. ]
  #    char.format #=> nil
  def delete_char(char)
    # dummy for rdoc
  end if false

  # Returns an array of state sets( Bio::NeXML::States objects ) for the format.
  #    format.states #=> [ .. .. ]
  def states
    # dummy for rdoc
  end if false

  # Returns an array of column definitions( Bio::NeXML::Char objects ) for the format.
  #    format.chars #=> [ .. .. ]
  def chars
    # dummy for rdoc
  end if false

  # Set the state sets of the format. This function will overwrite previous state set
  # definitions if any.
  # * Arguments :
  # states( required ) - an array of Bio::NeXML::States object.
  #    format.states = [ states ]
  #    format.states #=> [ states ]
  #    states.format #=> format
  def states=(states)
    # dummy for rdoc
  end if false

  # Set the column definitions of the format. This function will override the previous
  # column definitions if any.
  # * Arguments :
  # chars( required ) - an array of Bio::NeXML::Char object.
  #    format.chars = [ char ]
  #    format.chars #=> [ char ]
  #    char.format #=> format
  def chars=(chars)
    # dummy for rdoc
  end if false

  # Fetch a column definition( Bio::NeXML::Char object ) by id. Returns <tt>nil</tt> if none
  # found. Implemented by hand with a linear scan over the column definitions.
  # NOTE(review): presumably needed because chars are declared with :index => false -
  # confirm against Mapper.
  def get_char_by_id(id)
    each_char.find { |c| c.id == id }
  end

  # Returns true if the given column definition( Bio::NeXML::Char object ) is defined for
  # the format.
  def has_char?(char)
    # dummy for rdoc
  end if false

  # Iterate over each state set( Bio::NeXML::States object ) defined for the format. Returns
  # an Enumerator if no block is provided.
  def each_states
    # dummy for rdoc
  end if false

  # Iterate over each column definition( Bio::NeXML::Char object ) defined for the format.
  # Returns an Enumerator if no block is provided.
  def each_char
    # dummy for rdoc
  end if false

  # Returns the number of column definitions defined for the format.
  def number_of_chars
    # dummy for rdoc
  end if false

  # Build the "format" XML node: all state sets first, then all column definitions.
  def to_xml
    node = @@writer.create_node("format")

    each_states do |states|
      node << states.to_xml
    end

    each_char do |char|
      node << char.to_xml
    end

    node
  end

end # end of format
397
+
398
# Cell is the smallest unit of a character state matrix or of a sequence. A cell is either
# bound or unbound. A cell that points to a char and has a state is bound; bound cells
# correspond to the cell tag of NeXML, and their value is the 'symbol' of the state they
# point to. The value of a bound cell changes only by assigning it a different state. An
# unbound cell holds a raw value.
#
#    cell = Bio::NeXML::Cell.new( 'A' )
#    cell.bound? #=> false
#    cell.value #=> 'A'
#
#    # Assign a new value to an unbound cell.
#    cell.value = 'B'
#    cell.value #=> 'B'
#
#    cell = Bio::NeXML::Cell.new( :char => char1, :state => stateA )
#    cell.bound? #=> true
#    cell.value #=> 'A'
#
#    # Can not assign a value to a bound cell directly.
#    cell.value = 'B'
#    cell.value #=> 'A'
#
#    # Changing the state of a bound cell changes its value.
#    cell.state = stateB
#    cell.value #=> 'B'
class Cell
  include Mapper
  # Writer shared by all cells; used by #to_xml to build nodes.
  @@writer = Bio::NeXML::Writer.new

  attr_accessor :char, :state, :label

  belongs_to :state
  belongs_to :char

  belongs_to :cellrow
  alias row cellrow

  # The first argument may be a Hash of properties, a Bio::NeXML::Char, or a raw value.
  # The second may be a Bio::NeXML::State or a Hash of properties. +options+ is always
  # handed to +properties+ unless it is nil.
  def initialize(char = nil, state = nil, options = {})
    if char.is_a?(Hash)
      properties(char)
    elsif char.is_a?(Char)
      self.char = char
    elsif !char.nil?
      @value = char
    end

    if state.is_a?(State)
      self.state = state
    elsif state.is_a?(Hash)
      properties(state)
    end

    properties(options) unless options.nil?
  end

  # Return the value of a cell: the state's symbol when bound, the raw value otherwise.
  def value
    return state.symbol if bound?
    @value
  end
  alias symbol value

  # Assign a raw value. Ignored ( returns nil ) when the cell is bound.
  def value=(value)
    @value = value unless bound?
  end

  # True when the cell has both a char and a state.
  def bound?
    (char && state) ? true : false
  end

  # Allow cells to be implicitly used as a String.
  def to_str
    value.to_s
  end
  alias to_s to_str

  # Build the "cell" XML node from the state and char attributes.
  def to_xml
    @@writer.create_node("cell", @@writer.attributes(self, :state, :char))
  end

end
481
+
482
# A cell whose value is always the raw value it was given: reading or writing
# +state+, +symbol+ or +value+ all access the same stored value.
class ContinuousCell < Cell
  # Plain reader and writer for the raw value.
  attr_reader :value
  attr_writer :value

  # Assigning a state simply stores it as the raw value.
  alias state= value=

  alias symbol value
  alias state value
end
495
+
496
# A raw sequence held by a row; serialized as a "seq" node (see #to_xml).
class Sequence
  include Mapper
  # Writer shared by all sequences; used by #to_xml to build nodes.
  @@writer = Bio::NeXML::Writer.new

  # Every sequence belongs to a row
  belongs_to :seqrow
  alias row seqrow

  # The raw content of the sequence.
  attr_accessor :value

  # Because seq elements don't have id attributes, we will use
  # object_id in this case
  attr_accessor :id

  # Create a sequence.
  # * Arguments :
  # options( optional ) - a Hash of properties, handed to +properties+ when non-empty.
  # block( optional ) - instance_eval'ed when it takes no argument, otherwise called
  # with the new sequence.
  #
  # The block is now captured explicitly: previously the body referenced +block+ without
  # declaring it, so passing a block raised NameError.
  def initialize(options = {}, &block)
    properties(options) unless options.empty?
    # Assigned after the properties, so the id is always the object_id.
    @id = self.object_id
    if block_given?
      block.arity < 1 ? instance_eval(&block) : block.call(self)
    end
  end

  # Returns nil when there are no cells, :granular when the first cell is bound and
  # :raw otherwise.
  # NOTE(review): +cells+ is not declared in this class - confirm where it comes from.
  def type
    return nil if cells.empty?
    cells.first.bound? ? :granular : :raw
  end

  # Yield the value of each cell; returns an Enumerator if no block is given.
  # NOTE(review): relies on +cells+ as well, see #type.
  def each_value(&block) # :yields: value
    if block_given?
      cells.each { |c| yield c.value }
    else
      enum_for(:each_value)
    end
  end

  # Build the "seq" XML node containing the raw value.
  def to_xml
    node = @@writer.create_node("seq")
    node << self.value
    node
  end

end
536
+
537
# Matrix holds the rows of a characters block; serialized as a "matrix" node
# (see #to_xml).
class Matrix
  # Writer shared by all matrices; used by #to_xml to build nodes.
  @@writer = Bio::NeXML::Writer.new
  include Mapper
  has_n :rows, :index => false
  belongs_to :characters

  # Because matrix elements don't have id attributes, we will use
  # object_id in this case
  attr_accessor :id

  # Create a matrix.
  # * Arguments :
  # options( optional ) - a Hash of properties, handed to +properties+ when non-empty.
  # block( optional ) - instance_eval'ed when it takes no argument, otherwise called
  # with the new matrix.
  #
  # The block is now captured explicitly: previously the body referenced +block+ without
  # declaring it, so passing a block raised NameError.
  def initialize(options = {}, &block)
    @id = self.object_id
    properties(options) unless options.empty?
    if block_given?
      block.arity < 1 ? instance_eval(&block) : block.call(self)
    end
  end

  # The stubs below are never defined at runtime ( note the <tt>if false</tt> ); they only
  # exist so that rdoc picks up the dynamically generated methods. Each is listed once.

  # Add a row ( Bio::NeXML::Row object ) to the matrix.
  def add_row(row)
    # dummy for rdoc
  end if false

  # Returns true if the given row ( Bio::NeXML::Row object ) is
  # defined for the matrix block.
  def has_row?(rows)
    # dummy for rdoc
  end if false

  # Remove a row from the matrix.
  # * Arguments :
  # row( required ) - a Bio::NeXML::Row object.
  # * Returns : the deleted object.
  #    matrix.delete_row( row )
  #    matrix.rows #=> [ .. .. ]
  #    row.matrix #=> nil
  def delete_row(row)
    # dummy for rdoc
  end if false

  # Fetch a row ( Bio::NeXML::Row object ) by id. Returns <tt>nil</tt> if none found.
  def get_row_by_id(id)
    # dummy for rdoc
  end if false

  # Returns the number of rows defined for the matrix.
  def number_of_rows
    # dummy for rdoc
  end if false

  # Returns an array of rows( Bio::NeXML::Row objects ) for the matrix.
  #    matrix.rows #=> [ .. .. ]
  def rows
    # dummy for rdoc
  end if false

  # Set the rows of the matrix. This function will overwrite previous rows
  # for the matrix if any.
  # * Arguments :
  # rows( required ) - an array of Bio::NeXML::Row object.
  #    matrix.rows = [ rows ]
  #    matrix.rows #=> [ rows ]
  #    rows.matrix #=> matrix
  def rows=(rows)
    # dummy for rdoc
  end if false

  # Iterate over each row ( Bio::NeXML::Row object ) defined for the matrix. Returns an
  # Enumerator if no block is provided.
  def each_row
    # dummy for rdoc
  end if false

  # Build the "matrix" XML node with one child node per row.
  def to_xml
    node = @@writer.create_node("matrix")
    each_row do |row|
      node << row.to_xml
    end
    node
  end

end
627
+
628
# A matrix whose rows are Bio::NeXML::SeqRow objects.
class SeqMatrix < Matrix
  # Build a SeqRow with a generated id, add it to the matrix and return it.
  def create_row(options = {})
    new_row = SeqRow.new(Bio::NeXML.generate_id(SeqRow), options)
    add_row(new_row)
    new_row
  end
end
635
+
636
# A matrix whose rows are Bio::NeXML::CellRow objects.
class CellMatrix < Matrix
  # Build a CellRow with a generated id, add it to the matrix and return it.
  def create_row(options = {})
    new_row = CellRow.new(Bio::NeXML.generate_id(CellRow), options)
    add_row(new_row)
    new_row
  end
end
643
+
644
# Base class of matrix rows ( see SeqRow and CellRow ).
class Row
  include Mapper

  # A file level unique identifier.
  attr_accessor :id

  # A human readable description.
  attr_accessor :label

  # Every row refers to a taxon.
  belongs_to :otu
  belongs_to :matrix

  # Create a row with the given +id+.
  # * Arguments :
  # options( optional ) - a Hash of properties, handed to +properties+ when non-empty.
  # block( optional ) - instance_eval'ed when it takes no argument, otherwise called
  # with the new row.
  #
  # The block is now captured explicitly: previously the body referenced +block+ without
  # declaring it, so passing a block raised NameError.
  def initialize(id, options = {}, &block)
    @id = id
    properties(options) unless options.empty?
    if block_given?
      block.arity < 1 ? instance_eval(&block) : block.call(self)
    end
  end

end # end of row class
664
# A row that holds its data as Bio::NeXML::Sequence objects.
class SeqRow < Row
  # Writer shared by all sequence rows; used by #to_xml to build nodes.
  @@writer = Bio::NeXML::Writer.new
  # actually, probably only one <seq/> element
  has_n :sequences

  # Below are method stubs to be picked up by rdoc, as these methods are generated
  # dynamically. They are never defined at runtime ( note the <tt>if false</tt> ).

  # Add a sequence to the row.
  # * Arguments :
  # sequence( required ) - a Bio::NeXML::Sequence object.
  # * Returns : <tt>self</tt>.
  #    row.add_sequence( sequence )
  #    row.sequences #=> [ .. sequence .. ]
  def add_sequence(sequence)
    # dummy for rdoc
  end if false

  # Remove a sequence from the row.
  # * Arguments :
  # sequence( required ) - a Bio::NeXML::Sequence object.
  # * Returns : the deleted object.
  #    row.delete_sequence( sequence )
  #    row.sequences #=> [ .. .. ]
  def delete_sequence(sequence)
    # dummy for rdoc
  end if false

  # Returns an array of sequences ( Bio::NeXML::Sequence objects ) for the row.
  #    row.sequences #=> [ .. .. ]
  def sequences
    # dummy for rdoc
  end if false

  # Set the sequences of the row. This function will override previous sequences if any.
  # * Arguments :
  # sequences( required ) - an array of Bio::NeXML::Sequence object.
  #    row.sequences = [ sequence ]
  #    row.sequences #=> [ sequence ]
  def sequences=(sequences)
    # dummy for rdoc
  end if false

  # Returns true if the given sequence( Bio::NeXML::Sequence object ) is defined for the row.
  def has_sequence?(sequence)
    # dummy for rdoc
  end if false

  # Iterate over each sequence ( Bio::NeXML::Sequence object ) defined for the row. Returns
  # an Enumerator if no block is provided.
  def each_sequence
    # dummy for rdoc
  end if false

  # Returns the number of sequences defined for the row.
  def number_of_sequences
    # dummy for rdoc
  end if false

  # Build the "row" XML node. Only the first sequence is written.
  def to_xml
    attrs = @@writer.attributes(self, :id, :otu, :label)
    node = @@writer.create_node("row", attrs)
    first_sequence = sequences.first
    node << first_sequence.to_xml
    node
  end

end
731
# A row that holds its data as Bio::NeXML::Cell objects.
class CellRow < Row
  # Writer shared by all cell rows; used by #to_xml to build nodes.
  @@writer = Bio::NeXML::Writer.new
  has_n :cells, :index => false

  # The stubs below are never defined at runtime ( note the <tt>if false</tt> ); they only
  # exist so that rdoc picks up the dynamically generated methods.

  # Add a cell to the row.
  # * Arguments :
  # cell( required ) - a Bio::NeXML::Cell object.
  # * Returns : <tt>self</tt>.
  #    row.add_cell( cell )
  #    row.cells #=> [ .. cell .. ]
  #    cell.row #=> row
  def add_cell(cell)
    # dummy for rdoc
  end if false

  # Remove a cell from the row.
  # * Arguments :
  # cell( required ) - a Bio::NeXML::Cell object.
  # * Returns : the deleted object.
  #    row.delete_cell( cell )
  #    row.cells #=> [ .. .. ]
  #    cell.row #=> nil
  def delete_cell(cell)
    # dummy for rdoc
  end if false

  # Returns an array of cells ( Bio::NeXML::Cell objects ) for the row.
  #    row.cells #=> [ .. .. ]
  def cells
    # dummy for rdoc
  end if false

  # Set the cells of the row. This function will override previous cells if any.
  # * Arguments :
  # cells( required ) - an array of Bio::NeXML::Cell object.
  #    row.cells = [ cells ]
  #    row.cells #=> [ cells ]
  #    cell.row #=> row
  def cells=(cells)
    # dummy for rdoc
  end if false

  # Returns true if the given cell( Bio::NeXML::Cell object ) is defined for the row.
  def has_cell?(cell)
    # dummy for rdoc
  end if false

  # Iterate over each cell ( Bio::NeXML::Cell object ) defined for the row. Returns
  # an Enumerator if no block is provided.
  def each_cell
    # dummy for rdoc
  end if false

  # Returns the number of cells defined for the row.
  def number_of_cells
    # dummy for rdoc
  end if false

  # Build the "row" XML node with one child node per cell.
  def to_xml
    attrs = @@writer.attributes(self, :id, :otu, :label)
    node = @@writer.create_node("row", attrs)
    each_cell { |cell| node << cell.to_xml }
    node
  end

end
797
+
798
# A character state matrix. This class is analogous to the characters element of NeXML.
#
# #create_format calls +lookup+, which this class does not define; the subclasses in this
# file ( Dna, Rna, Protein, ... ) each provide it, so Characters is meant to be used
# through them.
class Characters
  include Mapper
  # Writer shared by all characters blocks; used by #to_xml to build nodes.
  @@writer = Bio::NeXML::Writer.new

  # An id should be uniquely scoped in an NeXML file. It need not be unique globally. It is a
  # compulsory attribute.
  attr_accessor :id

  # A characters block holds a single format definition
  attr_accessor :format

  # A characters block holds a single matrix definition
  attr_accessor :matrix

  # A human readable description. Its usage is optional.
  attr_accessor :label

  belongs_to :nexml

  # Every characters block compulsorily links to a taxa block( otus ).
  belongs_to :otus

  # Create a characters block with the given +id+. A format and a matrix are created
  # first, then +options+ ( when non-empty ) are handed to +properties+.
  # * Arguments :
  # options( optional ) - a Hash of properties.
  # block( optional ) - instance_eval'ed when it takes no argument, otherwise called
  # with the new object.
  #
  # The block is now captured explicitly: previously the body referenced +block+ without
  # declaring it, so passing a block raised NameError.
  def initialize(id, options = {}, &block)
    @id = id
    create_format
    create_matrix
    properties(options) unless options.empty?
    if block_given?
      block.arity < 1 ? instance_eval(&block) : block.call(self)
    end
  end

  # Set the format of this characters block.
  def add_format(format)
    @format = format
  end

  # Set the matrix of this characters block.
  def add_matrix(matrix)
    @matrix = matrix
  end

  # Build the "characters" XML node followed by its format and matrix nodes.
  def to_xml
    node = @@writer.create_node("characters", @@writer.attributes(self, :id, :"xsi:type", :otus, :label))
    node << self.format.to_xml
    node << self.matrix.to_xml
    node
  end

  # Create the matrix for this block, attach it and return it. Classes whose name ends in
  # "Seqs" get a SeqMatrix, all others a CellMatrix.
  def create_matrix(options = {})
    matrix = if self.class.name =~ /Seqs$/
               SeqMatrix.new(options)
             else
               CellMatrix.new(options)
             end
    add_matrix matrix
    matrix
  end

  # Create the format for this block, attach it and return it. A single state set is
  # created from the +lookup+ table: first one state per symbol that maps to exactly one
  # symbol, then one uncertain state per remaining symbol, with the mapped states as members.
  def create_format(options = {})
    format = Format.new(options)
    states = format.create_states
    lookup_table = self.lookup
    state_for_symbol = {}

    # First pass: the fundamental states, needed as members in the second pass.
    lookup_table.each do |key, members|
      next unless members.length == 1
      state_for_symbol[key] = states.create_state(:symbol => key)
    end

    # Second pass: the ambiguous states and their members.
    lookup_table.each do |key, members|
      next if members.length == 1
      state = states.create_state(:symbol => key, :ambiguity => :uncertain)
      members.each do |symbol|
        state.add_member(state_for_symbol[symbol])
      end
    end

    add_format format
    format
  end

  # Add the raw +string+ to +row+ ( a new row of the matrix when +row+ is nil ) and return
  # the row. A SeqRow receives a single Sequence; a CellRow receives one Cell per symbol,
  # creating missing column definitions and states on the fly.
  def create_raw(string, row = nil)
    row = self.matrix.create_row if row.nil?

    if row.kind_of? SeqRow
      sequence = Sequence.new
      sequence.value = join_sequence(split_sequence(string))
      row.add_sequence(sequence)
    end

    if row.kind_of? CellRow
      format = self.format
      states = format.states.first
      split_sequence(string).each_with_index do |symbol, pos|
        char = format.chars[pos]
        char = format.create_char(states) if char.nil?
        state = states.get_state_by_symbol(symbol)
        state = states.create_state(symbol) if state.nil?
        row.add_cell Cell.new(char, state)
      end
    end

    row
  end

  # Split a raw string into symbols, one per character.
  def split_sequence(string)
    string.split(//)
  end

  # Join symbols back into a raw string.
  def join_sequence(array)
    array.join
  end

end # end class Characters
919
# Characters block for DNA data.
class Dna < Characters
  # Maps every DNA symbol to the list of symbols it stands for.
  @@lookup = {
    'A' => %w[A],
    'C' => %w[C],
    'G' => %w[G],
    'T' => %w[T],
    'M' => %w[A C],
    'R' => %w[A G],
    'W' => %w[A T],
    'S' => %w[C G],
    'Y' => %w[C T],
    'K' => %w[G T],
    'V' => %w[A C G],
    'H' => %w[A C T],
    'D' => %w[A G T],
    'B' => %w[C G T],
    'X' => %w[G A T C],
    'N' => %w[G A T C],
    '-' => [],
    '?' => %w[G A T C]
  }

  # The symbol lookup table used by Characters#create_format.
  def lookup
    @@lookup
  end
end

# DNA block whose name ends in "Seqs", so Characters#create_matrix gives it a SeqMatrix.
class DnaSeqs < Dna
end

# DNA block that gets a CellMatrix from Characters#create_matrix.
class DnaCells < Dna
end
946
# Characters block for RNA data.
class Rna < Characters
  # Maps every RNA symbol to the list of symbols it stands for.
  @@lookup = {
    'A' => %w[A],
    'C' => %w[C],
    'G' => %w[G],
    'U' => %w[U],
    'M' => %w[A C],
    'R' => %w[A G],
    'W' => %w[A U],
    'S' => %w[C G],
    'Y' => %w[C U],
    'K' => %w[G U],
    'V' => %w[A C G],
    'H' => %w[A C U],
    'D' => %w[A G U],
    'B' => %w[C G U],
    'X' => %w[G A U C],
    'N' => %w[G A U C],
    '-' => [],
    '?' => %w[G A U C]
  }

  # The symbol lookup table used by Characters#create_format.
  def lookup
    @@lookup
  end
end

# RNA block whose name ends in "Seqs", so Characters#create_matrix gives it a SeqMatrix.
class RnaSeqs < Rna
end

# RNA block that gets a CellMatrix from Characters#create_matrix.
class RnaCells < Rna
end
973
# Characters block for protein data.
class Protein < Characters
  # Maps every protein symbol to the list of symbols it stands for.
  @@lookup = {
    'A' => %w[A],
    'B' => %w[D N],
    'C' => %w[C],
    'D' => %w[D],
    'E' => %w[E],
    'F' => %w[F],
    'G' => %w[G],
    'H' => %w[H],
    'I' => %w[I],
    'K' => %w[K],
    'L' => %w[L],
    'M' => %w[M],
    'N' => %w[N],
    'P' => %w[P],
    'Q' => %w[Q],
    'R' => %w[R],
    'S' => %w[S],
    'T' => %w[T],
    'U' => %w[U],
    'V' => %w[V],
    'W' => %w[W],
    'X' => %w[X],
    'Y' => %w[Y],
    'Z' => %w[E Q],
    '*' => %w[*],
    '-' => [],
    '?' => %w[A C D E F G H I K L M N P Q R S T U V W X Y *]
  }

  # The symbol lookup table used by Characters#create_format.
  def lookup
    @@lookup
  end
end

# Protein block whose name ends in "Seqs", so Characters#create_matrix gives it a SeqMatrix.
class ProteinSeqs < Protein
end

# Protein block that gets a CellMatrix from Characters#create_matrix.
class ProteinCells < Protein
end
1009
# Characters block for standard data. It has no predefined symbols, and raw strings
# are whitespace separated.
class Standard < Characters
  # Empty lookup table: no states are predefined.
  @@lookup = {}

  # The symbol lookup table used by Characters#create_format.
  def lookup
    @@lookup
  end

  # Split a raw string into symbols on whitespace.
  def split_sequence(string)
    string.split
  end

  # Join symbols into a raw string, separated by single spaces.
  def join_sequence(array)
    array.join(" ")
  end
end

# Standard block whose name ends in "Seqs", so Characters#create_matrix gives it a SeqMatrix.
class StandardSeqs < Standard
end

# Standard block that gets a CellMatrix from Characters#create_matrix.
class StandardCells < Standard
end
1023
# Characters block for restriction data, with the two symbols '0' and '1'.
class Restriction < Characters
  # Each of the two symbols stands only for itself.
  @@lookup = {
    '0' => %w[0],
    '1' => %w[1]
  }

  # The symbol lookup table used by Characters#create_format.
  def lookup
    @@lookup
  end
end

# Restriction block whose name ends in "Seqs", so Characters#create_matrix gives it a SeqMatrix.
class RestrictionSeqs < Restriction
end

# Restriction block that gets a CellMatrix from Characters#create_matrix.
class RestrictionCells < Restriction
end
1031
# Characters block for continuous data. It has no predefined symbols, and raw strings
# are whitespace separated.
class Continuous < Characters
  # Empty lookup table: no states are predefined.
  @@lookup = {}

  # The symbol lookup table used by Characters#create_format.
  def lookup
    @@lookup
  end

  # Split a raw string into values on whitespace.
  def split_sequence(string)
    string.split
  end

  # Join values into a raw string, separated by single spaces.
  def join_sequence(array)
    array.join(" ")
  end
end

# Continuous block whose name ends in "Seqs", so Characters#create_matrix gives it a SeqMatrix.
class ContinuousSeqs < Continuous
end

# Continuous block that gets a CellMatrix from Characters#create_matrix.
class ContinuousCells < Continuous
end
1045
+ end
1046
+ end