bio-nexml 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. data/Gemfile +15 -0
  2. data/Gemfile.lock +24 -0
  3. data/LICENSE.txt +20 -0
  4. data/README.rdoc +47 -0
  5. data/Rakefile +55 -0
  6. data/TODO.txt +6 -0
  7. data/VERSION +1 -0
  8. data/bio-nexml.gemspec +126 -0
  9. data/extconf.rb +2 -0
  10. data/lib/bio-nexml.rb +0 -0
  11. data/lib/bio.rb +321 -0
  12. data/lib/bio/db/nexml.rb +109 -0
  13. data/lib/bio/db/nexml/mapper.rb +113 -0
  14. data/lib/bio/db/nexml/mapper/framework.rb +157 -0
  15. data/lib/bio/db/nexml/mapper/inflection.rb +99 -0
  16. data/lib/bio/db/nexml/mapper/repository.rb +59 -0
  17. data/lib/bio/db/nexml/matrix.rb +1046 -0
  18. data/lib/bio/db/nexml/parser.rb +622 -0
  19. data/lib/bio/db/nexml/schema/README.txt +21 -0
  20. data/lib/bio/db/nexml/schema/abstract.xsd +159 -0
  21. data/lib/bio/db/nexml/schema/characters/README.txt +1 -0
  22. data/lib/bio/db/nexml/schema/characters/abstractcharacters.xsd +361 -0
  23. data/lib/bio/db/nexml/schema/characters/characters.xsd +22 -0
  24. data/lib/bio/db/nexml/schema/characters/continuous.xsd +190 -0
  25. data/lib/bio/db/nexml/schema/characters/dna.xsd +282 -0
  26. data/lib/bio/db/nexml/schema/characters/protein.xsd +280 -0
  27. data/lib/bio/db/nexml/schema/characters/restriction.xsd +239 -0
  28. data/lib/bio/db/nexml/schema/characters/rna.xsd +283 -0
  29. data/lib/bio/db/nexml/schema/characters/standard.xsd +261 -0
  30. data/lib/bio/db/nexml/schema/external/sawsdl.xsd +21 -0
  31. data/lib/bio/db/nexml/schema/external/xhtml-datatypes-1.xsd +177 -0
  32. data/lib/bio/db/nexml/schema/external/xlink.xsd +75 -0
  33. data/lib/bio/db/nexml/schema/external/xml.xsd +145 -0
  34. data/lib/bio/db/nexml/schema/meta/README.txt +2 -0
  35. data/lib/bio/db/nexml/schema/meta/annotations.xsd +100 -0
  36. data/lib/bio/db/nexml/schema/meta/meta.xsd +294 -0
  37. data/lib/bio/db/nexml/schema/nexml.xsd +104 -0
  38. data/lib/bio/db/nexml/schema/taxa/README.txt +2 -0
  39. data/lib/bio/db/nexml/schema/taxa/taxa.xsd +39 -0
  40. data/lib/bio/db/nexml/schema/trees/README.txt +2 -0
  41. data/lib/bio/db/nexml/schema/trees/abstracttrees.xsd +135 -0
  42. data/lib/bio/db/nexml/schema/trees/network.xsd +113 -0
  43. data/lib/bio/db/nexml/schema/trees/tree.xsd +149 -0
  44. data/lib/bio/db/nexml/schema/trees/trees.xsd +36 -0
  45. data/lib/bio/db/nexml/taxa.rb +147 -0
  46. data/lib/bio/db/nexml/trees.rb +663 -0
  47. data/lib/bio/db/nexml/writer.rb +265 -0
  48. data/test/data/nexml/test.xml +69 -0
  49. data/test/test_bio-nexml.rb +17 -0
  50. data/test/unit/bio/db/nexml/tc_factory.rb +119 -0
  51. data/test/unit/bio/db/nexml/tc_mapper.rb +78 -0
  52. data/test/unit/bio/db/nexml/tc_matrix.rb +551 -0
  53. data/test/unit/bio/db/nexml/tc_parser.rb +21 -0
  54. data/test/unit/bio/db/nexml/tc_taxa.rb +118 -0
  55. data/test/unit/bio/db/nexml/tc_trees.rb +370 -0
  56. data/test/unit/bio/db/nexml/tc_writer.rb +633 -0
  57. metadata +253 -0
@@ -0,0 +1,59 @@
1
+ module Bio
2
+ module NeXML
3
+ module Mapper # :nodoc:
4
+
5
# HashRepository is a Hash based store for NeXML objects. Every object is
# kept under the key returned by its +id+ method.
class HashRepository < Hash

  # Keep the stock Hash implementations reachable, because +delete+ and
  # +each+ are redefined further down.
  alias __delete__ delete
  alias __each__ each

  # Add an object to the repository, keyed by <tt>object.id</tt>.
  # Returns the repository itself.
  def <<( object )
    key = object.id
    self[ key ] = object
    self
  end
  alias append <<

  # Empty the repository and refill it with the objects passed.
  def objects=( objects )
    clear
    objects.each do |entry|
      self << entry
    end
  end

  # Return the stored objects as an array.
  def objects
    values
  end

  # Delete an object; it is looked up by its id.
  def delete( object )
    __delete__( object.id )
  end

  # Iterate over each object in the repository (the ids are not yielded).
  def each( &visitor )
    each_value( &visitor )
  end

  # Iterate over each entry passing both the id and the object to the
  # block given.
  def each_with_id( &visitor )
    __each__( &visitor )
  end

  # True when the entry stored under <tt>object.id</tt> equals +object+.
  def include?( object )
    stored = self[ object.id ]
    stored == object
  end
end
49
+
50
# ArrayRepository is an Array based store; the repository itself is the
# list of stored objects.
class ArrayRepository < Array
  # +append+ is another name for Array#<<.
  alias append <<

  # Return the stored objects, i.e. the repository itself.
  def objects
    self
  end
end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,1046 @@
1
+ module Bio
2
+ module NeXML
3
# State defines a possible observation with its 'symbol' attribute. A state may be ambiguous. An
# ambiguous state must define an ambiguity mapping which may be 'polymorphic', resolved in an
# 'and' context, or 'uncertain', resolved in an 'or' context.
#
#    state = Bio::NeXML::State.polymorphic( 'state1', 'P' )
#    state.id         #=> 'state1'
#    state.symbol     #=> 'P'
#    state.ambiguous? #=> true
#    state.ambiguity  #=> :polymorphic
class State
  include Enumerable
  include Mapper
  @@writer = Bio::NeXML::Writer.new

  # A file level unique identifier.
  attr_accessor :id

  # Observation for this state.
  attr_reader :symbol

  # Polymorphic or uncertain.
  attr_accessor :ambiguity

  # A human readable description of the state.
  attr_accessor :label

  # Each state is contained in a states element.
  belongs_to :states

  # Refer to the polymorphic or uncertain state that it belongs to.
  belongs_to :state_set, :update => :member

  # A polymorphic or uncertain state will have one or more members.
  has_n :members, :index => false, :update => :state_set

  has_n :cells, :index => false

  # Create a state. +symbol+ may be left out, in which case a Hash given in
  # its place is used as +options+. A block taking no argument is
  # instance_eval'ed; otherwise the new state is passed to it.
  def initialize( id, symbol = nil, options = {}, &block )
    @id = id
    if symbol.is_a?( Hash )
      options = symbol
    else
      self.symbol = symbol
    end
    properties( options ) unless options.empty?
    return unless block_given?
    block.arity < 1 ? instance_eval( &block ) : block.call( self )
  end

  # Set the observation for this state.
  def symbol=( symbol )
    @symbol = symbol
  end

  # Takes a Bio::NeXML::State object and adds it to the ambiguity mapping of the state.
  # Returns <tt>self</tt>.
  def add_member( member ); end if false # dummy for rdoc

  # True when an ambiguity has been set.
  def ambiguous?
    ambiguity ? true : false
  end

  def polymorphic?
    :polymorphic == ambiguity
  end

  def uncertain?
    :uncertain == ambiguity
  end

  def include?( member )
    has_member?( member )
  end

  # Number of members in the ambiguity mapping.
  def count
    number_of_members
  end
  alias length count

  # Iterate over each member in <tt>self</tt> passing it to the block given. If no block is provided,
  # it returns an Enumerator.
  def each( &block )
    @members.each( &block )
  end

  # Allow a state to be used as a String: the symbol converted with to_s.
  def to_str
    symbol.to_s
  end
  alias to_s to_str

  # Serialize the state. The tag name depends on the ambiguity; one
  # "member" child is appended per member.
  def to_xml
    tagname = case ambiguity
              when :polymorphic then "polymorphic_state_set"
              when :uncertain   then "uncertain_state_set"
              else "state"
              end
    node = @@writer.create_node( tagname, @@writer.attributes( self, :id, :label, :symbol ) )
    if count > 0
      each_member { |member| node << @@writer.create_node( "member", :state => member.id ) }
    end
    node
  end

  # Build a state and mark it as polymorphic.
  def self.polymorphic( id, symbol = nil, options = {}, &block )
    ambiguous = new( id, symbol, options, &block )
    ambiguous.ambiguity = :polymorphic
    ambiguous
  end

  # Build a state and mark it as uncertain.
  def self.uncertain( id, symbol = nil, options = {}, &block )
    ambiguous = new( id, symbol, options, &block )
    ambiguous.ambiguity = :uncertain
    ambiguous
  end
end #end class State
119
+
120
# A char specifies which states apply to matrix columns.
class Char
  include Mapper
  @@writer = Bio::NeXML::Writer.new

  # A file level unique identifier.
  attr_accessor :id

  # A human readable description.
  attr_accessor :label

  # Each char links to a states as a means of describing possible observations for that
  # particular column.
  belongs_to :format
  belongs_to :states

  has_n :cells, :index => false

  # Create a column definition.
  # * Arguments :
  # id( required ) - the identifier of the char.
  # states( optional ) - the states object to link to; a Hash given here is used as +options+.
  # options( optional ) - a Hash handed to +properties+ when not empty.
  # A block taking no argument is instance_eval'ed; otherwise the new char is passed to it.
  def initialize( id, states = nil, options = {}, &block )
    @id = id
    unless states.nil?
      states.is_a?( Hash ) ? options = states : self.states = states
    end
    properties( options ) unless options.empty?
    # The block has to be captured in the signature: without +&block+ the
    # name +block+ was undefined and any block passed raised a NameError.
    if block
      block.arity < 1 ? instance_eval( &block ) : block.call( self )
    end
  end

  # Serialize the char as a "char" node.
  def to_xml
    @@writer.create_node( "char", @@writer.attributes( self, :id, :states, :label, :codon ) )
  end
end
151
+
152
# States groups the observation states ( Bio::NeXML::State objects ) that
# chars of a format may refer to.
class States
  include Enumerable
  include Mapper
  @@writer = Bio::NeXML::Writer.new

  # A file level unique identifier.
  attr_accessor :id

  # A human readable description of the state set.
  attr_accessor :label

  belongs_to :format

  # Possible observation states.
  has_n :states

  # Matrix columns linked to this states.
  has_n :chars

  # Create a state set.
  # * Arguments :
  # id( required ) - the identifier of the state set.
  # options( optional ) - a Hash handed to +properties+ when not empty.
  # A block taking no argument is instance_eval'ed; otherwise the new object is passed to it.
  def initialize( id, options = {}, &block )
    @id = id
    properties( options ) unless options.empty?
    # The block has to be captured in the signature: without +&block+ the
    # name +block+ was undefined and any block passed raised a NameError.
    if block
      block.arity < 1 ? instance_eval( &block ) : block.call( self )
    end
  end

  # Create a State with a generated id, add it to this state set and return it.
  def create_state( symbol = nil, options = {} )
    state = State.new( Bio::NeXML.generate_id( State ), symbol, options )
    add_state( state )
    state
  end

  # The definitions below are guarded by <tt>if false</tt> and never take
  # effect at runtime; they are only dummies for rdoc.

  def add_state( state ); end if false # dummy for rdoc

  def delete_state( state ); end if false # dummy for rdoc

  def has_state?( state ); end if false # dummy for rdoc

  def get_state_by_id( id ); end if false # dummy for rdoc

  def each_state; end if false # dummy for rdoc

  def each_char; end if false # dummy for rdoc

  def include?( state )
    has_state?( state )
  end

  # Return the first state whose symbol equals +symbol+, or <tt>nil</tt> if none matches.
  def get_state_by_symbol( symbol )
    each_state.find { |s| s.symbol == symbol }
  end

  # Iterate over each state in <tt>self</tt> passing it to the block given. If no block is provided,
  # it returns an Enumerator.
  def each( &block )
    @states.each( &block )
  end

  # Serialize the state set as a "states" node with one child per state.
  def to_xml
    node = @@writer.create_node( "states", @@writer.attributes( self, :id, :label ) )
    self.each_state do |state|
      node << state.to_xml
    end
    node
  end
end
218
+
219
# Format holds the state sets ( Bio::NeXML::States objects ) and the column
# definitions ( Bio::NeXML::Char objects ) of a characters block.
class Format
  @@writer = Bio::NeXML::Writer.new
  include Mapper

  # A format block must define set(s) of possible observation states.
  has_n :states, :singularize => false

  # A format will have one or more columns( chars => columns ).
  has_n :chars, :index => false

  # Because format elements don't have id attributes, we will use
  # object_id in this case.
  attr_accessor :id

  # Create a format.
  # * Arguments :
  # options( optional ) - a Hash handed to +properties+ when not empty.
  # A block taking no argument is instance_eval'ed; otherwise the new format is passed to it.
  def initialize( options = {}, &block )
    @id = self.object_id
    properties( options ) unless options.empty?
    # The block has to be captured in the signature: without +&block+ the
    # name +block+ was undefined and any block passed raised a NameError.
    if block
      block.arity < 1 ? instance_eval( &block ) : block.call( self )
    end
  end

  # Create a state set with a generated id, add it to the format and return it.
  def create_states( options = {} )
    states = States.new( Bio::NeXML.generate_id( States ), options )
    add_states states
    states
  end

  # Create a column definition with a generated id, add it to the format and return it.
  def create_char( states = nil, options = {} )
    char = Char.new( Bio::NeXML.generate_id( Char ), states, options )
    add_char char
    char
  end

  # The definitions below that end in <tt>if false</tt> never take effect at
  # runtime; they are only dummies for rdoc.

  # Add a state set( Bio::NeXML::States object ) to the format.
  def add_states( states )
    # dummy for rdoc
  end if false

  # Remove a state set from the format.
  # * Arguments :
  # states( required ) - a Bio::NeXML::States object.
  # * Returns : the deleted object.
  #    format.delete_states( states )
  #    format.states #=> [ .. .. ]
  #    states.format #=> nil
  def delete_states( states )
    # dummy for rdoc
  end if false

  # Add a column definition to the format.
  # * Arguments :
  # char( required ) - a Bio::NeXML::Char object.
  # * Returns : <tt>self</tt>.
  #    format.add_char( char )
  #    format.chars #=> [ .. char .. ]
  #    char.format #=> format
  def add_char( char )
    # dummy for rdoc
  end if false

  # Remove a column definition from the format.
  # * Arguments :
  # char( required ) - a Bio::NeXML::Char object.
  # * Returns : the deleted object.
  #    format.delete_char( char )
  #    format.chars #=> [ .. .. ]
  #    char.format #=> nil
  def delete_char( char )
    # dummy for rdoc
  end if false

  # Returns an array of state sets( Bio::NeXML::States objects ) for the format.
  #    format.states #=> [ .. .. ]
  def states
    # dummy for rdoc
  end if false

  # Returns an array of column definitions( Bio::NeXML::Char objects ) for the format.
  #    format.chars #=> [ .. .. ]
  def chars
    # dummy for rdoc
  end if false

  # Add state sets to the format. This function will overwrite previous state set definitions
  # for the format if any.
  # * Arguments :
  # states( required ) - an array of Bio::NeXML::States object.
  #    format.states = [ states ]
  #    format.states #=> [ states ]
  #    states.format #=> format
  def states=( states )
    # dummy for rdoc
  end if false

  # Add column definitions to the format. This function will override the previous column
  # definitions if any.
  # * Arguments :
  # chars( required ) - an array of Bio::NeXML::Char object.
  #    format.chars = [ char ]
  #    format.chars #=> [ char ]
  #    char.format #=> format
  def chars=( chars )
    # dummy for rdoc
  end if false

  # Fetch a state set( Bio::NeXML::States object ) by id. Returns <tt>nil</tt> if none found.
  def get_states_by_id( id )
    # dummy for rdoc
  end if false

  # Fetch a column definition( Bio::NeXML::Char object ) by id. Returns <tt>nil</tt> if none
  # found.
  # NOTE(review): implemented by hand, presumably because chars are declared
  # with <tt>:index => false</tt> - confirm against the Mapper framework.
  def get_char_by_id( id )
    each_char.find { |c| c.id == id }
  end

  # Returns true if the given state set( Bio::NeXML::States object ) is defined for the format.
  def has_states?( states )
    # dummy for rdoc
  end if false

  # Returns true if the given column definition( Bio::NeXML::Char object ) is defined for the format.
  def has_char?( char )
    # dummy for rdoc
  end if false

  # Iterate over each state sets( Bio::NeXML::States object ) defined for the format. Returns an
  # Enumerator if no block is provided.
  def each_states
    # dummy for rdoc
  end if false

  # Iterate over each column definitions( Bio::NeXML::Char object ) defined for the format. Returns
  # an Enumerator if no block is provided.
  def each_char
    # dummy for rdoc
  end if false

  # Returns the number of state sets defined for the format.
  def number_of_states
    # dummy for rdoc
  end if false

  # Returns the number of column definitions defined for the format.
  def number_of_chars
    # dummy for rdoc
  end if false

  # Serialize the format as a "format" node: state sets first, then chars.
  def to_xml
    node = @@writer.create_node( "format" )

    self.each_states do |states|
      node << states.to_xml
    end

    self.each_char do |char|
      node << char.to_xml
    end

    node
  end

end # end of format
397
+
398
# Cell is the smallest unit of a character state matrix or of a sequence. A cell may be bound or
# unbound. If a cell points to a char and has a state, it is a bound cell. Bound cells correspond
# to the cell tag of NeXML. Value of a bound cell is the same as the 'symbol' of the state it points
# to. Value of a bound cell may be changed by assigning a different state to it. An unbound cell
# holds a raw value.
#    cell = Bio::NeXML::Cell.new( 'A' )
#    cell.bound?     #=> false
#    cell.value      #=> 'A'
#
#    # Assign a new value to an unbound cell.
#    cell.value = 'B'
#    cell.value      #=> 'B'
#
#    cell = Bio::NeXML::Cell.new( :char => char1, :state => stateA )
#    cell.bound?     #=> true
#    cell.value      #=> 'A'
#
#    # Can not assign a value to a bound cell directly.
#    cell.value = 'B'
#    cell.value      #=> 'A'
#
#    # Changing the state of a bound cell changes its value.
#    cell.state = stateB
#    cell.value      #=> 'B'
class Cell
  include Mapper
  @@writer = Bio::NeXML::Writer.new

  attr_accessor :char
  attr_accessor :state
  attr_accessor :label

  belongs_to :state
  belongs_to :char

  belongs_to :cellrow
  alias row cellrow

  # The first argument may be a Hash of properties, a Char, or a raw value;
  # the second a State or a Hash of properties. +options+ is handed to
  # +properties+ unless it is nil.
  def initialize( char = nil, state = nil, options = {} )
    if char.is_a?( Hash )
      properties( char )
    elsif char.is_a?( Char )
      self.char = char
    elsif !char.nil?
      @value = char
    end

    if state.is_a?( State )
      self.state = state
    elsif state.is_a?( Hash )
      properties( state )
    end

    properties( options ) unless options.nil?
  end

  # Return the value of a cell: the symbol of its state when bound, the raw
  # value otherwise.
  def value
    if bound?
      state.symbol
    else
      @value
    end
  end
  alias symbol value

  # Assign a raw value. Does nothing (and returns nil) for a bound cell.
  def value=( value )
    return nil if bound?
    @value = value
  end

  # A cell is bound when it has both a char and a state.
  def bound?
    ( char && state ) ? true : false
  end

  # Allow cells to be implicitly used as a String.
  def to_str
    value.to_s
  end
  alias to_s to_str

  # Serialize the cell as a "cell" node.
  def to_xml
    attributes = @@writer.attributes( self, :state, :char )
    @@writer.create_node( "cell", attributes )
  end

end
481
+
482
# ContinuousCell is a Cell whose value is always the raw value it holds:
# +value+, +symbol+ and +state+ all read it, and both +value=+ and +state=+
# write it.
class ContinuousCell < Cell
  # Return the raw value.
  def value
    @value
  end
  alias symbol value
  alias state value

  # Store the raw value.
  def value=( value )
    @value = value
  end

  # Assigning a state stores it as the raw value.
  def state=( value )
    @value = value
  end
end
495
+
496
# Sequence holds the raw value of a seq element.
class Sequence
  include Mapper
  @@writer = Bio::NeXML::Writer.new

  # Every sequence belongs to a row
  belongs_to :seqrow
  alias row seqrow

  attr_accessor :value

  # Because seq elements don't have id attributes, we will use
  # object_id in this case
  attr_accessor :id

  # Create a sequence.
  # * Arguments :
  # options( optional ) - a Hash handed to +properties+ when not empty.
  # A block taking no argument is instance_eval'ed; otherwise the new sequence is passed to it.
  def initialize( options = {}, &block )
    properties( options ) unless options.empty?
    @id = self.object_id
    # The block has to be captured in the signature: without +&block+ the
    # name +block+ was undefined and any block passed raised a NameError.
    if block
      block.arity < 1 ? instance_eval( &block ) : block.call( self )
    end
  end

  # Returns nil without cells, :granular if the first cell is bound and :raw otherwise.
  # NOTE(review): relies on a +cells+ method that is not declared in this class - confirm where it comes from.
  def type
    return nil if cells.empty?
    cells.first.bound? ? :granular : :raw
  end

  # Yield the value of each cell; returns an Enumerator if no block is given.
  def each_value( &block ) # :yields: value
    if block_given?
      cells.each { |c| yield c.value }
    else
      enum_for( :each_value )
    end
  end

  # Serialize the sequence as a "seq" node containing the value.
  def to_xml
    node = @@writer.create_node( "seq" )
    node << self.value
    node
  end

end
536
+
537
# Matrix holds the rows of a characters block.
class Matrix
  @@writer = Bio::NeXML::Writer.new
  include Mapper
  has_n :rows, :index => false
  belongs_to :characters

  # Because matrix elements don't have id attributes, we will use
  # object_id in this case
  attr_accessor :id

  # Create a matrix.
  # * Arguments :
  # options( optional ) - a Hash handed to +properties+ when not empty.
  # A block taking no argument is instance_eval'ed; otherwise the new matrix is passed to it.
  def initialize( options = {}, &block )
    @id = self.object_id
    properties( options ) unless options.empty?
    # The block has to be captured in the signature: without +&block+ the
    # name +block+ was undefined and any block passed raised a NameError.
    if block
      block.arity < 1 ? instance_eval( &block ) : block.call( self )
    end
  end

  # The definitions below that end in <tt>if false</tt> never take effect at
  # runtime; they are only dummies for rdoc.

  # Add a row ( Bio::NeXML::Row object ) to the matrix.
  def add_row( row )
    # dummy for rdoc
  end if false

  # Remove a row from the matrix.
  # * Arguments :
  # row( required ) - a Bio::NeXML::Row object.
  # * Returns : the deleted object.
  #    matrix.delete_row( row )
  #    matrix.rows #=> [ .. .. ]
  #    row.matrix #=> nil
  def delete_row( row )
    # dummy for rdoc
  end if false

  # Fetch a row ( Bio::NeXML::Row object ) by id. Returns <tt>nil</tt> if none found.
  def get_row_by_id( id )
    # dummy for rdoc
  end if false

  # Returns an array of rows( Bio::NeXML::Row objects ) for the matrix.
  #    matrix.rows #=> [ .. .. ]
  def rows
    # dummy for rdoc
  end if false

  # Add rows to the matrix. This function will overwrite previous rows
  # for the matrix if any.
  # * Arguments :
  # rows( required ) - an array of Bio::NeXML::Row object.
  #    matrix.rows = [ row ]
  #    matrix.rows #=> [ row ]
  #    row.matrix #=> matrix
  def rows=( rows )
    # dummy for rdoc
  end if false

  # Returns true if the given row( Bio::NeXML::Row object ) is defined for the matrix.
  def has_row?( row )
    # dummy for rdoc
  end if false

  # Iterate over each row ( Bio::NeXML::Row object ) defined for the matrix. Returns an
  # Enumerator if no block is provided.
  def each_row
    # dummy for rdoc
  end if false

  # Returns the number of rows defined for the matrix.
  def number_of_rows
    # dummy for rdoc
  end if false

  # Serialize the matrix as a "matrix" node with one child per row.
  def to_xml
    node = @@writer.create_node( "matrix" )
    self.each_row do |row|
      node << row.to_xml
    end
    node
  end

end
627
+
628
# SeqMatrix is a Matrix whose rows are SeqRow objects.
class SeqMatrix < Matrix
  # Build a SeqRow with a generated id, pass it to +add_row+ and return it.
  def create_row( options = {} )
    row_id = Bio::NeXML.generate_id( SeqRow )
    new_row = SeqRow.new( row_id, options )
    add_row( new_row )
    new_row
  end
end
635
+
636
# CellMatrix is a Matrix whose rows are CellRow objects.
class CellMatrix < Matrix
  # Build a CellRow with a generated id, pass it to +add_row+ and return it.
  def create_row( options = {} )
    row_id = Bio::NeXML.generate_id( CellRow )
    new_row = CellRow.new( row_id, options )
    add_row( new_row )
    new_row
  end
end
643
+
644
# Row is the common base of SeqRow and CellRow.
class Row
  include Mapper

  # A file level unique identifier.
  attr_accessor :id

  # A human readable description.
  attr_accessor :label

  # Every row refers to a taxon.
  belongs_to :otu
  belongs_to :matrix

  # Create a row.
  # * Arguments :
  # id( required ) - the identifier of the row.
  # options( optional ) - a Hash handed to +properties+ when not empty.
  # A block taking no argument is instance_eval'ed; otherwise the new row is passed to it.
  def initialize( id, options = {}, &block )
    @id = id
    properties( options ) unless options.empty?
    # The block has to be captured in the signature: without +&block+ the
    # name +block+ was undefined and any block passed raised a NameError.
    if block
      block.arity < 1 ? instance_eval( &block ) : block.call( self )
    end
  end

end # end of row class
664
# SeqRow is a row that holds its content as sequences
# ( Bio::NeXML::Sequence objects ).
class SeqRow < Row
  @@writer = Bio::NeXML::Writer.new
  # actually, probably only one <seq/> element
  has_n :sequences
  # Below are method stubs to be picked up by rdoc, as these methods are generated dynamically.

  # Add a sequence to the row.
  # * Arguments :
  # sequence( required ) - a Bio::NeXML::Sequence object.
  # * Returns : <tt>self</tt>.
  #    row.add_sequence( sequence )
  #    row.sequences #=> [ .. sequence .. ]
  def add_sequence( sequence )
    # dummy for rdoc
  end if false

  # Remove a sequence from the row.
  # * Arguments :
  # sequence( required ) - a Bio::NeXML::Sequence object.
  # * Returns : the deleted object.
  #    row.delete_sequence( sequence )
  #    row.sequences #=> [ .. .. ]
  def delete_sequence( sequence )
    # dummy for rdoc
  end if false

  # Returns an array of sequences ( Bio::NeXML::Sequence objects ) for the row.
  #    row.sequences #=> [ .. .. ]
  def sequences
    # dummy for rdoc
  end if false

  # Add sequences to the row. This function will override previous sequences if any.
  # * Arguments :
  # sequences( required ) - an array of Bio::NeXML::Sequence object.
  #    row.sequences = [ sequence ]
  #    row.sequences #=> [ sequence ]
  def sequences=( sequences )
    # dummy for rdoc
  end if false

  # Returns true if the given sequence( Bio::NeXML::Sequence object ) is defined for the row.
  def has_sequence?( sequence )
    # dummy for rdoc
  end if false

  # Iterate over each sequence ( Bio::NeXML::Sequence object ) defined for the row. Returns
  # an Enumerator if no block is provided.
  def each_sequence
    # dummy for rdoc
  end if false

  # Returns the number of sequences defined for the row.
  def number_of_sequences
    # dummy for rdoc
  end if false

  # Serialize the row as a "row" node. Only the first sequence is written.
  def to_xml
    node = @@writer.create_node( "row", @@writer.attributes( self, :id, :otu, :label ) )
    # A row without any sequence yields an empty row node instead of
    # failing with a NoMethodError on nil.
    first_sequence = self.sequences.first
    node << first_sequence.to_xml if first_sequence
    node
  end

end
731
# CellRow is a row that holds its content as cells ( Bio::NeXML::Cell objects ).
class CellRow < Row
  @@writer = Bio::NeXML::Writer.new
  has_n :cells, :index => false

  # The definitions below that end in <tt>if false</tt> never take effect at
  # runtime; they are only dummies for rdoc.

  # Add a cell to the row
  # * Arguments :
  # cell( required ) - a Bio::NeXML::Cell object.
  # * Returns : <tt>self</tt>.
  #    row.add_cell( cell )
  #    row.cells #=> [ .. cell .. ]
  #    cell.row #=> row
  def add_cell( cell )
    # dummy for rdoc
  end if false

  # Remove a cell from the row
  # * Arguments :
  # cell( required ) - a Bio::NeXML::Cell object.
  # * Returns : the deleted object.
  #    row.delete_cell( cell )
  #    row.cells #=> [ .. .. ]
  #    cell.row #=> nil
  def delete_cell( cell )
    # dummy for rdoc
  end if false

  # Returns an array of cells ( Bio::NeXML::Cell objects ) for the row.
  #    row.cells #=> [ .. .. ]
  def cells
    # dummy for rdoc
  end if false

  # Add cells to the row. This function will override previous cells if any.
  # * Arguments :
  # cells( required ) - an array of Bio::NeXML::Cell object.
  #    row.cells = [ cell ]
  #    row.cells #=> [ cell ]
  #    cell.row #=> row
  def cells=( cells )
    # dummy for rdoc
  end if false

  # Returns true if the given cell( Bio::NeXML::Cell object ) is defined for the row.
  def has_cell?( cell )
    # dummy for rdoc
  end if false

  # Iterate over each cell ( Bio::NeXML::Cell object ) defined for the row. Returns
  # an Enumerator if no block is provided.
  def each_cell
    # dummy for rdoc
  end if false

  # Returns the number of cells defined for the row.
  def number_of_cells
    # dummy for rdoc
  end if false

  # Serialize the row as a "row" node with one child per cell.
  def to_xml
    row_attributes = @@writer.attributes( self, :id, :otu, :label )
    node = @@writer.create_node( "row", row_attributes )
    self.each_cell { |cell| node << cell.to_xml }
    node
  end

end
797
+
798
# A character state matrix. This class is analogous to the characters element of NeXML.
#
# +create_format+ calls +lookup+, which this class does not define itself;
# the subclasses ( Dna, Rna, Protein, ... ) provide it.
class Characters
  include Mapper
  @@writer = Bio::NeXML::Writer.new

  # An id should be uniquely scoped in an NeXML file. It need not be unique globally. It is a
  # compulsory attribute.
  attr_accessor :id

  # A characters block holds a single format definition
  attr_accessor :format

  # A characters block holds a single matrix definition
  attr_accessor :matrix

  # A human readable description. Its usage is optional.
  attr_accessor :label

  belongs_to :nexml

  # Every characters block compulsorily links to a taxa block( otus ).
  belongs_to :otus

  # Create a characters block with a fresh format and matrix.
  # * Arguments :
  # id( required ) - the identifier of the characters block.
  # options( optional ) - a Hash handed to +properties+ when not empty.
  # A block taking no argument is instance_eval'ed; otherwise the new object is passed to it.
  def initialize( id, options = {}, &block )
    @id = id
    self.create_format
    self.create_matrix
    properties( options ) unless options.empty?
    # The block has to be captured in the signature: without +&block+ the
    # name +block+ was undefined and any block passed raised a NameError.
    if block
      block.arity < 1 ? instance_eval( &block ) : block.call( self )
    end
  end

  # Set the format of this characters block.
  def add_format( format )
    @format = format
  end

  # Set the matrix of this characters block.
  def add_matrix( matrix )
    @matrix = matrix
  end

  # Serialize as a "characters" node containing the format, then the matrix.
  def to_xml
    node = @@writer.create_node( "characters", @@writer.attributes( self, :id, :"xsi:type", :otus, :label ) )
    node << self.format.to_xml
    node << self.matrix.to_xml
    node
  end

  # Create the matrix: a SeqMatrix when the class name ends in "Seqs", a
  # CellMatrix otherwise. The matrix is stored and returned.
  def create_matrix( options = {} )
    matrix_class = self.class.name =~ /Seqs$/ ? SeqMatrix : CellMatrix
    matrix = matrix_class.new( options )
    add_matrix matrix
    matrix
  end

  # Create the format with one state set built from the +lookup+ table.
  # Symbols mapping to exactly one symbol become plain states; all other
  # symbols become uncertain states whose members are the plain states
  # they map to. The format is stored and returned.
  def create_format( options = {} )
    format = Format.new( options )
    states = format.create_states
    lookup_table = self.lookup
    state_for_symbol = {}
    # First pass: the plain states, so that the second pass can refer to them.
    lookup_table.each do |key, symbols|
      next unless symbols.length == 1
      state_for_symbol[key] = states.create_state( :symbol => key )
    end
    # Second pass: the ambiguous states and their members.
    lookup_table.each do |key, symbols|
      next if symbols.length == 1
      state = states.create_state( :symbol => key, :ambiguity => :uncertain )
      symbols.each do |symbol|
        state.add_member( state_for_symbol[symbol] )
      end
    end
    add_format format
    format
  end

  # Fill a row from a raw string and return the row. A new row is created
  # in the matrix when none is given. A SeqRow receives a single Sequence;
  # a CellRow receives one bound Cell per symbol, with chars and states
  # created on demand.
  def create_raw( string, row = nil )
    matrix = self.matrix
    format = self.format
    row = matrix.create_row if row.nil?
    if row.kind_of? SeqRow
      sequence = Sequence.new
      sequence.value = join_sequence( split_sequence( string ) )
      row.add_sequence( sequence )
    end
    if row.kind_of? CellRow
      states = format.states.first
      split_sequence( string ).each_with_index do |symbol, pos|
        char = format.chars[pos]
        char = format.create_char( states ) if char.nil?
        state = states.get_state_by_symbol( symbol )
        state = states.create_state( symbol ) if state.nil?
        row.add_cell Cell.new( char, state )
      end
    end
    row
  end

  # Split a raw string into its symbols: one per character.
  def split_sequence( string )
    string.split(//)
  end

  # Join symbols back into a raw string.
  def join_sequence( array )
    array.join
  end

end #end class Characters
919
# DNA characters. The lookup table maps each symbol to the symbols it
# stands for; a symbol mapping to itself alone is a plain state.
class Dna < Characters
  @@lookup = {
    'A' => %w[ A ],
    'C' => %w[ C ],
    'G' => %w[ G ],
    'T' => %w[ T ],
    'M' => %w[ A C ],
    'R' => %w[ A G ],
    'W' => %w[ A T ],
    'S' => %w[ C G ],
    'Y' => %w[ C T ],
    'K' => %w[ G T ],
    'V' => %w[ A C G ],
    'H' => %w[ A C T ],
    'D' => %w[ A G T ],
    'B' => %w[ C G T ],
    'X' => %w[ G A T C ],
    'N' => %w[ G A T C ],
    '-' => [],
    '?' => %w[ G A T C ]
  }

  # Return the symbol lookup table.
  def lookup
    @@lookup
  end
end

class DnaSeqs < Dna
end

class DnaCells < Dna
end
946
# RNA characters. The lookup table maps each symbol to the symbols it
# stands for; a symbol mapping to itself alone is a plain state.
class Rna < Characters
  @@lookup = {
    'A' => %w[ A ],
    'C' => %w[ C ],
    'G' => %w[ G ],
    'U' => %w[ U ],
    'M' => %w[ A C ],
    'R' => %w[ A G ],
    'W' => %w[ A U ],
    'S' => %w[ C G ],
    'Y' => %w[ C U ],
    'K' => %w[ G U ],
    'V' => %w[ A C G ],
    'H' => %w[ A C U ],
    'D' => %w[ A G U ],
    'B' => %w[ C G U ],
    'X' => %w[ G A U C ],
    'N' => %w[ G A U C ],
    '-' => [],
    '?' => %w[ G A U C ]
  }

  # Return the symbol lookup table.
  def lookup
    @@lookup
  end
end

class RnaSeqs < Rna
end

class RnaCells < Rna
end
973
# Protein characters. The lookup table maps each symbol to the symbols it
# stands for; a symbol mapping to itself alone is a plain state.
class Protein < Characters
  @@lookup = {
    'A' => %w[ A ],
    'B' => %w[ D N ],
    'C' => %w[ C ],
    'D' => %w[ D ],
    'E' => %w[ E ],
    'F' => %w[ F ],
    'G' => %w[ G ],
    'H' => %w[ H ],
    'I' => %w[ I ],
    'K' => %w[ K ],
    'L' => %w[ L ],
    'M' => %w[ M ],
    'N' => %w[ N ],
    'P' => %w[ P ],
    'Q' => %w[ Q ],
    'R' => %w[ R ],
    'S' => %w[ S ],
    'T' => %w[ T ],
    'U' => %w[ U ],
    'V' => %w[ V ],
    'W' => %w[ W ],
    'X' => %w[ X ],
    'Y' => %w[ Y ],
    'Z' => %w[ E Q ],
    '*' => %w[ * ],
    '-' => [],
    '?' => %w[ A C D E F G H I K L M N P Q R S T U V W X Y * ]
  }

  # Return the symbol lookup table.
  def lookup
    @@lookup
  end
end

class ProteinSeqs < Protein
end

class ProteinCells < Protein
end
1009
# Standard characters. The lookup table is empty and raw strings are
# whitespace separated.
class Standard < Characters
  @@lookup = {}

  # Return the (empty) symbol lookup table.
  def lookup
    @@lookup
  end

  # Split a raw string into symbols on whitespace.
  def split_sequence( string )
    string.split
  end

  # Join symbols into a raw string, separated by single spaces.
  def join_sequence( array )
    array.join(" ")
  end
end

class StandardSeqs < Standard
end

class StandardCells < Standard
end
1023
# Restriction characters: the two plain states '0' and '1'.
class Restriction < Characters
  @@lookup = {
    '0' => %w[ 0 ],
    '1' => %w[ 1 ]
  }

  # Return the symbol lookup table.
  def lookup
    @@lookup
  end
end

class RestrictionSeqs < Restriction
end

class RestrictionCells < Restriction
end
1031
# Continuous characters. The lookup table is empty and raw strings are
# whitespace separated.
class Continuous < Characters
  @@lookup = {}

  # Return the (empty) symbol lookup table.
  def lookup
    @@lookup
  end

  # Split a raw string into values on whitespace.
  def split_sequence( string )
    string.split
  end

  # Join values into a raw string, separated by single spaces.
  def join_sequence( array )
    array.join(" ")
  end
end

class ContinuousSeqs < Continuous
end

class ContinuousCells < Continuous
end
1045
+ end
1046
+ end