bioruby-phyloxml 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1186 @@
1
+ #
2
+ # = bio/db/phyloxml_elements.rb - PhyloXML Element classes
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+ # == Description
10
+ #
11
+ # This file contains the classes to represent PhyloXML elements.
12
+ #
13
+ # == References
14
+ #
15
+ # * http://www.phyloxml.org
16
+ #
17
+ # * https://www.nescent.org/wg_phyloinformatics/PhyloSoC:PhyloXML_support_in_BioRuby
18
+
19
+ require 'bio/tree'
20
+ require 'bio/sequence'
21
+ require 'bio/reference'
22
+
23
+ require 'libxml'
24
+
25
+ module Bio
26
+
27
+ module PhyloXML
28
+
29
+ # Taxonomy class for PhyloXML
30
class Taxonomy
  # Code/abbreviation/mnemonic of the taxon, such as Bsu for Bacillus
  # subtilis. It is handed to the writer together with the pattern
  # [a-zA-Z0-9_]{2,10}.
  attr_accessor :code

  # String. Scientific name of the taxon.
  attr_accessor :scientific_name

  # Array of strings holding the common names.
  attr_accessor :common_names

  # Taxonomic rank. The value comes from the list: domain, kingdom,
  # subkingdom, branch, infrakingdom, superphylum, phylum, subphylum,
  # infraphylum, microphylum, superdivision, division, subdivision,
  # infradivision, superclass, class, subclass, infraclass, superlegion,
  # legion, sublegion, infralegion, supercohort, cohort, subcohort,
  # infracohort, superorder, order, suborder, superfamily, family, subfamily,
  # supertribe, tribe, subtribe, infratribe, genus, subgenus, superspecies,
  # species, subspecies, variety, subvariety, form, subform, cultivar,
  # unknown, other
  attr_accessor :rank

  # Authority associated with the scientific name, such as
  # 'J. G. Cooper, 1863'.
  attr_accessor :authority

  # Array of strings. Synonyms for the scientific or common names.
  attr_accessor :synonyms

  #---
  # Attributes and methods below may be PhyloXML specific.
  #+++

  # String. Unique identifier of a taxon.
  attr_accessor :taxonomy_id

  # Used to link other elements to a taxonomy (on the xml-level).
  attr_accessor :id_source

  # Uri object.
  attr_accessor :uri

  # Array of Other objects. Used to save additional information from a
  # namespace other than PhyloXML.
  attr_accessor :other

  # Creates a new Bio::PhyloXML::Taxonomy object with empty name, synonym
  # and other lists.
  def initialize
    @common_names = []
    @synonyms = []
    @other = []
  end

  # Converts the element to its xml representation. Called by the
  # PhyloXML::Writer class.
  # ---
  # *Returns*:: the LibXML 'taxonomy' node
  def to_xml
    xml_node = LibXML::XML::Node.new('taxonomy')

    if (defined? @type) && @type
      xml_node["type"] = @type
    end
    if (defined? @id_source) && @id_source
      xml_node["id_source"] = @id_source
    end

    # Sub-elements in the order in which they are handed to the writer.
    children = [
      [:complex,   'id',              (defined? @taxonomy_id) ? @taxonomy_id : nil],
      [:pattern,   'code',            (defined? @code) ? @code : nil, Regexp.new("^[a-zA-Z0-9_]{2,10}$")],
      [:simple,    'scientific_name', (defined? @scientific_name) ? @scientific_name : nil],
      [:simple,    'authority',       (defined? @authority) ? @authority : nil],
      [:simplearr, 'common_name',     (defined? @common_names) ? @common_names : nil],
      [:simplearr, 'synonym',         (defined? @synonyms) ? @synonyms : nil],
      [:simple,    'rank',            (defined? @rank) ? @rank : nil],
      [:complex,   'uri',             (defined? @uri) ? @uri : nil]
    ]
    PhyloXML::Writer.generate_xml(xml_node, self, children)
    #@todo anything else

    xml_node
  end
end
100
+
101
+ # Object to hold one phylogeny element (and its subelements.) Extended version of Bio::Tree.
102
class Tree < Bio::Tree
  # String. Name of tree (name subelement of phylogeny element).
  attr_accessor :name

  # Id object.
  attr_accessor :phylogeny_id

  # String. Description of tree.
  attr_accessor :description

  # Boolean. Can be used to indicate that the phylogeny is not allowed to be
  # rooted differently (i.e. because it is associated with root dependent
  # data, such as gene duplications).
  attr_accessor :rerootable

  # Boolean. Required element.
  attr_accessor :rooted

  # Array of Property objects. Allows for typed and referenced properties
  # from external resources to be attached.
  attr_accessor :properties

  # Array of CladeRelation objects. Used to express a typed relationship
  # between two clades, for example multiple parents of a clade.
  attr_accessor :clade_relations

  # Array of SequenceRelation objects. Used to express a typed relationship
  # between two sequences, for example an orthology.
  attr_accessor :sequence_relations

  # Array of Confidence objects.
  attr_accessor :confidences

  # String.
  attr_accessor :branch_length_unit

  # String. Indicates the type of phylogeny (i.e. 'gene tree').
  attr_accessor :type

  # String. Date.
  attr_accessor :date

  # Array of Other objects. Used to save additional information from a
  # namespace other than PhyloXML.
  attr_accessor :other

  # Initializes the underlying Bio::Tree and starts every list-valued
  # attribute as an empty array.
  def initialize
    super
    @other = []
    @properties = []
    @confidences = []
    @clade_relations = []
    @sequence_relations = []
  end
end
153
+
154
+
155
+ # == Description
156
+ # Class to hold clade element of phyloXML.
157
class Node
  # Events at the root node of a clade (e.g. one gene duplication).
  attr_accessor :events

  # String. Used to link other elements to a clade (node) (on the xml-level).
  attr_accessor :id_source

  # String. Name of the node.
  attr_accessor :name

  # Float. Branch width for this node (including parent branch). Applies for
  # the whole clade unless overwritten in sub-clades.
  attr_reader :width

  # Stores the branch width, converting the given value with to_f.
  def width=(str)
    @width = str.to_f
  end

  # Array of Taxonomy objects. Describes taxonomic information for a clade.
  attr_accessor :taxonomies

  # Array of Confidence objects. Indicates the support for a clade/parent
  # branch.
  attr_accessor :confidences

  # BranchColor object. Applies for the whole clade unless overwritten in
  # sub-clades.
  attr_accessor :color

  # Id object.
  attr_accessor :node_id

  # Array of Sequence objects. Represents a molecular sequence (Protein, DNA,
  # RNA) associated with a node.
  attr_accessor :sequences

  # BinaryCharacters object. The names and/or counts of binary characters
  # present, gained, and lost at the root of a clade.
  attr_accessor :binary_characters

  # Array of Distribution objects. The geographic distribution of the items
  # of a clade (species, sequences), intended for phylogeographic
  # applications.
  attr_accessor :distributions

  # Date object. A date associated with a clade/node.
  attr_accessor :date

  # Array of Reference objects. A literature reference for a clade.
  attr_accessor :references

  # Array of Property objects, for example depth for sea animals.
  attr_accessor :properties

  # Array of Other objects. Used to save additional information from a
  # namespace other than PhyloXML.
  attr_accessor :other

  # Creates a node whose list-valued attributes are all empty arrays.
  def initialize
    @confidences = []
    @sequences = []
    @taxonomies = []
    @distributions = []
    @references = []
    @properties = []
    @other = []
  end

  # Converts to a Bio::Tree::Node object. If the node contains several
  # taxonomies, the scientific name and taxonomy id of the first taxonomy
  # are used.
  #
  # If there are several confidence values, the first one with type
  # "bootstrap" is stored as Bio::Tree::Node#bootstrap.
  #
  #  tree = phyloxmlparser.next_tree
  #
  #  node = tree.get_node_by_name("A").to_biotreenode
  #
  # ---
  # *Returns*:: Bio::Tree::Node
  def to_biotreenode
    node = Bio::Tree::Node.new
    node.name = @name

    #@todo what if there are more taxonomies?
    taxonomy = @taxonomies.first
    if taxonomy
      node.scientific_name = taxonomy.scientific_name
      # The reader on Taxonomy is taxonomy_id; the previous spelling
      # "taxononmy_id" raised NoMethodError whenever a taxonomy was present.
      node.taxonomy_id = taxonomy.taxonomy_id
    end

    bootstrap = @confidences.find { |confidence| confidence.type == "bootstrap" }
    node.bootstrap = bootstrap.value if bootstrap

    node
  end

  # Extracts the relevant information from the node (specifically taxonomy
  # and sequence) to create a Bio::Sequence object. A node can have several
  # sequences, so +seq_i+ selects which one to extract (default: the first).
  #
  # ---
  # *Arguments*:
  # * (optional) _seq_i_: index into #sequences
  # *Returns*:: Bio::Sequence
  def extract_biosequence(seq_i=0)
    seq = @sequences[seq_i].to_biosequence
    seq.classification = []
    @taxonomies.each do |t|
      seq.classification << t.scientific_name
      seq.species = t.scientific_name if t.rank == "species"
    end

    #seq.division => .. http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3_2
    # It doesn't seem there is anything in PhyloXML corresponding to this.

    seq
  end

  # Converts the element to its xml representation. Called by the
  # PhyloXML::Writer class.
  #
  # ---
  # *Arguments*:
  # * _branch_length_: branch length to write, or nil to write none
  # * _write_branch_length_as_subelement_: when true the branch length is
  #   written as a <branch_length> sub-element, otherwise as an attribute
  # *Returns*:: the LibXML 'clade' node
  def to_xml(branch_length, write_branch_length_as_subelement)
    clade = LibXML::XML::Node.new('clade')

    # The name is generated first so that it precedes the branch length.
    PhyloXML::Writer.generate_xml(clade, self, [[:simple, 'name', (defined? @name) ? @name : nil]])

    if branch_length != nil
      if write_branch_length_as_subelement
        clade << LibXML::XML::Node.new('branch_length', branch_length.to_s)
      else
        clade["branch_length"] = branch_length.to_s
      end
    end

    # Generate all remaining elements, except the child clades.
    PhyloXML::Writer.generate_xml(clade, self, [
      [:attr, "id_source"],
      [:objarr, 'confidence', 'confidences'],
      [:simple, 'width', (defined? @width) ? @width : nil],
      # The branch color is stored through the #color accessor, i.e. in
      # @color; reading @branch_color (never assigned) dropped it silently.
      [:complex, 'branch_color', (defined? @color) ? @color : nil],
      [:simple, 'node_id', (defined? @node_id) ? @node_id : nil],
      [:objarr, 'taxonomy', 'taxonomies'],
      [:objarr, 'sequence', 'sequences'],
      [:complex, 'events', (defined? @events) ? @events : nil],
      [:complex, 'binary_characters', (defined? @binary_characters) ? @binary_characters : nil],
      [:objarr, 'distribution', 'distributions'],
      [:complex, 'date', (defined? @date) ? @date : nil],
      [:objarr, 'reference', 'references'],
      # Tag name was misspelled as 'propery'.
      [:objarr, 'property', 'properties']])

    clade
  end
end #Node
308
+
309
+ # == Description
310
+ # Events at the root node of a clade (e.g. one gene duplication).
311
class Events
  # Values accepted by #type=.
  ALLOWED_TYPES = ['transfer', 'fusion', 'speciation_or_duplication',
                   'other', 'mixed', 'unassigned'].freeze

  # Value comes from the list: transfer, fusion, speciation_or_duplication,
  # other, mixed, unassigned
  attr_reader :type

  # Integer
  attr_reader :duplications, :speciations, :losses

  # Confidence object
  attr_reader :confidence

  # Sets the Confidence object.
  def confidence=(conf)
    @confidence = conf
  end

  # Integer. The given value is converted with to_i.
  def duplications=(str)
    @duplications = str.to_i
  end

  # Integer. The given value is converted with to_i.
  def losses=(str)
    @losses = str.to_i
  end

  # Integer. The given value is converted with to_i.
  def speciations=(str)
    @speciations = str.to_i
  end

  # Sets the event type. The value must come from the list: transfer,
  # fusion, speciation_or_duplication, other, mixed, unassigned.
  #
  # The value is validated before it is stored, so a rejected value never
  # replaces the previous one.
  # ---
  # *Raises*:: RuntimeError if +str+ is not one of the allowed values
  def type=(str)
    #@todo add unit test for this
    unless ALLOWED_TYPES.include?(str)
      raise "Warning #{str} is not one of the allowed values"
    end
    @type = str
  end

  # Converts the element to its xml representation. Called by the
  # PhyloXML::Writer class.
  # ---
  # *Returns*:: the LibXML 'events' node
  def to_xml
    #@todo add unit test
    events = LibXML::XML::Node.new('events')
    PhyloXML::Writer.generate_xml(events, self, [
      [:simple, 'type', (defined? @type) ? @type : nil],
      [:simple, 'duplications', (defined? @duplications) ? @duplications : nil],
      [:simple, 'speciations', (defined? @speciations) ? @speciations : nil],
      [:simple, 'losses', (defined? @losses) ? @losses : nil],
      [:complex, 'confidence', (defined? @confidence) ? @confidence : nil]])
    events
  end
end
370
+
371
+ # A general purpose confidence element. For example this can be used to express
372
+ # the bootstrap support value of a clade (in which case the 'type' attribute
373
+ # is 'bootstrap').
374
class Confidence
  # String. The type of confidence measure, for example, bootstrap.
  attr_accessor :type

  # Float. The value of confidence measure.
  attr_accessor :value

  # Creates a confidence of the given type; +value+ is converted with to_f.
  def initialize(type, value)
    @type = type
    @value = value.to_f
  end

  # Converts the element to its xml representation. Called by the
  # PhyloXML::Writer class.
  # ---
  # *Returns*:: the LibXML 'confidence' node
  # *Raises*:: RuntimeError if the type is nil
  def to_xml
    raise "Type is a required attribute for confidence." if @type == nil

    xml_node = LibXML::XML::Node.new('confidence', @value.to_s)
    xml_node["type"] = @type
    xml_node
  end
end
396
+
397
+ # == Description
398
+ #
399
+ # The geographic distribution of the items of a clade (species, sequences),
400
+ # intended for phylogeographic applications.
401
class Distribution
  # String. Free text description of location.
  attr_accessor :desc

  # Array of Point objects. Holds coordinates of the location.
  attr_accessor :points

  # Array of Polygon objects.
  attr_accessor :polygons

  # Creates a distribution without any points or polygons.
  def initialize
    @polygons = []
    @points = []
  end

  # Converts the element to its xml representation. Called by the
  # PhyloXML::Writer class.
  # ---
  # *Returns*:: the LibXML 'distribution' node
  def to_xml
    xml_node = LibXML::XML::Node.new('distribution')
    children = [
      [:simple, 'desc', @desc],
      [:objarr, 'point', 'points'],
      [:objarr, 'polygon', 'polygons']
    ]
    PhyloXML::Writer.generate_xml(xml_node, self, children)
    xml_node
  end
end #Distribution class
426
+
427
+
428
+ # == Description
429
+ #
430
+ # The coordinates of a point with an optional altitude. Required attribute
431
+ # 'geodetic_datum' is used to indicate the geodetic datum (also called
432
+ # 'map datum'), for example Google's KML uses 'WGS84'.
433
class Point
  # Float. Latitude
  attr_reader :lat

  # Float. Longitude
  attr_reader :long

  # Float. Altitude
  attr_reader :alt

  # String. Altitude unit.
  attr_accessor :alt_unit

  # Geodetic datum / map datum
  attr_accessor :geodetic_datum

  # Sets the latitude (converted with to_f). A nil argument is ignored.
  def lat=(str)
    return if str.nil?
    @lat = str.to_f
  end

  # Sets the longitude (converted with to_f). A nil argument is ignored.
  def long=(str)
    return if str.nil?
    @long = str.to_f
  end

  # Sets the altitude (converted with to_f). A nil argument is ignored.
  def alt=(str)
    return if str.nil?
    @alt = str.to_f
  end

  # Converts the element to its xml representation. Called by the
  # PhyloXML::Writer class.
  # ---
  # *Returns*:: the LibXML 'point' node
  # *Raises*:: RuntimeError if geodetic_datum is nil
  def to_xml
    if @geodetic_datum.nil?
      raise "Geodedic datum is a required attribute of Point element."
    end

    point = LibXML::XML::Node.new('point')
    point["geodetic_datum"] = @geodetic_datum
    unless @alt_unit == nil
      point["alt_unit"] = @alt_unit
    end
    coordinates = [
      [:simple, 'lat', @lat],
      [:simple, 'long', @long],
      [:simple, 'alt', @alt]
    ]
    PhyloXML::Writer.generate_xml(point, self, coordinates)
    point
    #@todo check if characters are correctly generated, like Zuric
  end
end
480
+
481
+
482
+ # == Description
483
+ #
484
+ # A polygon defined by a list of Points objects.
485
class Polygon
  # Array of Point objects.
  attr_accessor :points

  # Creates a polygon without points.
  def initialize
    @points = []
  end

  # Converts the element to its xml representation. Called by the
  # PhyloXML::Writer class.
  # ---
  # *Returns*:: the LibXML 'polygon' node, or nil when the polygon has
  #             fewer than three points
  def to_xml
    return nil unless @points.length > 2

    polygon = LibXML::XML::Node.new('polygon')
    @points.each { |point| polygon << point.to_xml }
    polygon
  end
end
507
+
508
+ # == Description
509
+ # Element Sequence is used to represent a molecular sequence (Protein, DNA,
510
+ # RNA) associated with a node.
511
class Sequence
  # Type of sequence (rna, dna, protein)
  attr_accessor :type

  # Full name (e.g. muscle Actin)
  attr_accessor :name

  # String. Used to link with other elements.
  attr_accessor :id_source

  # String. One intended use for 'id_ref' is to link a sequence to a taxonomy
  # (via the taxonomy's 'id_source') in the case of multiple sequences and
  # taxonomies per node.
  attr_accessor :id_ref

  # Short (maximal ten characters) symbol of the sequence (e.g. 'ACTM')
  attr_accessor :symbol

  # Accession object. Holds source and identifier for the sequence.
  attr_accessor :accession

  # String. Location of a sequence on a genome/chromosome
  attr_accessor :location

  # String. The actual sequence is stored here.
  attr_reader :mol_seq

  # Boolean. Used to indicate that this molecular sequence is aligned with
  # all other sequences in the same phylogeny for which 'is aligned' is true
  # as well (which, in most cases, means that gaps were introduced, and that
  # all sequences for which 'is aligned' is true must have the same length)
  attr_reader :is_aligned

  # Uri object
  attr_accessor :uri

  # Array of Annotation objects. Annotations of molecular sequence.
  attr_accessor :annotations

  # DomainArchitecture object. Describes domain architecture of a protein.
  attr_accessor :domain_architecture

  # Array of Other objects. Used to save additional information from a
  # namespace other than PhyloXML.
  attr_accessor :other

  # Creates a sequence without annotations or other elements.
  def initialize
    @annotations = []
    @other = []
  end

  # Sets the 'is aligned' flag. Accepts the strings 'true' / 'false' as well
  # as the booleans true / false; any other value resets the flag to nil.
  def is_aligned=(str)
    @is_aligned = case str
                  when true, 'true' then true
                  when false, 'false' then false
                  else nil
                  end
  end

  # Returns the 'is aligned' flag (true, false or nil).
  def is_aligned?
    @is_aligned
  end

  # Sets the molecular sequence.
  # ---
  # *Raises*:: RuntimeError if +str+ does not match the pattern
  #            [a-zA-Z.\-?*_]+
  def mol_seq=(str)
    if str =~ /^[a-zA-Z\.\-\?\*_]+$/
      @mol_seq = str
    else
      raise "mol_seq element of Sequence does not follow the pattern."
    end
  end

  # Converts the element to its xml representation. Called by the
  # PhyloXML::Writer class.
  # ---
  # *Returns*:: the LibXML 'sequence' node
  # *Raises*:: RuntimeError if type is set to something other than dna,
  #            rna or protein
  def to_xml
    seq = LibXML::XML::Node.new('sequence')
    if (defined? @type) && @type
      unless ["dna", "rna", "protein"].include?(@type)
        raise "Type attribute of Sequence has to be one of dna, rna or protein."
      end
      seq["type"] = @type
    end

    PhyloXML::Writer.generate_xml(seq, self, [
      [:attr, 'id_source'],
      [:attr, 'id_ref'],
      [:pattern, 'symbol', (defined? @symbol) ? @symbol : nil, Regexp.new("^\\S{1,10}$")],
      [:complex, 'accession', (defined? @accession) ? @accession : nil],
      [:simple, 'name', (defined? @name) ? @name : nil],
      [:simple, 'location', (defined? @location) ? @location : nil]])

    # mol_seq is written by hand because it carries the is_aligned attribute.
    if (defined? @mol_seq) && @mol_seq
      molseq = LibXML::XML::Node.new('mol_seq', @mol_seq)
      molseq["is_aligned"] = @is_aligned.to_s if (defined? @is_aligned) && @is_aligned != nil
      seq << molseq
    end

    PhyloXML::Writer.generate_xml(seq, self, [
      [:complex, 'uri', (defined? @uri) ? @uri : nil],
      [:objarr, 'annotation', 'annotations'],
      [:complex, 'domain_architecture', (defined? @domain_architecture) ? @domain_architecture : nil]])
    #@todo test domain_architecture
    #any
    seq
  end

  # Converts Bio::PhyloXML::Sequence to a Bio::Sequence object.
  # ---
  # *Returns*:: Bio::Sequence
  def to_biosequence
    # type is not a required attribute in phyloxml (nor any other Sequence
    # element) it might not hold any value, so we will not check what type
    # it is.
    seq = Bio::Sequence.auto(@mol_seq)

    # The accession is optional; without this guard a sequence that has no
    # accession raised NoMethodError on nil.
    if (defined? @accession) && @accession
      seq.id_namespace = @accession.source
      seq.entry_id = @accession.value
      # seq.primary_accession = @accession.value could be this
    end
    seq.definition = @name
    #seq.comments = @name //this one?
    if (defined? @uri) && @uri
      h = {'url' => @uri.uri,
           'title' => @uri.desc }
      ref = Bio::Reference.new(h)
      seq.references << ref
    end
    seq.molecule_type = 'RNA' if @type == 'rna'
    seq.molecule_type = 'DNA' if @type == 'dna'

    #@todo deal with the properties. There might be properties which look
    #like bio sequence attributes or features
    seq
  end
end
642
+
643
+ # == Description
644
+ # Element Accession is used to capture the local part in a sequence
645
+ # identifier.
646
class Accession
  # String. Source of the accession id. Example: "UniProtKB"
  attr_accessor :source

  # String. Value of the accession id. Example: "P17304"
  attr_accessor :value

  # Converts the element to its xml representation. Called by the
  # PhyloXML::Writer class.
  # ---
  # *Returns*:: the LibXML 'accession' node
  # *Raises*:: RuntimeError if source is nil
  def to_xml
    if @source == nil
      raise "Source attribute is required for Accession object."
    end

    xml_node = LibXML::XML::Node.new('accession', @value)
    xml_node['source'] = @source
    xml_node
  end
end
662
+
663
+ # A uniform resource identifier. In general, this is expected to be an URL
664
+ # (for example, to link to an image on a website, in which case the 'type'
665
+ # attribute might be 'image' and 'desc' might be 'image of a California
666
+ # sea hare')
667
class Uri
  # String. Description of the uri. For example, 'image of a California sea
  # hare'
  attr_accessor :desc

  # String. For example, image.
  attr_accessor :type

  # String. URL of the resource.
  attr_accessor :uri

  # Converts the element to its xml representation. Called by the
  # PhyloXML::Writer class.
  # ---
  # *Returns*:: the LibXML 'uri' node, or nil when no uri is set
  def to_xml
    return nil if @uri == nil

    xml_node = LibXML::XML::Node.new('uri', @uri)
    attributes = [[:attr, 'desc'], [:attr, 'type']]
    Writer.generate_xml(xml_node, self, attributes)
    xml_node
  end
end
686
+
687
+ # == Description
688
+ #
689
+ # The annotation of a molecular sequence. It is recommended to annotate by
690
+ # using the optional 'ref' attribute (some examples of acceptable values
691
+ # for the ref attribute: 'GO:0008270', 'KEGG:Tetrachloroethene degradation',
692
+ # 'EC:1.1.1.1').
693
class Annotation
  # String. For example, 'GO:0008270', 'KEGG:Tetrachloroethene degradation',
  # 'EC:1.1.1.1'
  attr_accessor :ref

  # String
  attr_accessor :source

  # String. Evidence for an annotation as free text (e.g. 'experimental')
  attr_accessor :evidence

  # String. Type of the annotation.
  attr_accessor :type

  # String. Free text description.
  attr_accessor :desc

  # Confidence object. Type and value of support for an annotation.
  attr_accessor :confidence

  # Array of Property objects. Allows for further, typed and referenced
  # annotations from external resources
  attr_accessor :properties

  # Uri object.
  attr_accessor :uri

  # Creates an annotation with an empty property list.
  def initialize
    #@todo add unit test for this, since didn't break anything when changed from property to properties
    @properties = []
  end

  # Converts the element to its xml representation. Called by the
  # PhyloXML::Writer class.
  # ---
  # *Returns*:: the LibXML 'annotation' node
  def to_xml
    xml_node = LibXML::XML::Node.new('annotation')
    if (defined? @ref) && @ref
      xml_node["ref"] = @ref
    end

    children = [
      [:simple, 'desc', (defined? @desc) ? @desc : nil],
      [:complex, 'confidence', (defined? @confidence) ? @confidence : nil],
      [:objarr, 'property', 'properties'],
      [:complex, 'uri', (defined? @uri) ? @uri : nil]
    ]
    PhyloXML::Writer.generate_xml(xml_node, self, children)
    xml_node
  end
end
729
+
730
class Id
  # The provider of Id, for example, NCBI.
  attr_accessor :provider

  # The value of Id.
  attr_accessor :value

  # Converts the element to its xml representation. Called by the
  # PhyloXML::Writer class.
  # ---
  # *Returns*:: the LibXML 'id' node; the provider attribute is only
  #             written when a provider is set
  def to_xml
    id_node = LibXML::XML::Node.new('id', @value)
    unless @provider == nil
      id_node["provider"] = @provider
    end
    id_node
  end
end
743
+
744
+ # == Description
745
+ # This indicates the color of a node when rendered (the color applies
746
+ # to the whole node and its children unless overwritten by the
747
+ # color(s) of sub clades).
748
class BranchColor
  # Integer. Color components.
  attr_reader :red, :green, :blue

  # Sets the red component, converted with to_i.
  def red=(str)
    @red = str.to_i
  end

  # Sets the green component, converted with to_i.
  def green=(str)
    @green = str.to_i
  end

  # Sets the blue component, converted with to_i.
  def blue=(str)
    @blue = str.to_i
  end

  # Converts the element to its xml representation. Called by the
  # PhyloXML::Writer class.
  # ---
  # *Returns*:: the LibXML 'branch_color' node
  # *Raises*:: RuntimeError if red, green or blue is nil (checked in that
  #            order)
  def to_xml
    #@todo add unit test
    raise "Subelement red of BranchColor element should not be nil" if @red == nil
    raise "Subelement green of BranchColor element should not be nil" if @green == nil
    raise "Subelement blue of BranchColor element should not be nil" if @blue == nil

    xml_node = LibXML::XML::Node.new('branch_color')
    components = [
      [:simple, 'red', @red],
      [:simple, 'green', @green],
      [:simple, 'blue', @blue]
    ]
    PhyloXML::Writer.generate_xml(xml_node, self, components)
    xml_node
  end
end
784
+
785
+ # == Description
786
+ # A date associated with a clade/node. Its value can be numerical by
787
+ # using the 'value' element and/or free text with the 'desc' element'
788
+ # (e.g. 'Silurian'). If a numerical value is used, it is recommended to
789
+ # employ the 'unit' attribute to indicate the type of the numerical
790
+ # value (e.g. 'mya' for 'million years ago').
791
class Date
  # String. Units in which value is stored.
  attr_accessor :unit

  # Free text description of the date.
  attr_accessor :desc

  # Integer. Minimum and maximum of the value.
  attr_reader :minimum, :maximum

  # Integer. Value of the date.
  attr_reader :value

  # Sets the minimum, converted with to_i.
  def minimum=(str)
    @minimum = str.to_i
  end

  # Sets the maximum, converted with to_i.
  def maximum=(str)
    @maximum = str.to_i
  end

  # Sets the value, converted with to_i.
  def value=(str)
    @value = str.to_i
  end

  # Returns value + unit, for example "7 mya"
  def to_s
    "#{value} #{unit}"
  end

  # Converts the element to its xml representation. Called by the
  # PhyloXML::Writer class.
  # ---
  # *Returns*:: the LibXML 'date' node
  def to_xml
    xml_node = LibXML::XML::Node.new('date')
    children = [
      [:attr, 'unit'],
      [:simple, 'desc', (defined? @desc) ? @desc : nil],
      [:simple, 'value', (defined? @value) ? @value : nil],
      [:simple, 'minimum', (defined? @minimum) ? @minimum : nil],
      [:simple, 'maximum', (defined? @maximum) ? @maximum : nil]
    ]
    PhyloXML::Writer.generate_xml(xml_node, self, children)
    xml_node
  end
end
834
+
835
+ # == Description
836
+ # This is used describe the domain architecture of a protein. Attribute
837
+ # 'length' is the total length of the protein
838
class DomainArchitecture
  # Integer. Total length of the protein
  attr_reader :length

  # Array of ProteinDomain objects.
  attr_reader :domains

  # Creates a domain architecture without domains.
  def initialize
    @domains = []
  end

  # Sets the total length of the protein, converted with to_i.
  def length=(str)
    @length = str.to_i
  end

  # Converts the element to its xml representation. Called by the
  # PhyloXML::Writer class.
  # ---
  # *Returns*:: the LibXML 'domain_architecture' node
  def to_xml
    architecture = LibXML::XML::Node.new('domain_architecture')
    children = [[:attr, 'length'], [:objarr, 'domain', 'domains']]
    PhyloXML::Writer.generate_xml(architecture, self, children)
    architecture
  end
end
863
+
864
+
865
+ # == Description
866
+ # To represent an individual domain in a domain architecture. The
867
+ # name/unique identifier is described via the 'id' attribute.
868
class ProteinDomain
  # Float, for example to store E-values 4.7E-14
  attr_reader :confidence

  # String
  attr_accessor :id, :value

  # Integer. Beginning of the domain.
  attr_reader :from

  # Integer. End of the domain.
  attr_reader :to

  # Sets the beginning of the domain, converted with to_i.
  def from=(str)
    @from = str.to_i
  end

  # Sets the end of the domain, converted with to_i.
  def to=(str)
    @to = str.to_i
  end

  # Sets the confidence (for example an E-value such as 4.7E-14),
  # converted with to_f.
  def confidence=(str)
    @confidence = str.to_f
  end

  # Converts the element to its xml representation. Called by the
  # PhyloXML::Writer class.
  #
  # The id and confidence attributes are only written when they are set;
  # previously an unset confidence was emitted as confidence="".
  # ---
  # *Returns*:: the LibXML 'domain' node
  # *Raises*:: RuntimeError if from or to is nil
  def to_xml
    raise "from attribute of ProteinDomain class is required." if @from.nil?
    raise "to attribute of ProteinDomain class is required." if @to.nil?

    xml_node = LibXML::XML::Node.new('domain', @value)
    xml_node["from"] = @from.to_s
    xml_node["to"] = @to.to_s
    xml_node["id"] = @id if (defined? @id) && @id
    xml_node["confidence"] = @confidence.to_s if (defined? @confidence) && @confidence

    xml_node
  end
end
915
+
916
+
917
+ #Property allows for typed and referenced properties from external resources
918
+ #to be attached to 'Phylogeny', 'Clade', and 'Annotation'. The value of a
919
+ #property is its mixed (free text) content. Attribute 'datatype' indicates
920
+ #the type of a property and is limited to xsd-datatypes (e.g. 'xsd:string',
921
+ #'xsd:boolean', 'xsd:integer', 'xsd:decimal', 'xsd:float', 'xsd:double',
922
+ #'xsd:date', 'xsd:anyURI'). Attribute 'applies_to' indicates the item to
923
+ #which a property applies to (e.g. 'node' for the parent node of a clade,
924
+ #'parent_branch' for the parent branch of a clade). Attribute 'id_ref' allows
925
+ #to attached a property specifically to one element (on the xml-level).
926
+ #Optional attribute 'unit' is used to indicate the unit of the property.
927
+ #An example: <property datatype="xsd:integer" ref="NOAA:depth" applies_to="clade" unit="METRIC:m"> 200 </property>
928
class Property
  # xsd datatypes accepted by #datatype=.
  DATATYPES = ['xsd:string','xsd:boolean','xsd:decimal','xsd:float','xsd:double',
    'xsd:duration','xsd:dateTime','xsd:time','xsd:date','xsd:gYearMonth',
    'xsd:gYear','xsd:gMonthDay','xsd:gDay','xsd:gMonth','xsd:hexBinary',
    'xsd:base64Binary','xsd:anyURI','xsd:normalizedString','xsd:token',
    'xsd:integer','xsd:nonPositiveInteger','xsd:negativeInteger',
    'xsd:long','xsd:int','xsd:short','xsd:byte','xsd:nonNegativeInteger',
    'xsd:unsignedLong','xsd:unsignedInt','xsd:unsignedShort',
    'xsd:unsignedByte','xsd:positiveInteger'].freeze

  # Values of 'applies_to' that do not trigger a warning in #applies_to=.
  APPLIES_TO = ['phylogeny','clade','node','annotation','parent_branch','other'].freeze

  # String
  attr_accessor :ref, :unit, :id_ref, :value

  # String
  attr_reader :datatype, :applies_to

  # Sets the datatype of the property.
  # ---
  # *Raises*:: RuntimeError if +str+ is not one of the allowed xsd datatypes
  def datatype=(str)
    #@todo add unit test or maybe remove, if assume that xml is valid.
    unless DATATYPES.include?(str)
      raise "Warning: #{str} is not in the list of allowed values."
    end
    @datatype = str
  end

  # Sets the item the property applies to. A value outside the allowed list
  # is still stored, but a warning is emitted on standard error (via
  # Kernel#warn) rather than being printed to standard output.
  def applies_to=(str)
    unless APPLIES_TO.include?(str)
      warn "Warning: #{str} is not in the list of allowed values."
    end
    @applies_to = str
  end

  # Converts the element to its xml representation. Called by the
  # PhyloXML::Writer class.
  # ---
  # *Returns*:: the LibXML 'property' node
  # *Raises*:: RuntimeError if ref, datatype or applies_to is nil
  def to_xml
    #@todo write unit test for this
    raise "ref is an required element of property" if @ref.nil?
    raise "datatype is an required element of property" if @datatype.nil?
    raise "applies_to is an required element of property" if @applies_to.nil?

    property = LibXML::XML::Node.new('property')
    Writer.generate_xml(property, self, [
      [:attr, 'ref'],
      [:attr, 'unit'],
      [:attr, 'datatype'],
      [:attr, 'applies_to'],
      [:attr, 'id_ref']])

    property << @value if @value != nil
    property
  end
end
976
+
977
# == Description
# Literature reference attached to a clade. Prefer the 'doi' attribute over
# the free text 'desc' element whenever a DOI is available.
class Reference
  # String. Free text description.
  attr_accessor :desc

  # String. Digital Object Identifier.
  attr_accessor :doi

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Creates a 'reference' node and passes it to Writer.generate_xml together
  # with an attribute spec for doi and a simple-element spec for desc, then
  # returns the node.
  def to_xml
    node = LibXML::XML::Node.new('reference')
    # desc is read through defined? so that a never-assigned description is
    # passed on as a plain nil.
    description = defined?(@desc) ? @desc : nil
    specs = [
      [:attr, 'doi'],
      [:simple, 'desc', description]
    ]
    Writer.generate_xml(node, self, specs)
    node
  end
end
997
+
998
# == Description
#
# This is used to express a typed relationship between two clades.
# For example it could be used to describe multiple parents of a clade.
class CladeRelation
  # Float, or nil when no distance has been assigned.
  attr_reader :distance
  # String. Id of the referenced parents of a clade.
  attr_accessor :id_ref_0, :id_ref_1
  # String
  attr_accessor :type
  # Confidence object
  attr_accessor :confidence

  # Stores the distance as a Float (converted with to_f).
  #
  # A nil argument is ignored, mirroring SequenceRelation#distance=, so that
  # an absent distance stays nil instead of silently becoming 0.0.
  def distance=(str)
    @distance = str.to_f unless str.nil?
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Returns a LibXML::XML::Node named 'clade_relation'. The attribute specs
  # for id_ref_0, id_ref_1, distance and type, plus the confidence object as
  # a complex child spec, are handed to Writer.generate_xml.
  #
  # Raises RuntimeError when id_ref_0, id_ref_1 or type is nil.
  def to_xml
    if @id_ref_0.nil? || @id_ref_1.nil? || @type.nil?
      raise "Attributes id_ref_0, id_ref_1, type are required elements by CladeRelation element."
    end

    cr = LibXML::XML::Node.new('clade_relation')
    Writer.generate_xml(cr, self, [
      [:attr, 'id_ref_0'],
      [:attr, 'id_ref_1'],
      [:attr, 'distance'],
      [:attr, 'type'],
      # Must read @confidence (the ivar behind the accessor); a misspelled
      # name here would always yield nil and drop the confidence child.
      [:complex, 'confidence', (defined? @confidence) ? @confidence : nil]])
    cr
  end
end
1035
+
1036
+
1037
# == Description
# The names and/or counts of binary characters present, gained, and
# lost at the root of a clade.
class BinaryCharacters
  # bc_type is written as the 'type' attribute; gained, lost, present and
  # absent are arrays of character names (each starts out empty).
  attr_accessor :bc_type, :gained, :lost, :present, :absent
  # Integer counts, converted with to_i on assignment.
  attr_reader :gained_count, :lost_count, :present_count, :absent_count

  def gained_count=(str)
    @gained_count = str.to_i
  end

  def lost_count=(str)
    @lost_count = str.to_i
  end

  def present_count=(str)
    @present_count = str.to_i
  end

  def absent_count=(str)
    @absent_count = str.to_i
  end

  def initialize
    @gained = []
    @lost = []
    @present = []
    @absent = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Returns a LibXML::XML::Node named 'binary_characters'. The four count
  # attribute specs are handed to PhyloXML::Writer.generate_xml; each
  # non-empty name list gets its own child node ('gained', 'lost', 'present',
  # 'absent', in that order) filled through a :simplearr spec of 'bc' items.
  def to_xml
    bc = LibXML::XML::Node.new('binary_characters')
    # Skip the attribute entirely when no type was given rather than handing
    # nil to LibXML.
    bc['type'] = @bc_type unless @bc_type.nil?
    PhyloXML::Writer.generate_xml(bc, self, [
      [:attr, 'gained_count'],
      [:attr, 'lost_count'],
      [:attr, 'present_count'],
      [:attr, 'absent_count']])

    [['gained', @gained],
     ['lost', @lost],
     ['present', @present],
     ['absent', @absent]].each do |tag, names|
      # A list may have been reset to nil through its accessor.
      next if names.nil? || names.empty?

      list_xml = LibXML::XML::Node.new(tag)
      PhyloXML::Writer.generate_xml(list_xml, self, [[:simplearr, 'bc', names]])
      bc << list_xml
    end

    bc
  end
end
1106
+
1107
# == Description
# This is used to express a typed relationship between two sequences.
# For example it could be used to describe an orthology (in which case
# attribute 'type' is 'orthology').
class SequenceRelation
  # Values accepted by #type=.
  ALLOWED_TYPES = %w[orthology one_to_one_orthology super_orthology paralogy
                     ultra_paralogy xenology unknown other].freeze

  # String
  attr_accessor :id_ref_0, :id_ref_1

  # String. One of ALLOWED_TYPES.
  attr_reader :type

  # Float
  attr_reader :distance

  #@todo it has Confidences objects.

  # Stores the distance as a Float (converted with to_f). A nil argument is
  # ignored, leaving any previously stored distance in place.
  def distance=(str)
    @distance = str.to_f unless str.nil?
  end

  # Sets the relation type.
  #
  # Raises RuntimeError (leaving the current type untouched) when +str+ is
  # not one of ALLOWED_TYPES.
  def type=(str)
    #@todo do warning instead?
    #@todo do validation at actually writing xml
    unless ALLOWED_TYPES.include?(str)
      raise "SequenceRelation#type has to be one of #{ALLOWED_TYPES.join('; ')}"
    end
    @type = str
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Returns a LibXML::XML::Node named 'sequence_relation' carrying the
  # id_ref_0, id_ref_1 and type attributes, plus distance when one is set.
  #
  # Raises RuntimeError when id_ref_0, id_ref_1 or type is nil.
  def to_xml
    if @id_ref_0.nil? || @id_ref_1.nil? || @type.nil?
      raise "Attributes id_ref_0, id_ref_1, type are required elements by SequenceRelation element."
    end

    sr = LibXML::XML::Node.new('sequence_relation')
    sr['id_ref_0'] = @id_ref_0
    sr['id_ref_1'] = @id_ref_1
    sr['distance'] = @distance.to_s if (defined? @distance) && @distance
    sr['type'] = @type
    sr
  end
end
1159
+
1160
# Generic element holder: an element name, a hash of attributes, an optional
# value and a list of child objects that each respond to to_xml.
# NOTE(review): presumably used for elements that have no dedicated class in
# this file - confirm against the parser.
class Other
  attr_accessor :element_name, :attributes, :children, :value

  # Starts with an empty attribute hash and no children.
  def initialize
    @attributes = Hash.new
    @children = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Returns a LibXML::XML::Node named after element_name, with every entry
  # of attributes set on it, followed by the value (when not nil) and then
  # the to_xml result of each child, in order.
  def to_xml
    node = LibXML::XML::Node.new(@element_name)

    @attributes.each_pair do |attr_name, attr_value|
      node[attr_name] = attr_value
    end

    node << value unless value.nil?

    children.each { |child| node << child.to_xml }

    node
  end
end
1182
+
1183
+
1184
+ end #module PhyloXML
1185
+
1186
+ end #end module Bio