bioruby-phyloxml 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1186 @@
1
+ #
2
+ # = bio/db/phyloxml_elements.rb - PhyloXML Element classes
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+ # == Description
10
+ #
11
+ # This file contains the classes to represent PhyloXML elements.
12
+ #
13
+ # == References
14
+ #
15
+ # * http://www.phyloxml.org
16
+ #
17
+ # * https://www.nescent.org/wg_phyloinformatics/PhyloSoC:PhyloXML_support_in_BioRuby
18
+
19
+ require 'bio/tree'
20
+ require 'bio/sequence'
21
+ require 'bio/reference'
22
+
23
+ require 'libxml'
24
+
25
+ module Bio
26
+
27
+ module PhyloXML
28
+
29
# Taxonomic information of a clade, as stored in a PhyloXML 'taxonomy'
# element.
class Taxonomy
  # String following the pattern [a-zA-Z0-9_]{2,10}. Can hold any code,
  # abbreviation or mnemonic, such as Bsu for Bacillus subtilis.
  attr_accessor :code

  # String. Scientific name of the taxon.
  attr_accessor :scientific_name

  # Array of strings. Common names of the taxon.
  attr_accessor :common_names

  # String. The value comes from the list: domain, kingdom, subkingdom,
  # branch, infrakingdom, superphylum, phylum, subphylum, infraphylum,
  # microphylum, superdivision, division, subdivision, infradivision,
  # superclass, class, subclass, infraclass, superlegion, legion, sublegion,
  # infralegion, supercohort, cohort, subcohort, infracohort, superorder,
  # order, suborder, superfamily, family, subfamily, supertribe, tribe,
  # subtribe, infratribe, genus, subgenus, superspecies, species, subspecies,
  # variety, subvariety, form, subform, cultivar, unknown, other
  attr_accessor :rank

  # Keeps the authority associated with the 'scientific_name', such as
  # 'J. G. Cooper, 1863'.
  attr_accessor :authority

  # Array of strings. Synonyms for scientific names or common names.
  attr_accessor :synonyms

  #---
  # Attributes and methods below may be PhyloXML specific.
  #+++

  # Unique identifier of a taxon. It is handed to the writer as the complex
  # 'id' subelement.
  attr_accessor :taxonomy_id

  # Used to link other elements to a taxonomy (on the xml-level).
  attr_accessor :id_source

  # Uri object.
  attr_accessor :uri

  # Array of Other objects. Used to save additional information from a
  # namespace other than PhyloXML.
  attr_accessor :other

  # Creates a new Bio::PhyloXML::Taxonomy object with empty common name,
  # synonym and other lists.
  def initialize
    @common_names = []
    @synonyms = []

    # The attribute below may be PhyloXML specific.
    @other = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # The 'type' and 'id_source' attributes are set only when they hold a
  # value; the subelements are delegated to PhyloXML::Writer.generate_xml.
  # ---
  # *Returns*:: LibXML::XML::Node
  def to_xml
    node = LibXML::XML::Node.new('taxonomy')
    node["type"] = @type if defined?(@type) && @type
    node["id_source"] = @id_source if defined?(@id_source) && @id_source

    elements = [
      [:complex, 'id', (@taxonomy_id if defined?(@taxonomy_id))],
      [:pattern, 'code', (@code if defined?(@code)), Regexp.new("^[a-zA-Z0-9_]{2,10}$")],
      [:simple, 'scientific_name', (@scientific_name if defined?(@scientific_name))],
      [:simple, 'authority', (@authority if defined?(@authority))],
      [:simplearr, 'common_name', (@common_names if defined?(@common_names))],
      [:simplearr, 'synonym', (@synonyms if defined?(@synonyms))],
      [:simple, 'rank', (@rank if defined?(@rank))],
      [:complex, 'uri', (@uri if defined?(@uri))]
    ]
    PhyloXML::Writer.generate_xml(node, self, elements)
    #@todo anything else

    node
  end
end
100
+
101
# Holds one phylogeny element together with its subelements. It extends
# Bio::Tree with the PhyloXML specific attributes.
class Tree < Bio::Tree
  # String. Name of tree (name subelement of phylogeny element).
  attr_accessor :name

  # Id object.
  attr_accessor :phylogeny_id

  # String. Description of tree.
  attr_accessor :description

  # Boolean. Can be used to indicate that the phylogeny is not allowed to be
  # rooted differently (i.e. because it is associated with root dependent
  # data, such as gene duplications).
  attr_accessor :rerootable

  # Boolean. Required element.
  attr_accessor :rooted

  # String.
  attr_accessor :branch_length_unit

  # String. Indicates the type of phylogeny (i.e. 'gene tree').
  attr_accessor :type

  # String. Date.
  attr_accessor :date

  # Array of Property objects. Allows typed and referenced properties from
  # external resources to be attached.
  attr_accessor :properties

  # Array of CladeRelation objects. Used to express a typed relationship
  # between two clades, for example multiple parents of a clade.
  attr_accessor :clade_relations

  # Array of SequenceRelation objects. Used to express a typed relationship
  # between two sequences, for example an orthology.
  attr_accessor :sequence_relations

  # Array of Confidence objects.
  attr_accessor :confidences

  # Array of Other objects. Used to save additional information from a
  # namespace other than PhyloXML.
  attr_accessor :other

  # Creates an empty tree: runs the Bio::Tree initializer and then starts
  # every list-valued PhyloXML attribute as an empty array.
  def initialize
    super
    @properties = []
    @confidences = []
    @clade_relations = []
    @sequence_relations = []
    @other = []
  end
end
153
+
154
+
155
# == Description
# Class to hold clade element of phyloXML.
class Node

  # Events at the root node of a clade (e.g. one gene duplication).
  attr_accessor :events

  # String. Used to link other elements to a clade (node) (on the xml-level).
  attr_accessor :id_source

  # String. Name of the node.
  attr_accessor :name

  # Float. Branch width for this node (including parent branch). Applies for
  # the whole clade unless overwritten in sub-clades.
  attr_reader :width

  # Stores the branch width, converted to Float with to_f.
  def width=(str)
    @width = str.to_f
  end

  # Array of Taxonomy objects. Describes taxonomic information for a clade.
  attr_accessor :taxonomies

  # Array of Confidence objects. Indicates the support for a clade/parent
  # branch.
  attr_accessor :confidences

  # BranchColor object. Applies for the whole clade unless overwritten in
  # sub-clade.
  attr_accessor :color

  # Id object.
  attr_accessor :node_id

  # Array of Sequence objects. Represents a molecular sequence (Protein, DNA,
  # RNA) associated with a node.
  attr_accessor :sequences

  # BinaryCharacters object. The names and/or counts of binary characters
  # present, gained, and lost at the root of a clade.
  attr_accessor :binary_characters

  # Array of Distribution objects. The geographic distribution of the items
  # of a clade (species, sequences), intended for phylogeographic
  # applications.
  attr_accessor :distributions

  # Date object. A date associated with a clade/node.
  attr_accessor :date

  # Array of Reference objects. A literature reference for a clade.
  attr_accessor :references

  # Array of Property objects, for example depth for sea animals.
  attr_accessor :properties

  # Array of Other objects. Used to save additional information from a
  # namespace other than PhyloXML.
  attr_accessor :other

  # Creates a node whose list-valued attributes are all empty arrays.
  def initialize
    @confidences = []
    @sequences = []
    @taxonomies = []
    @distributions = []
    @references = []
    @properties = []
    @other = []
  end

  # Converts to a Bio::Tree::Node object. If it contains several taxonomies,
  # Bio::Tree::Node#scientific_name and Bio::Tree::Node#taxonomy_id are taken
  # from the first taxonomy.
  #
  # If there are several confidence values, the first with bootstrap type
  # will be returned as Bio::Tree::Node#bootstrap
  #
  #  tree = phyloxmlparser.next_tree
  #
  #  node = tree.get_node_by_name("A").to_biotreenode
  #
  # ---
  # *Returns*:: Bio::Tree::Node
  def to_biotreenode
    node = Bio::Tree::Node.new
    node.name = @name

    #@todo what if there are more?
    first_taxonomy = @taxonomies.first
    if first_taxonomy
      node.scientific_name = first_taxonomy.scientific_name
      node.taxonomy_id = first_taxonomy.taxonomy_id
    end

    bootstrap = @confidences.find { |confidence| confidence.type == "bootstrap" }
    node.bootstrap = bootstrap.value if bootstrap

    node
  end

  # Extracts the relevant information from node (specifically taxonomy and
  # sequence) to create Bio::Sequence object. Node can have several
  # sequences, so the parameter specifies which sequence to extract.
  #
  # Every taxonomy's scientific name is appended to the classification; the
  # scientific name of a taxonomy with rank "species" becomes the species.
  #
  # ---
  # *Arguments*:
  # * (optional) _seq_i_: index into #sequences (default 0)
  # *Returns*:: Bio::Sequence
  def extract_biosequence(seq_i=0)
    seq = @sequences[seq_i].to_biosequence
    seq.classification = []
    @taxonomies.each do |t|
      seq.classification << t.scientific_name
      seq.species = t.scientific_name if t.rank == "species"
    end

    #seq.division => .. http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3_2
    # It doesn't seem there is anything in PhyloXML corresponding to this.

    seq
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # ---
  # *Arguments*:
  # * _branch_length_: branch length of the parent branch, or nil to omit it
  # * _write_branch_length_as_subelement_: when true the branch length is
  #   written as a 'branch_length' subelement, otherwise as an attribute
  # *Returns*:: LibXML::XML::Node
  def to_xml(branch_length, write_branch_length_as_subelement)
    clade = LibXML::XML::Node.new('clade')

    PhyloXML::Writer.generate_xml(clade, self, [[:simple, 'name', (defined? @name) ? @name : nil]])

    unless branch_length.nil?
      if write_branch_length_as_subelement
        clade << LibXML::XML::Node.new('branch_length', branch_length.to_s)
      else
        clade["branch_length"] = branch_length.to_s
      end
    end

    #generate all elements, except clade
    PhyloXML::Writer.generate_xml(clade, self, [
        [:attr, "id_source"],
        [:objarr, 'confidence', 'confidences'],
        [:simple, 'width', (defined? @width) ? @width : nil],
        # The branch colour is stored in @color (see the color accessor).
        [:complex, 'branch_color', (defined? @color) ? @color : nil],
        # NOTE(review): node_id is documented as an Id object but is written
        # as a :simple element here -- confirm against the Writer.
        [:simple, 'node_id', (defined? @node_id) ? @node_id : nil],
        [:objarr, 'taxonomy', 'taxonomies'],
        [:objarr, 'sequence', 'sequences'],
        [:complex, 'events', (defined? @events) ? @events : nil],
        [:complex, 'binary_characters', (defined? @binary_characters) ? @binary_characters : nil],
        [:objarr, 'distribution', 'distributions'],
        [:complex, 'date', (defined? @date) ? @date : nil],
        [:objarr, 'reference', 'references'],
        [:objarr, 'property', 'properties']])

    clade
  end

end #Node
308
+
309
# == Description
# Events at the root node of a clade (e.g. one gene duplication).
class Events
  # Values accepted by #type=.
  ALLOWED_TYPES = ['transfer', 'fusion', 'speciation_or_duplication', 'other',
                   'mixed', 'unassigned'].freeze

  # String. The value comes from the list: transfer, fusion,
  # speciation_or_duplication, other, mixed, unassigned
  attr_reader :type

  # Integer
  attr_reader :duplications, :speciations, :losses

  # Confidence object
  attr_accessor :confidence

  # Stores the number of duplications, converted to Integer with to_i.
  def duplications=(str)
    @duplications = str.to_i
  end

  # Stores the number of losses, converted to Integer with to_i.
  def losses=(str)
    @losses = str.to_i
  end

  # Stores the number of speciations, converted to Integer with to_i.
  def speciations=(str)
    @speciations = str.to_i
  end

  # Sets the event type. The value has to come from the list: transfer,
  # fusion, speciation_or_duplication, other, mixed, unassigned.
  #
  # The value is validated before it is stored, so a rejected value leaves
  # the previous type untouched.
  # ---
  # *Raises*:: RuntimeError if _str_ is not one of the allowed values
  def type=(str)
    #@todo add unit test for this
    unless ALLOWED_TYPES.include?(str)
      raise "Warning #{str} is not one of the allowed values"
    end
    @type = str
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: LibXML::XML::Node
  def to_xml
    #@todo add unit test
    events = LibXML::XML::Node.new('events')
    PhyloXML::Writer.generate_xml(events, self, [
      [:simple, 'type', (defined? @type) ? @type : nil],
      [:simple, 'duplications', (defined? @duplications) ? @duplications : nil],
      [:simple, 'speciations', (defined? @speciations) ? @speciations : nil],
      [:simple, 'losses', (defined? @losses) ? @losses : nil],
      [:complex, 'confidence', (defined? @confidence) ? @confidence : nil]])
    events
  end

end
370
+
371
# A general purpose confidence element. It can, for example, express the
# bootstrap support value of a clade, in which case the 'type' attribute is
# 'bootstrap'.
class Confidence
  # String. The type of confidence measure, for example, bootstrap.
  attr_accessor :type

  # Float. The value of confidence measure.
  attr_accessor :value

  # Creates a confidence of the given type; the value is converted to Float
  # with to_f.
  def initialize(type, value)
    @type = type
    @value = value.to_f
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: LibXML::XML::Node
  # *Raises*:: RuntimeError if the type is nil
  def to_xml
    raise "Type is a required attribute for confidence." if @type.nil?

    xml_node = LibXML::XML::Node.new('confidence', @value.to_s)
    xml_node["type"] = @type
    xml_node
  end
end
396
+
397
# == Description
#
# The geographic distribution of the items of a clade (species, sequences),
# intended for phylogeographic applications.
class Distribution
  # String. Free text description of location.
  attr_accessor :desc

  # Array of Point objects. Holds coordinates of the location.
  attr_accessor :points

  # Array of Polygon objects.
  attr_accessor :polygons

  # Creates a distribution without any points or polygons.
  def initialize
    @polygons = []
    @points = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: LibXML::XML::Node
  def to_xml
    node = LibXML::XML::Node.new('distribution')
    layout = [
      [:simple, 'desc', @desc],
      [:objarr, 'point', 'points'],
      [:objarr, 'polygon', 'polygons']
    ]
    PhyloXML::Writer.generate_xml(node, self, layout)
    node
  end

end #Distribution class
426
+
427
+
428
# == Description
#
# The coordinates of a point with an optional altitude. Required attribute
# 'geodetic_datum' is used to indicate the geodetic datum (also called
# 'map datum'), for example Google's KML uses 'WGS84'.
class Point
  # Float. Latitude
  attr_reader :lat

  # Float. Longitude
  attr_reader :long

  # Float. Altitude
  attr_reader :alt

  # String. Altitude unit.
  attr_accessor :alt_unit

  # Geodetic datum / map datum
  attr_accessor :geodetic_datum

  # Sets the latitude, converted to Float with to_f. A nil argument leaves
  # the current value untouched.
  def lat=(str)
    return if str.nil?
    @lat = str.to_f
  end

  # Sets the longitude, converted to Float with to_f. A nil argument leaves
  # the current value untouched.
  def long=(str)
    return if str.nil?
    @long = str.to_f
  end

  # Sets the altitude, converted to Float with to_f. A nil argument leaves
  # the current value untouched.
  def alt=(str)
    return if str.nil?
    @alt = str.to_f
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: LibXML::XML::Node
  # *Raises*:: RuntimeError if geodetic_datum is nil
  def to_xml
    if @geodetic_datum.nil?
      raise "Geodedic datum is a required attribute of Point element."
    end

    node = LibXML::XML::Node.new('point')
    node["geodetic_datum"] = @geodetic_datum
    node["alt_unit"] = @alt_unit unless @alt_unit.nil?

    coordinates = [
      [:simple, 'lat', @lat],
      [:simple, 'long', @long],
      [:simple, 'alt', @alt]
    ]
    PhyloXML::Writer.generate_xml(node, self, coordinates)
    #@todo check if characters are correctly generated, like Zuric
    node
  end

end
480
+
481
+
482
# == Description
#
# A polygon defined by a list of Point objects.
class Polygon
  # Array of Point objects.
  attr_accessor :points

  # Creates a polygon without any points.
  def initialize
    @points = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # A node is built only when the polygon has more than two points.
  # ---
  # *Returns*:: LibXML::XML::Node, or nil when there are fewer than three
  #             points
  def to_xml
    return nil unless @points.length > 2

    node = LibXML::XML::Node.new('polygon')
    @points.each { |point| node << point.to_xml }
    node
  end

end
507
+
508
# == Description
# Element Sequence is used to represent a molecular sequence (Protein, DNA,
# RNA) associated with a node.
class Sequence
  # Type of sequence (rna, dna, protein)
  attr_accessor :type

  # Full name (e.g. muscle Actin )
  attr_accessor :name

  # String. Used to link with other elements.
  attr_accessor :id_source

  # String. One intended use for 'id_ref' is to link a sequence to a taxonomy
  # (via the taxonomy's 'id_source') in the case of multiple sequences and
  # taxonomies per node.
  attr_accessor :id_ref

  # Short (maximal ten characters) symbol of the sequence (e.g. 'ACTM')
  attr_accessor :symbol

  # Accession object. Holds source and identifier for the sequence.
  attr_accessor :accession

  # String. Location of a sequence on a genome/chromosome
  attr_accessor :location

  # String. The actual sequence is stored here.
  attr_reader :mol_seq

  # Boolean. Used to indicate that this molecular sequence is aligned with
  # all other sequences in the same phylogeny for which 'is aligned' is true
  # as well (which, in most cases, means that gaps were introduced, and that
  # all sequences for which 'is aligned' is true must have the same length)
  attr_reader :is_aligned

  # Uri object
  attr_accessor :uri

  # Array of Annotation objects. Annotations of molecular sequence.
  attr_accessor :annotations

  # DomainArchitecture object. Describes domain architecture of a protein.
  attr_accessor :domain_architecture

  # Array of Other objects. Used to save additional information from a
  # namespace other than PhyloXML.
  attr_accessor :other

  # Creates a sequence without annotations or foreign-namespace elements.
  def initialize
    @annotations = []
    @other = []
  end

  # Sets the alignment flag. Accepts the strings 'true' / 'false' as well as
  # the booleans true / false; any other value resets the flag to nil.
  def is_aligned=(str)
    @is_aligned = case str
                  when true, 'true' then true
                  when false, 'false' then false
                  end
  end

  # Returns the alignment flag (true, false or nil).
  def is_aligned?
    @is_aligned
  end

  # Sets the molecular sequence.
  # ---
  # *Raises*:: RuntimeError if _str_ does not match the pattern
  #            [a-zA-Z\.\-\?\*_]+
  def mol_seq=(str)
    if str =~ /^[a-zA-Z\.\-\?\*_]+$/
      @mol_seq = str
    else
      raise "mol_seq element of Sequence does not follow the pattern."
    end
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: LibXML::XML::Node
  # *Raises*:: RuntimeError if type is set to something other than dna, rna
  #            or protein
  def to_xml
    seq = LibXML::XML::Node.new('sequence')
    if (defined? @type) && @type
      unless ["dna", "rna", "protein"].include?(@type)
        raise "Type attribute of Sequence has to be one of dna, rna or protein."
      end
      seq["type"] = @type
    end

    PhyloXML::Writer.generate_xml(seq, self, [
        [:attr, 'id_source'],
        [:attr, 'id_ref'],
        [:pattern, 'symbol', (defined? @symbol) ? @symbol : nil, Regexp.new("^\\S{1,10}$")],
        [:complex, 'accession', (defined? @accession) ? @accession : nil],
        [:simple, 'name', (defined? @name) ? @name : nil],
        [:simple, 'location', (defined? @location) ? @location : nil]])

    # mol_seq is written by hand because it carries the is_aligned attribute.
    if (defined? @mol_seq) && @mol_seq
      molseq = LibXML::XML::Node.new('mol_seq', @mol_seq)
      molseq["is_aligned"] = @is_aligned.to_s if (defined? @is_aligned) && !@is_aligned.nil?
      seq << molseq
    end

    PhyloXML::Writer.generate_xml(seq, self, [
        [:complex, 'uri', (defined? @uri) ? @uri : nil],
        [:objarr, 'annotation', 'annotations'],
        [:complex, 'domain_architecture', (defined? @domain_architecture) ? @domain_architecture : nil]])
    #@todo test domain_architecture
    #any
    seq
  end

  # Converts Bio::PhyloXML::Sequence to Bio::Sequence object.
  #
  # The accession is optional: id_namespace and entry_id are only filled in
  # when an accession is present.
  # ---
  # *Returns*:: Bio::Sequence
  def to_biosequence
    #type is not a required attribute in phyloxml (nor any other Sequence
    #element) it might not hold any value, so we will not check what type it is.
    seq = Bio::Sequence.auto(@mol_seq)

    if (defined? @accession) && @accession
      seq.id_namespace = @accession.source
      seq.entry_id = @accession.value
      # seq.primary_accession = @accession.value could be this
    end
    seq.definition = @name
    #seq.comments = @name //this one?
    if (defined? @uri) && @uri
      h = {'url' => @uri.uri,
        'title' => @uri.desc }
      ref = Bio::Reference.new(h)
      seq.references << ref
    end
    seq.molecule_type = 'RNA' if @type == 'rna'
    seq.molecule_type = 'DNA' if @type == 'dna'

    #@todo deal with the properties. There might be properties which look
    #like bio sequence attributes or features
    seq
  end

end
642
+
643
# == Description
# Element Accession is used to capture the local part in a sequence
# identifier.
class Accession
  # String. Source of the accession id. Example: "UniProtKB"
  attr_accessor :source

  # String. Value of the accession id. Example: "P17304"
  attr_accessor :value

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: LibXML::XML::Node
  # *Raises*:: RuntimeError if source is nil
  def to_xml
    if @source.nil?
      raise "Source attribute is required for Accession object."
    end

    xml_node = LibXML::XML::Node.new('accession', @value)
    xml_node['source'] = @source
    xml_node
  end

end
662
+
663
# A uniform resource identifier. In general, this is expected to be a URL
# (for example, to link to an image on a website, in which case the 'type'
# attribute might be 'image' and 'desc' might be 'image of a California
# sea hare')
class Uri
  # String. Description of the uri. For example, 'image of a California sea
  # hare'
  attr_accessor :desc

  # String. For example, image.
  attr_accessor :type

  # String. URL of the resource.
  attr_accessor :uri

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: LibXML::XML::Node, or nil when no uri is set
  def to_xml
    return nil if @uri.nil?

    node = LibXML::XML::Node.new('uri', @uri)
    attributes = [[:attr, 'desc'], [:attr, 'type']]
    Writer.generate_xml(node, self, attributes)
    node
  end
end
686
+
687
# == Description
#
# The annotation of a molecular sequence. It is recommended to annotate by
# using the optional 'ref' attribute (some examples of acceptable values
# for the ref attribute: 'GO:0008270', 'KEGG:Tetrachloroethene degradation',
# 'EC:1.1.1.1').
class Annotation
  # String. For example, 'GO:0008270', 'KEGG:Tetrachloroethene degradation',
  # 'EC:1.1.1.1'
  attr_accessor :ref

  # String
  attr_accessor :source

  # String. Evidence for an annotation as free text (e.g. 'experimental')
  attr_accessor :evidence

  # String. Type of the annotation.
  attr_accessor :type

  # String. Free text description.
  attr_accessor :desc

  # Confidence object. Type and value of support for an annotation.
  attr_accessor :confidence

  # Array of Property objects. Allows for further, typed and referenced
  # annotations from external resources
  attr_accessor :properties

  # Uri object.
  attr_accessor :uri

  # Creates an annotation without properties.
  def initialize
    #@todo add unit test for this, since didn't break anything when changed from property to properties
    @properties = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # The 'ref', 'source', 'evidence' and 'type' attributes are written when
  # they hold a value, so that none of the stored attributes is lost on
  # output.
  # ---
  # *Returns*:: LibXML::XML::Node
  def to_xml
    annot = LibXML::XML::Node.new('annotation')
    annot["ref"] = @ref if (defined? @ref) && @ref
    annot["source"] = @source if (defined? @source) && @source
    annot["evidence"] = @evidence if (defined? @evidence) && @evidence
    annot["type"] = @type if (defined? @type) && @type
    PhyloXML::Writer.generate_xml(annot, self, [[:simple, 'desc', (defined? @desc) ? @desc : nil],
      [:complex, 'confidence', (defined? @confidence) ? @confidence : nil],
      [:objarr, 'property', 'properties'],
      [:complex, 'uri', (defined? @uri) ? @uri : nil]])
    annot
  end
end
729
+
730
# An identifier made of a value and an optional provider.
class Id
  # The provider of Id, for example, NCBI.
  attr_accessor :provider

  # The value of Id.
  attr_accessor :value

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # The 'provider' attribute is written only when a provider is set.
  # ---
  # *Returns*:: LibXML::XML::Node
  def to_xml
    node = LibXML::XML::Node.new('id', @value)
    node["provider"] = @provider unless @provider.nil?
    node
  end
end
743
+
744
# == Description
# This indicates the color of a node when rendered (the color applies
# to the whole node and its children unless overwritten by the
# color(s) of sub clades).
class BranchColor
  # Integer
  attr_reader :red, :green, :blue

  # Stores the red component, converted to Integer with to_i.
  def red=(str)
    @red = str.to_i
  end

  # Stores the green component, converted to Integer with to_i.
  def green=(str)
    @green = str.to_i
  end

  # Stores the blue component, converted to Integer with to_i.
  def blue=(str)
    @blue = str.to_i
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # The components are checked in the order red, green, blue.
  # ---
  # *Returns*:: LibXML::XML::Node
  # *Raises*:: RuntimeError if any of red, green or blue is nil
  def to_xml
    #@todo add unit test
    required = [
      [@red, "Subelement red of BranchColor element should not be nil"],
      [@green, "Subelement green of BranchColor element should not be nil"],
      [@blue, "Subelement blue of BranchColor element should not be nil"]
    ]
    required.each do |component, message|
      raise message if component.nil?
    end

    node = LibXML::XML::Node.new('branch_color')
    channels = [
      [:simple, 'red', @red],
      [:simple, 'green', @green],
      [:simple, 'blue', @blue]
    ]
    PhyloXML::Writer.generate_xml(node, self, channels)
    node
  end

end
784
+
785
# == Description
# A date associated with a clade/node. Its value can be numerical by
# using the 'value' element and/or free text with the 'desc' element
# (e.g. 'Silurian'). If a numerical value is used, it is recommended to
# employ the 'unit' attribute to indicate the type of the numerical
# value (e.g. 'mya' for 'million years ago').
class Date
  # String. Units in which value is stored.
  attr_accessor :unit

  # Free text description of the date.
  attr_accessor :desc

  # Integer. Minimum and maximum of the value.
  attr_reader :minimum, :maximum

  # Integer. Value of the date.
  attr_reader :value

  # Stores the minimum, converted to Integer with to_i.
  def minimum=(str)
    @minimum = str.to_i
  end

  # Stores the maximum, converted to Integer with to_i.
  def maximum=(str)
    @maximum = str.to_i
  end

  # Stores the value, converted to Integer with to_i.
  def value=(str)
    @value = str.to_i
  end

  # Returns value + unit separated by a space, for example "7 mya"
  def to_s
    value.to_s + ' ' + unit.to_s
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: LibXML::XML::Node
  def to_xml
    node = LibXML::XML::Node.new('date')
    layout = [
      [:attr, 'unit'],
      [:simple, 'desc', (@desc if defined?(@desc))],
      [:simple, 'value', (@value if defined?(@value))],
      [:simple, 'minimum', (@minimum if defined?(@minimum))],
      [:simple, 'maximum', (@maximum if defined?(@maximum))]
    ]
    PhyloXML::Writer.generate_xml(node, self, layout)
    node
  end

end
834
+
835
# == Description
# This is used to describe the domain architecture of a protein. Attribute
# 'length' is the total length of the protein.
class DomainArchitecture
  # Integer. Total length of the protein
  attr_reader :length

  # Array of ProteinDomain objects.
  attr_reader :domains

  # Creates a domain architecture without any domains.
  def initialize
    @domains = []
  end

  # Stores the total length of the protein, converted to Integer with to_i.
  def length=(str)
    @length = str.to_i
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: LibXML::XML::Node
  def to_xml
    node = LibXML::XML::Node.new('domain_architecture')
    layout = [[:attr, 'length'], [:objarr, 'domain', 'domains']]
    PhyloXML::Writer.generate_xml(node, self, layout)
    node
  end
end
863
+
864
+
865
# == Description
# To represent an individual domain in a domain architecture. The
# name/unique identifier is described via the 'id' attribute.
class ProteinDomain
  # Float, for example to store E-values 4.7E-14
  attr_reader :confidence

  # String
  attr_accessor :id, :value

  # Integer. Beginning of the domain.
  attr_reader :from

  # Integer. End of the domain.
  attr_reader :to

  # Stores the beginning of the domain, converted to Integer with to_i.
  def from=(str)
    @from = str.to_i
  end

  # Stores the end of the domain, converted to Integer with to_i.
  def to=(str)
    @to = str.to_i
  end

  # Stores the confidence, converted to Float with to_f (for example the
  # E-value 4.7E-14).
  def confidence=(str)
    @confidence = str.to_f
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # The 'id' and 'confidence' attributes are written only when they are set;
  # an unset confidence is left out instead of being written as an empty
  # attribute.
  # ---
  # *Returns*:: LibXML::XML::Node
  # *Raises*:: RuntimeError if from or to is nil
  def to_xml
    raise "from attribute of ProteinDomain class is required." if @from.nil?
    raise "to attribute of ProteinDomain class is required." if @to.nil?

    xml_node = LibXML::XML::Node.new('domain', @value)
    xml_node["from"] = @from.to_s
    xml_node["to"] = @to.to_s
    xml_node["id"] = @id if (defined? @id) && @id
    xml_node["confidence"] = @confidence.to_s if (defined? @confidence) && !@confidence.nil?

    xml_node
  end

end
915
+
916
+
917
# Property allows for typed and referenced properties from external resources
# to be attached to 'Phylogeny', 'Clade', and 'Annotation'. The value of a
# property is its mixed (free text) content. Attribute 'datatype' indicates
# the type of a property and is limited to xsd-datatypes (e.g. 'xsd:string',
# 'xsd:boolean', 'xsd:integer', 'xsd:decimal', 'xsd:float', 'xsd:double',
# 'xsd:date', 'xsd:anyURI'). Attribute 'applies_to' indicates the item to
# which a property applies (e.g. 'node' for the parent node of a clade,
# 'parent_branch' for the parent branch of a clade). Attribute 'id_ref' allows
# attaching a property specifically to one element (on the xml-level).
# Optional attribute 'unit' is used to indicate the unit of the property.
# An example: <property datatype="xsd:integer" ref="NOAA:depth" applies_to="clade" unit="METRIC:m"> 200 </property>
class Property
  # Values accepted by #datatype=.
  DATATYPES = ['xsd:string','xsd:boolean','xsd:decimal','xsd:float','xsd:double',
    'xsd:duration','xsd:dateTime','xsd:time','xsd:date','xsd:gYearMonth',
    'xsd:gYear','xsd:gMonthDay','xsd:gDay','xsd:gMonth','xsd:hexBinary',
    'xsd:base64Binary','xsd:anyURI','xsd:normalizedString','xsd:token',
    'xsd:integer','xsd:nonPositiveInteger','xsd:negativeInteger',
    'xsd:long','xsd:int','xsd:short','xsd:byte','xsd:nonNegativeInteger',
    'xsd:unsignedLong','xsd:unsignedInt','xsd:unsignedShort',
    'xsd:unsignedByte','xsd:positiveInteger'].freeze

  # Values #applies_to= accepts without a warning.
  APPLIES_TO = ['phylogeny','clade','node','annotation','parent_branch','other'].freeze

  # String
  attr_accessor :ref, :unit, :id_ref, :value

  # String
  attr_reader :datatype, :applies_to

  # Sets the datatype of the property.
  # ---
  # *Raises*:: RuntimeError if _str_ is not one of the allowed xsd datatypes
  def datatype=(str)
    #@todo add unit test or maybe remove, if assume that xml is valid.
    unless DATATYPES.include?(str)
      raise "Warning: #{str} is not in the list of allowed values."
    end
    @datatype = str
  end

  # Sets the item the property applies to. A value outside the allowed list
  # is still stored, but a warning is written to standard error (not to
  # standard output, which may carry the program's real output).
  def applies_to=(str)
    unless APPLIES_TO.include?(str)
      warn "Warning: #{str} is not in the list of allowed values."
    end
    @applies_to = str
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: LibXML::XML::Node
  # *Raises*:: RuntimeError if ref, datatype or applies_to is nil
  def to_xml
    #@todo write unit test for this
    raise "ref is an required element of property" if @ref.nil?
    raise "datatype is an required element of property" if @datatype.nil?
    raise "applies_to is an required element of property" if @applies_to.nil?

    property = LibXML::XML::Node.new('property')
    Writer.generate_xml(property, self, [
      [:attr, 'ref'],
      [:attr, 'unit'],
      [:attr, 'datatype'],
      [:attr, 'applies_to'],
      [:attr, 'id_ref']])

    # The value of a property is its text content.
    property << @value unless @value.nil?
    property
  end
end
976
+
977
+ # == Description
978
+ # A literature reference for a clade. It is recommended to use the 'doi'
979
+ # attribute instead of the free text 'desc' element whenever possible.
980
class Reference
  # String. Digital Object Identifier.
  attr_accessor :doi

  # String. Free text description.
  attr_accessor :desc

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Builds a 'reference' node and passes it to Writer.generate_xml (defined
  # elsewhere) together with the 'doi' attribute spec and the 'desc' simple
  # element spec. The node is returned.
  def to_xml
    # Read @desc only when it has been assigned; otherwise pass nil.
    description = instance_variable_defined?(:@desc) ? @desc : nil
    element_specs = [
      [:attr, 'doi'],
      [:simple, 'desc', description]
    ]

    node = LibXML::XML::Node.new('reference')
    Writer.generate_xml(node, self, element_specs)
    node
  end
end
997
+
998
+ # == Description
999
+ #
1000
+ # This is used to express a typed relationship between two clades.
1001
+ # For example it could be used to describe multiple parents of a clade.
1002
class CladeRelation
  # Float
  attr_reader :distance
  # String. Id of the referenced parents of a clade.
  attr_accessor :id_ref_0, :id_ref_1
  # String
  attr_accessor :type
  # Confidence object
  attr_accessor :confidence

  # Stores the distance as a Float (converted with #to_f).
  #
  # A nil argument is ignored, matching SequenceRelation#distance=, so that
  # an absent distance is not silently turned into 0.0 and then written out.
  def distance=(str)
    @distance = str.to_f unless str.nil?
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Returns a LibXML::XML::Node named 'clade_relation'. Attributes and the
  # confidence child are handed to Writer.generate_xml (defined elsewhere).
  #
  # Raises RuntimeError if id_ref_0, id_ref_1 or type has not been set.
  def to_xml
    if @id_ref_0.nil? || @id_ref_1.nil? || @type.nil?
      raise "Attributes id_ref_0, id_ref_1, type are required elements by CladeRelation element."
    end

    cr = LibXML::XML::Node.new('clade_relation')
    Writer.generate_xml(cr, self, [
      [:attr, 'id_ref_0'],
      [:attr, 'id_ref_1'],
      [:attr, 'distance'],
      [:attr, 'type'],
      # Pass the confidence object only when it has been assigned.
      [:complex, 'confidence', (defined? @confidence) ? @confidence : nil]])

    cr
  end
end
1035
+
1036
+
1037
+ # == Description
1038
+ # The names and/or counts of binary characters present, gained, and
1039
+ # lost at the root of a clade.
1040
class BinaryCharacters
  # bc_type is written out as the 'type' attribute; gained, lost, present
  # and absent are arrays whose entries are written as 'bc' children.
  attr_accessor :bc_type, :gained, :lost, :present, :absent
  # Integer counts, converted with #to_i by the writers below.
  attr_reader :gained_count, :lost_count, :present_count, :absent_count

  # Stores the gained count as an Integer.
  def gained_count=(str)
    @gained_count = str.to_i
  end

  # Stores the lost count as an Integer.
  def lost_count=(str)
    @lost_count = str.to_i
  end

  # Stores the present count as an Integer.
  def present_count=(str)
    @present_count = str.to_i
  end

  # Stores the absent count as an Integer.
  def absent_count=(str)
    @absent_count = str.to_i
  end

  # Starts with empty gained/lost/present/absent lists.
  def initialize
    @gained = []
    @lost = []
    @present = []
    @absent = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Returns a LibXML::XML::Node named 'binary_characters'. The count
  # attributes and the 'bc' children are handed to PhyloXML::Writer.generate_xml
  # (defined elsewhere). A child group (gained, lost, present, absent - in
  # that order) is only added when its list has entries.
  def to_xml
    bc = LibXML::XML::Node.new('binary_characters')
    # Only write the 'type' attribute when a type was given; assigning nil
    # as an attribute value is not a valid call on the node.
    bc['type'] = @bc_type unless @bc_type.nil?
    PhyloXML::Writer.generate_xml(bc, self, [
      [:attr, 'gained_count'],
      [:attr, 'lost_count'],
      [:attr, 'present_count'],
      [:attr, 'absent_count']])

    groups = [
      ['gained', @gained],
      ['lost', @lost],
      ['present', @present],
      ['absent', @absent]
    ]
    groups.each do |tag, names|
      # Skip groups with nothing to write (also tolerates a list set to nil).
      next if names.nil? || names.empty?
      group_xml = LibXML::XML::Node.new(tag)
      PhyloXML::Writer.generate_xml(group_xml, self, [[:simplearr, 'bc', names]])
      bc << group_xml
    end

    bc
  end
end
1106
+
1107
+ # == Description
1108
+ # This is used to express a typed relationship between two sequences.
1109
+ # For example it could be used to describe an orthology (in which case
1110
+ # attribute 'type' is 'orthology').
1111
class SequenceRelation
  # Values accepted by #type=.
  ALLOWED_TYPES = ["orthology", "one_to_one_orthology", "super_orthology", "paralogy",
                   "ultra_paralogy", "xenology", "unknown", "other"].freeze

  # String
  attr_accessor :id_ref_0, :id_ref_1

  # String. Allowed values: "orthology", "one_to_one_orthology",
  # "super_orthology", "paralogy", "ultra_paralogy", "xenology",
  # "unknown", "other"
  attr_reader :type

  # Float
  attr_reader :distance

  #@todo it has Confidences objects.

  # Stores the distance as a Float (converted with #to_f). A nil argument
  # is ignored and leaves any previously stored distance in place.
  def distance=(str)
    @distance = str.to_f unless str.nil?
  end

  # String. Allowed values: "orthology", "one_to_one_orthology",
  # "super_orthology", "paralogy", "ultra_paralogy", "xenology",
  # "unknown", "other"
  #
  # Raises RuntimeError when +str+ is not one of ALLOWED_TYPES; the stored
  # type is left untouched in that case.
  def type=(str)
    #@todo do warning instead?
    #@todo do validation at actually writing xml
    unless ALLOWED_TYPES.include?(str)
      raise "SequenceRelation#type has to be one of #{ALLOWED_TYPES.join("; ")}"
    end
    @type = str
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Returns a LibXML::XML::Node named 'sequence_relation' carrying the
  # id_ref_0, id_ref_1 and type attributes, plus distance when one is set.
  #
  # Raises RuntimeError if id_ref_0, id_ref_1 or type has not been set.
  def to_xml
    if @id_ref_0.nil? || @id_ref_1.nil? || @type.nil?
      raise "Attributes id_ref_0, id_ref_1, type are required elements by SequenceRelation element."
    end

    sr = LibXML::XML::Node.new('sequence_relation')
    sr['id_ref_0'] = @id_ref_0
    sr['id_ref_1'] = @id_ref_1
    sr['distance'] = @distance.to_s if (defined? @distance) && @distance
    sr['type'] = @type
    sr
  end
end
1159
+
1160
class Other
  # element_name: name given to the generated node.
  # attributes:   Hash copied onto the node as attributes.
  # children:     Array of objects responding to #to_xml.
  # value:        content appended to the node when not nil.
  attr_accessor :element_name, :attributes, :children, :value

  # Starts with no children and no attributes.
  def initialize
    @children = []
    @attributes = {}
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Creates a node named after element_name, copies every attribute onto it,
  # appends the value (if any) and then the xml of each child, in order.
  def to_xml
    node = LibXML::XML::Node.new(@element_name)

    @attributes.each_pair { |attr_name, attr_value| node[attr_name] = attr_value }

    content = value
    node << content unless content.nil?

    children.each { |child| node << child.to_xml }

    node
  end
end
1182
+
1183
+
1184
+ end #module PhyloXML
1185
+
1186
+ end #end module Bio