bio-phyloxml 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.travis.yml +12 -0
- data/Gemfile +13 -0
- data/LICENSE.txt +20 -0
- data/README.md +199 -0
- data/README.rdoc +48 -0
- data/Rakefile +45 -0
- data/VERSION +1 -0
- data/lib/bio-phyloxml.rb +12 -0
- data/lib/bio/phyloxml.rb +3 -0
- data/lib/bio/phyloxml/elements.rb +1166 -0
- data/lib/bio/phyloxml/parser.rb +1000 -0
- data/lib/bio/phyloxml/phyloxml.xsd +582 -0
- data/lib/bio/phyloxml/writer.rb +227 -0
- data/sample/test_phyloxml_big.rb +205 -0
- data/test/data/phyloxml/apaf.xml +666 -0
- data/test/data/phyloxml/bcl_2.xml +2097 -0
- data/test/data/phyloxml/made_up.xml +144 -0
- data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
- data/test/data/phyloxml/phyloxml_examples.xml +415 -0
- data/test/helper.rb +25 -0
- data/test/unit/bio/test_phyloxml.rb +821 -0
- data/test/unit/bio/test_phyloxml_writer.rb +334 -0
- metadata +155 -0
data/.document
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 1.9.2
|
4
|
+
- 1.9.3
|
5
|
+
# - jruby-19mode # JRuby in 1.9 mode
|
6
|
+
# - rbx-19mode
|
7
|
+
# - 1.8.7
|
8
|
+
# - jruby-18mode # JRuby in 1.8 mode
|
9
|
+
# - rbx-18mode
|
10
|
+
|
11
|
+
# uncomment this line if your project needs to run something other than `rake`:
|
12
|
+
# script: bundle exec rspec spec
|
data/Gemfile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
source "http://rubygems.org"
|
2
|
+
# Add dependencies required to use your gem here.
|
3
|
+
|
4
|
+
gem "bio", "~> 1.4.2"
|
5
|
+
gem "libxml-ruby", "~> 2.3.2"
|
6
|
+
|
7
|
+
# Add dependencies to develop your gem here.
|
8
|
+
# Include everything needed to run rake, tests, features, etc.
|
9
|
+
group :development do
|
10
|
+
gem "rdoc", "~> 3.12"
|
11
|
+
gem "bundler", "~> 1.1.0"
|
12
|
+
gem "jeweler", "~> 1.8.3"
|
13
|
+
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012 Clayton Wheeler
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,199 @@
|
|
1
|
+
# bio-phyloxml
|
2
|
+
|
3
|
+
[![Build Status](https://secure.travis-ci.org/csw/bioruby-phyloxml.png)](http://travis-ci.org/csw/bioruby-phyloxml)
|
4
|
+
|
5
|
+
bio-phyloxml is a [phyloXML](http://www.phyloxml.org/) plugin for
|
6
|
+
[BioRuby](http://bioruby.open-bio.org/), an open source bioinformatics
|
7
|
+
library for Ruby.
|
8
|
+
|
9
|
+
phyloXML is an XML language for saving, analyzing and exchanging data
|
10
|
+
of annotated phylogenetic trees. The phyloXML parser in BioRuby is
|
11
|
+
implemented in Bio::PhyloXML::Parser, and its writer in
|
12
|
+
Bio::PhyloXML::Writer. More information can be found at
|
13
|
+
[phyloxml.org](http://www.phyloxml.org).
|
14
|
+
|
15
|
+
This phyloXML code has historically been part of the core BioRuby
|
16
|
+
[gem](https://github.com/bioruby/bioruby), but has been split into its
|
17
|
+
own gem as part of an effort to
|
18
|
+
[modularize](http://bioruby.open-bio.org/wiki/Plugins)
|
19
|
+
BioRuby. bio-phyloxml and many more plugins are available at
|
20
|
+
[biogems.info](http://www.biogems.info/).
|
21
|
+
|
22
|
+
This code was originally written by Diana Jaunzeikare during the
|
23
|
+
Google Summer of Code 2009 for the
|
24
|
+
[Implementing phyloXML support in BioRuby](http://informatics.nescent.org/wiki/Phyloinformatics_Summer_of_Code_2009#Implementing_phyloXML_support_in_BioRuby)
|
25
|
+
project with NESCent, mentored by Christian Zmasek et al. For details
|
26
|
+
of development, see
|
27
|
+
[github.com/latvianlinuxgirl/bioruby](https://github.com/latvianlinuxgirl/bioruby)
|
28
|
+
and the BioRuby mailing list archives.
|
29
|
+
|
30
|
+
*NOTE:* this is currently in the repackaging process and is not yet
|
31
|
+
released! Production users should use the phyloXML support provided
|
32
|
+
with BioRuby for the time being.
|
33
|
+
|
34
|
+
## Requirements
|
35
|
+
|
36
|
+
bio-phyloxml uses [libxml-ruby](http://xml4r.github.com/libxml-ruby/),
|
37
|
+
which requires several C libraries and their headers to be installed:
|
38
|
+
* `zlib`
|
39
|
+
* `libiconv`
|
40
|
+
* `libxml`
|
41
|
+
|
42
|
+
With these installed, the `bio` and `libxml-ruby` gems can be installed with:
|
43
|
+
|
44
|
+
```sh
|
45
|
+
gem install -r bio libxml-ruby
|
46
|
+
```
|
47
|
+
|
48
|
+
For more information see the
|
49
|
+
[libxml installer page](http://libxml.rubyforge.org/install.xml) and
|
50
|
+
the [BioRuby installation page](http://bioruby.open-bio.org/wiki/Installation).
|
51
|
+
|
52
|
+
|
53
|
+
## Installation
|
54
|
+
|
55
|
+
```sh
|
56
|
+
gem install bio-phyloxml
|
57
|
+
```
|
58
|
+
|
59
|
+
## Migration
|
60
|
+
|
61
|
+
Users who were previously using the phyloXML support in the core
|
62
|
+
BioRuby gem should be able to migrate to using this gem very
|
63
|
+
easily. Simply install the `bio-phyloxml` gem as described above, and
|
64
|
+
add `require 'bio-phyloxml'` to the relevant application code.
|
65
|
+
|
66
|
+
## Usage
|
67
|
+
|
68
|
+
```ruby
|
69
|
+
require 'bio-phyloxml'
|
70
|
+
```
|
71
|
+
|
72
|
+
### Parsing a file
|
73
|
+
|
74
|
+
```ruby
|
75
|
+
require 'bio-phyloxml'
|
76
|
+
|
77
|
+
# Create new phyloxml parser
|
78
|
+
phyloxml = Bio::PhyloXML::Parser.open('example.xml')
|
79
|
+
|
80
|
+
# Print the names of all trees in the file
|
81
|
+
phyloxml.each do |tree|
|
82
|
+
puts tree.name
|
83
|
+
end
|
84
|
+
```
|
85
|
+
|
86
|
+
If there are several trees in the file, you can access the one you wish by specifying its index:
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
tree = phyloxml[3]
|
90
|
+
```
|
91
|
+
You can use all Bio::Tree methods on the tree, since PhyloXML::Tree inherits from Bio::Tree. For example,
|
92
|
+
|
93
|
+
```ruby
|
94
|
+
tree.leaves.each do |node|
|
95
|
+
puts node.name
|
96
|
+
end
|
97
|
+
```
|
98
|
+
|
99
|
+
PhyloXML files can hold additional information besides phylogenies at the end of the file. This info can be accessed through the 'other' array of the parser object.
|
100
|
+
|
101
|
+
```ruby
|
102
|
+
phyloxml = Bio::PhyloXML::Parser.open('example.xml')
|
103
|
+
while tree = phyloxml.next_tree
|
104
|
+
# do stuff with trees
|
105
|
+
end
|
106
|
+
|
107
|
+
puts phyloxml.other
|
108
|
+
```
|
109
|
+
|
110
|
+
### Writing a file
|
111
|
+
|
112
|
+
```ruby
|
113
|
+
# Create new phyloxml writer
|
114
|
+
writer = Bio::PhyloXML::Writer.new('tree.xml')
|
115
|
+
|
116
|
+
# Write tree to the file tree.xml
|
117
|
+
writer.write(tree1)
|
118
|
+
|
119
|
+
# Add another tree to the file
|
120
|
+
writer.write(tree2)
|
121
|
+
```
|
122
|
+
|
123
|
+
### Retrieving data
|
124
|
+
|
125
|
+
Here is an example of how to retrieve the scientific name of the clades included in each tree.
|
126
|
+
|
127
|
+
```ruby
|
128
|
+
require 'bio-phyloxml'
|
129
|
+
|
130
|
+
phyloxml = Bio::PhyloXML::Parser.open('ncbi_taxonomy_mollusca.xml')
|
131
|
+
phyloxml.each do |tree|
|
132
|
+
tree.each_node do |node|
|
133
|
+
print "Scientific name: ", node.taxonomies[0].scientific_name, "\n"
|
134
|
+
end
|
135
|
+
end
|
136
|
+
```
|
137
|
+
|
138
|
+
### Retrieving 'other' data
|
139
|
+
|
140
|
+
```ruby
|
141
|
+
require 'bio-phyloxml'
|
142
|
+
|
143
|
+
phyloxml = Bio::PhyloXML::Parser.open('phyloxml_examples.xml')
|
144
|
+
while tree = phyloxml.next_tree
|
145
|
+
#do something with the trees
|
146
|
+
end
|
147
|
+
|
148
|
+
p phyloxml.other
|
149
|
+
puts "\n"
|
150
|
+
#=> output is an object representation
|
151
|
+
|
152
|
+
#Print in a readable way
|
153
|
+
puts phyloxml.other[0].to_xml, "\n"
|
154
|
+
#=>:
|
155
|
+
#
|
156
|
+
#<align:alignment xmlns:align="http://example.org/align">
|
157
|
+
# <seq name="A">acgtcgcggcccgtggaagtcctctcct</seq>
|
158
|
+
# <seq name="B">aggtcgcggcctgtggaagtcctctcct</seq>
|
159
|
+
# <seq name="C">taaatcgc--cccgtgg-agtccc-cct</seq>
|
160
|
+
#</align:alignment>
|
161
|
+
|
162
|
+
#Once we know what's there, let's output just the sequences
|
163
|
+
phyloxml.other[0].children.each do |node|
|
164
|
+
puts node.value
|
165
|
+
end
|
166
|
+
#=>
|
167
|
+
#
|
168
|
+
#acgtcgcggcccgtggaagtcctctcct
|
169
|
+
#aggtcgcggcctgtggaagtcctctcct
|
170
|
+
#taaatcgc--cccgtgg-agtccc-cct
|
171
|
+
```
|
172
|
+
|
173
|
+
The API doc is online. (TODO: generate and link) For more code
|
174
|
+
examples see the test files in the source tree.
|
175
|
+
|
176
|
+
## Project home page
|
177
|
+
|
178
|
+
For information on the source tree, documentation, examples, issues and
|
179
|
+
how to contribute, see
|
180
|
+
|
181
|
+
http://github.com/csw/bioruby-phyloxml
|
182
|
+
|
183
|
+
The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
|
184
|
+
|
185
|
+
## Cite
|
186
|
+
|
187
|
+
If you use this software, please cite one of
|
188
|
+
|
189
|
+
* [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
|
190
|
+
* [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
|
191
|
+
|
192
|
+
## Biogems.info
|
193
|
+
|
194
|
+
This Biogem is published at [biogems.info](http://biogems.info/index.html#bio-phyloxml)
|
195
|
+
|
196
|
+
## Copyright
|
197
|
+
|
198
|
+
Copyright (c) 2009 Diana Jaunzeikare. See LICENSE.txt for further details.
|
199
|
+
|
data/README.rdoc
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
= bio-phyloxml
|
2
|
+
|
3
|
+
{<img
|
4
|
+
src="https://secure.travis-ci.org/csw/bioruby-phyloxml.png"
|
5
|
+
/>}[http://travis-ci.org/#!/csw/bioruby-phyloxml]
|
6
|
+
|
7
|
+
Full description goes here
|
8
|
+
|
9
|
+
Note: this software is under active development!
|
10
|
+
|
11
|
+
== Installation
|
12
|
+
|
13
|
+
gem install bio-phyloxml
|
14
|
+
|
15
|
+
== Usage
|
16
|
+
|
17
|
+
== Developers
|
18
|
+
|
19
|
+
To use the library
|
20
|
+
|
21
|
+
require 'bio-phyloxml'
|
22
|
+
|
23
|
+
The API doc is online. For more code examples see also the test files in
|
24
|
+
the source tree.
|
25
|
+
|
26
|
+
== Project home page
|
27
|
+
|
28
|
+
For information on the source tree, documentation, issues and how to contribute, see
|
29
|
+
|
30
|
+
http://github.com/csw/bioruby-phyloxml
|
31
|
+
|
32
|
+
The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
|
33
|
+
|
34
|
+
== Cite
|
35
|
+
|
36
|
+
If you use this software, please cite one of
|
37
|
+
|
38
|
+
* {BioRuby: bioinformatics software for the Ruby programming language}[http://dx.doi.org/10.1093/bioinformatics/btq475]
|
39
|
+
* {Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics}[http://dx.doi.org/10.1093/bioinformatics/bts080]
|
40
|
+
|
41
|
+
== Biogems.info
|
42
|
+
|
43
|
+
This Biogem is published at http://biogems.info/index.html#bio-phyloxml
|
44
|
+
|
45
|
+
== Copyright
|
46
|
+
|
47
|
+
Copyright (c) 2012 Clayton Wheeler. See LICENSE.txt for further details.
|
48
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "bio-phyloxml"
|
18
|
+
gem.homepage = "http://github.com/csw/bioruby-phyloxml"
|
19
|
+
gem.license = "MIT"
|
20
|
+
gem.summary = %Q{PhyloXML plugin for BioRuby}
|
21
|
+
gem.description = %Q{Provides PhyloXML support for BioRuby.}
|
22
|
+
gem.email = "cswh@umich.edu"
|
23
|
+
gem.authors = ["Diana Jaunzeikare", "Clayton Wheeler"]
|
24
|
+
# dependencies defined in Gemfile
|
25
|
+
end
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
27
|
+
|
28
|
+
require 'rake/testtask'
|
29
|
+
Rake::TestTask.new(:test) do |test|
|
30
|
+
test.libs << 'lib' << 'test'
|
31
|
+
test.pattern = 'test/**/test_*.rb'
|
32
|
+
test.verbose = true
|
33
|
+
end
|
34
|
+
|
35
|
+
task :default => :test
|
36
|
+
|
37
|
+
require 'rdoc/task'
|
38
|
+
Rake::RDocTask.new do |rdoc|
|
39
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
40
|
+
|
41
|
+
rdoc.rdoc_dir = 'rdoc'
|
42
|
+
rdoc.title = "bio-phyloxml #{version}"
|
43
|
+
rdoc.rdoc_files.include('README*')
|
44
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
45
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.9.0
|
data/lib/bio-phyloxml.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Please require your code below, respecting the naming conventions in the
|
2
|
+
# bioruby directory tree.
|
3
|
+
#
|
4
|
+
# For example, say you have a plugin named bio-plugin, the only uncommented
|
5
|
+
# line in this file would be
|
6
|
+
#
|
7
|
+
# require 'bio/bio-plugin/plugin'
|
8
|
+
#
|
9
|
+
# In this file only require other files. Avoid other source code.
|
10
|
+
|
11
|
+
require 'bio'
|
12
|
+
require 'bio/phyloxml'
|
data/lib/bio/phyloxml.rb
ADDED
@@ -0,0 +1,1166 @@
|
|
1
|
+
#
|
2
|
+
# = bio/phyloxml/elements.rb - PhyloXML Element classes
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2009
|
5
|
+
# Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# $Id:$
|
9
|
+
#
|
10
|
+
# == Description
|
11
|
+
#
|
12
|
+
# This file contains the classes to represent PhyloXML elements.
|
13
|
+
#
|
14
|
+
# == References
|
15
|
+
#
|
16
|
+
# * http://www.phyloxml.org
|
17
|
+
#
|
18
|
+
# * https://www.nescent.org/wg_phyloinformatics/PhyloSoC:PhyloXML_support_in_BioRuby
|
19
|
+
|
20
|
+
require 'bio/tree'
|
21
|
+
require 'bio/sequence'
|
22
|
+
require 'bio/reference'
|
23
|
+
|
24
|
+
require 'libxml'
|
25
|
+
|
26
|
+
module Bio
|
27
|
+
|
28
|
+
# This is general Taxonomy class.
|
29
|
+
|
30
|
+
class Taxonomy
  # Code/abbreviation/mnemonic for the taxon, such as Bsu for Bacillus
  # subtilis. Expected pattern: [a-zA-Z0-9_]{2,10}
  attr_accessor :code

  # String.
  attr_accessor :scientific_name

  # Array of strings.
  attr_accessor :common_names

  # Taxonomic rank. The value comes from this list: domain, kingdom,
  # subkingdom, branch, infrakingdom, superphylum, phylum, subphylum,
  # infraphylum, microphylum, superdivision, division, subdivision,
  # infradivision, superclass, class, subclass, infraclass, superlegion,
  # legion, sublegion, infralegion, supercohort, cohort, subcohort,
  # infracohort, superorder, order, suborder, superfamily, family, subfamily,
  # supertribe, tribe, subtribe, infratribe, genus, subgenus, superspecies,
  # species, subspecies, variety, subvariety, form, subform, cultivar,
  # unknown, other
  attr_accessor :rank

  # Keeps the authority, such as 'J. G. Cooper, 1863', associated with the
  # 'scientific_name'.
  attr_accessor :authority

  # Array of strings. Holds synonyms for scientific names or common names.
  attr_accessor :synonyms

  # Creates a taxonomy whose list-valued attributes (common names and
  # synonyms) start out as empty arrays; every other attribute stays nil.
  def initialize
    @common_names, @synonyms = [], []
  end
end
|
61
|
+
|
62
|
+
module PhyloXML
|
63
|
+
|
64
|
+
|
65
|
+
# Taxonomy class
|
66
|
+
class Taxonomy < Bio::Taxonomy
  # String. Unique identifier of a taxon.
  attr_accessor :taxonomy_id

  # Used to link other elements to a taxonomy (on the xml-level).
  attr_accessor :id_source

  # Uri object.
  attr_accessor :uri

  # Array of Other objects. Used to save additional information from other
  # than PhyloXML namespace.
  attr_accessor :other

  # Runs the parent initializer and starts with an empty 'other' list.
  def initialize
    super
    @other = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # ---
  # *Returns*:: the LibXML::XML::Node built for the 'taxonomy' element
  def to_xml
    xml_node = LibXML::XML::Node.new('taxonomy')
    xml_node["type"] = @type unless @type.nil?
    xml_node["id_source"] = @id_source unless @id_source.nil?

    # Child element descriptions, in the order they are handed to the writer.
    code_pattern = Regexp.new("^[a-zA-Z0-9_]{2,10}$")
    children = [
      [:complex, 'id', @taxonomy_id],
      [:pattern, 'code', @code, code_pattern],
      [:simple, 'scientific_name', @scientific_name],
      [:simple, 'authority', @authority],
      [:simplearr, 'common_name', @common_names],
      [:simplearr, 'synonym', @synonyms],
      [:simple, 'rank', @rank],
      [:complex, 'uri', @uri]
    ]
    PhyloXML::Writer.generate_xml(xml_node, self, children)
    #@todo anything else

    xml_node
  end
end
|
104
|
+
|
105
|
+
# Object to hold one phylogeny element (and its subelements.) Extended version of Bio::Tree.
|
106
|
+
class Tree < Bio::Tree
  # String. Name of tree (name subelement of phylogeny element).
  attr_accessor :name

  # Id object.
  attr_accessor :phylogeny_id

  # String. Description of tree.
  attr_accessor :description

  # Boolean. Can be used to indicate that the phylogeny is not allowed to be
  # rooted differently (i.e. because it is associated with root dependent
  # data, such as gene duplications).
  attr_accessor :rerootable

  # Boolean. Required element.
  attr_accessor :rooted

  # Array of Property objects. Allows for typed and referenced properties
  # from external resources to be attached.
  attr_accessor :properties

  # Array of CladeRelation objects. Used to express a typed relationship
  # between two clades. For example it could be used to describe multiple
  # parents of a clade.
  attr_accessor :clade_relations

  # Array of SequenceRelation objects. Used to express a typed relationship
  # between two sequences. For example it could be used to describe an
  # orthology.
  attr_accessor :sequence_relations

  # Array of Confidence objects.
  attr_accessor :confidences

  # String.
  attr_accessor :branch_length_unit

  # String. Indicate the type of phylogeny (i.e. 'gene tree').
  attr_accessor :type

  # String. Date
  attr_accessor :date

  # Array of Other objects. Used to save additional information from other
  # than PhyloXML namespace.
  attr_accessor :other

  # Runs the Bio::Tree initializer, then gives every list-valued attribute
  # its own empty array.
  def initialize
    super
    @sequence_relations, @clade_relations, @confidences, @properties, @other =
      Array.new(5) { [] }
  end
end
|
157
|
+
|
158
|
+
|
159
|
+
# == Description
|
160
|
+
# Class to hold clade element of phyloXML.
|
161
|
+
class Node
  # Events at the root node of a clade (e.g. one gene duplication).
  attr_accessor :events

  # String. Used to link other elements to a clade (node) (on the xml-level).
  attr_accessor :id_source

  # String. Name of the node.
  attr_accessor :name

  # Float. Branch width for this node (including parent branch). Applies for
  # the whole clade unless overwritten in sub-clades.
  attr_reader :width

  # Stores the branch width, converting the given value with to_f.
  def width=(str)
    @width = str.to_f
  end

  # Array of Taxonomy objects. Describes taxonomic information for a clade.
  attr_accessor :taxonomies

  # Array of Confidence objects. Indicates the support for a clade/parent branch.
  attr_accessor :confidences

  # BranchColor object. Apply for the whole clade unless overwritten in sub-clade.
  attr_accessor :color

  # Id object
  attr_accessor :node_id

  # Array of Sequence objects. Represents a molecular sequence (Protein, DNA,
  # RNA) associated with a node.
  attr_accessor :sequences

  # BinaryCharacters object. The names and/or counts of binary characters
  # present, gained, and lost at the root of a clade.
  attr_accessor :binary_characters

  # Array of Distribution objects. The geographic distribution of the items
  # of a clade (species, sequences), intended for phylogeographic applications.
  attr_accessor :distributions

  # Date object. A date associated with a clade/node.
  attr_accessor :date

  # Array of Reference objects. A literature reference for a clade.
  attr_accessor :references

  # An array of Property objects, for example depth for sea animals.
  attr_accessor :properties

  # Array of Other objects. Used to save additional information from other
  # than PhyloXML namespace.
  attr_accessor :other

  # Creates a node whose list-valued attributes start out as empty arrays.
  def initialize
    @confidences = []
    @sequences = []
    @taxonomies = []
    @distributions = []
    @references = []
    @properties = []
    @other = []
  end

  # Converts to a Bio::Tree::Node object. If it contains several taxonomies
  # Bio::Tree::Node#scientific_name and Bio::Tree::Node#taxonomy_id are taken
  # from the first taxonomy.
  #
  # If there are several confidence values, the first with bootstrap type will
  # be returned as Bio::Tree::Node#bootstrap
  #
  #  tree = phyloxmlparser.next_tree
  #
  #  node = tree.get_node_by_name("A").to_biotreenode
  #
  # ---
  # *Returns*:: Bio::Tree::Node
  def to_biotreenode
    node = Bio::Tree::Node.new
    node.name = @name

    #@todo what if there are more?
    first_taxonomy = @taxonomies.first
    unless first_taxonomy.nil?
      node.scientific_name = first_taxonomy.scientific_name
      # The accessor is Taxonomy#taxonomy_id; the former misspelled call
      # raised NoMethodError for every node that carried a taxonomy.
      node.taxonomy_id = first_taxonomy.taxonomy_id
    end

    bootstrap = @confidences.find { |confidence| confidence.type == "bootstrap" }
    node.bootstrap = bootstrap.value unless bootstrap.nil?

    node
  end

  # Extracts the relevant information from node (specifically taxonomy and
  # sequence) to create Bio::Sequence object. Node can have several sequences,
  # so parameter to this method is to specify which sequence to extract.
  #
  # ---
  # *Arguments*:
  # * (optional) _seq_i_: index into the node's sequences array (default 0)
  # *Returns*:: Bio::Sequence
  def extract_biosequence(seq_i=0)
    seq = @sequences[seq_i].to_biosequence
    seq.classification = []
    @taxonomies.each do |t|
      seq.classification << t.scientific_name
      # The taxonomy ranked as species also supplies the species name.
      seq.species = t.scientific_name if t.rank == "species"
    end

    #seq.division => .. http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3_2
    # It doesn't seem there is anything in PhyloXML corresponding to this.

    seq
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # ---
  # *Arguments*:
  # * _branch_length_: branch length to write; nothing is written when nil
  # * _write_branch_length_as_subelement_: when true the branch length is
  #   written as a 'branch_length' child element, otherwise as a
  #   'branch_length' attribute of the clade
  # *Returns*:: the LibXML::XML::Node built for the 'clade' element
  def to_xml(branch_length, write_branch_length_as_subelement)
    clade = LibXML::XML::Node.new('clade')

    PhyloXML::Writer.generate_xml(clade, self, [[:simple, 'name', @name]])

    unless branch_length.nil?
      if write_branch_length_as_subelement
        clade << LibXML::XML::Node.new('branch_length', branch_length.to_s)
      else
        clade["branch_length"] = branch_length.to_s
      end
    end

    #generate all elements, except clade
    PhyloXML::Writer.generate_xml(clade, self, [
        [:attr, "id_source"],
        [:objarr, 'confidence', 'confidences'],
        [:simple, 'width', @width],
        # NOTE(review): @branch_color is never assigned in this class (the
        # accessor above is `color`), so this entry is always nil - confirm
        # whether @color was intended before changing what gets written.
        [:complex, 'branch_color', @branch_color],
        [:simple, 'node_id', @node_id],
        [:objarr, 'taxonomy', 'taxonomies'],
        [:objarr, 'sequence', 'sequences'],
        [:complex, 'events', @events],
        [:complex, 'binary_characters', @binary_characters],
        [:objarr, 'distribution', 'distributions'],
        [:complex, 'date', @date],
        [:objarr, 'reference', 'references'],
        [:objarr, 'property', 'properties']])

    clade
  end

end #Node
|
312
|
+
|
313
|
+
# == Description
|
314
|
+
# Events at the root node of a clade (e.g. one gene duplication).
|
315
|
+
class Events
  # Values accepted by #type=.
  ALLOWED_TYPES = ['transfer', 'fusion', 'speciation_or_duplication', 'other', 'mixed', 'unassigned'].freeze

  # String. Value comes from list: transfer, fusion,
  # speciation_or_duplication, other, mixed, unassigned
  attr_reader :type

  # Integer
  attr_reader :duplications, :speciations, :losses

  # Confidence object
  attr_reader :confidence

  # Stores the given Confidence object.
  #
  # A former two-argument variant (type, value) defined just before this
  # method was always overridden by this definition, so it could never be
  # called; it has been removed.
  def confidence=(conf)
    @confidence = conf
  end

  # Stores the number of duplications, converting the given value with to_i.
  def duplications=(str)
    @duplications = str.to_i
  end

  # Stores the number of losses, converting the given value with to_i.
  def losses=(str)
    @losses = str.to_i
  end

  # Stores the number of speciations, converting the given value with to_i.
  def speciations=(str)
    @speciations = str.to_i
  end

  # Sets the event type.
  #
  # The value is validated before it is stored, so a rejected value no
  # longer replaces the previous one.
  #
  # ---
  # *Raises*:: RuntimeError if _str_ is not one of the allowed values
  def type=(str)
    #@todo add unit test for this
    unless ALLOWED_TYPES.include?(str)
      raise "Warning #{str} is not one of the allowed values"
    end
    @type = str
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # ---
  # *Returns*:: the LibXML::XML::Node built for the 'events' element
  def to_xml
    #@todo add unit test
    events = LibXML::XML::Node.new('events')
    PhyloXML::Writer.generate_xml(events, self, [
      [:simple, 'type', @type],
      [:simple, 'duplications', @duplications],
      [:simple, 'speciations', @speciations],
      [:simple, 'losses', @losses],
      [:complex, 'confidence', @confidence]])
    events
  end

end
|
367
|
+
|
368
|
+
# A general purpose confidence element. For example this can be used to express
|
369
|
+
# the bootstrap support value of a clade (in which case the 'type' attribute
|
370
|
+
# is 'bootstrap').
|
371
|
+
class Confidence
  # String. The type of confidence measure, for example, bootstrap.
  attr_accessor :type

  # Float. The value of confidence measure.
  attr_accessor :value

  # Builds a confidence of the given type; the value is converted with to_f.
  def initialize(type, value)
    @value = value.to_f
    @type = type
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # ---
  # *Returns*:: the LibXML::XML::Node built for the 'confidence' element
  # *Raises*:: RuntimeError when no type has been set
  def to_xml
    raise "Type is a required attribute for confidence." if @type.nil?

    xml_node = LibXML::XML::Node.new('confidence', @value.to_s)
    xml_node["type"] = @type
    xml_node
  end
end
|
393
|
+
|
394
|
+
# == Description
|
395
|
+
#
|
396
|
+
# The geographic distribution of the items of a clade (species, sequences),
|
397
|
+
# intended for phylogeographic applications.
|
398
|
+
class Distribution
  # String. Free text description of location.
  attr_accessor :desc

  # Array of Point objects. Holds coordinates of the location.
  attr_accessor :points

  # Array of Polygon objects.
  attr_accessor :polygons

  # Creates a distribution with empty point and polygon lists.
  def initialize
    @points, @polygons = [], []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # ---
  # *Returns*:: the LibXML::XML::Node built for the 'distribution' element
  def to_xml
    xml_node = LibXML::XML::Node.new('distribution')
    children = [
      [:simple, 'desc', @desc],
      [:objarr, 'point', 'points'],
      [:objarr, 'polygon', 'polygons']
    ]
    PhyloXML::Writer.generate_xml(xml_node, self, children)
    xml_node
  end

end #Distribution class
|
423
|
+
|
424
|
+
|
425
|
+
# == Description
|
426
|
+
#
|
427
|
+
# The coordinates of a point with an optional altitude. Required attribute
|
428
|
+
# 'geodetic_datum' is used to indicate the geodetic datum (also called
|
429
|
+
# 'map datum'), for example Google's KML uses 'WGS84'.
|
430
|
+
class Point
  # Float. Latitude
  attr_reader :lat

  # Float. Longitude
  attr_reader :long

  # Float. Altitude
  attr_reader :alt

  # String. Altitude unit.
  attr_accessor :alt_unit

  # Geodetic datum / map datum
  attr_accessor :geodetic_datum

  # Stores the latitude converted with to_f; a nil argument is ignored.
  def lat=(str)
    return if str.nil?
    @lat = str.to_f
  end

  # Stores the longitude converted with to_f; a nil argument is ignored.
  def long=(str)
    return if str.nil?
    @long = str.to_f
  end

  # Stores the altitude converted with to_f; a nil argument is ignored.
  def alt=(str)
    return if str.nil?
    @alt = str.to_f
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # ---
  # *Returns*:: the LibXML::XML::Node built for the 'point' element
  # *Raises*:: RuntimeError when geodetic_datum has not been set
  def to_xml
    if @geodetic_datum.nil?
      raise "Geodedic datum is a required attribute of Point element."
    end

    point_node = LibXML::XML::Node.new('point')
    point_node["geodetic_datum"] = @geodetic_datum
    point_node["alt_unit"] = @alt_unit unless @alt_unit.nil?

    coordinates = [
      [:simple, 'lat', @lat],
      [:simple, 'long', @long],
      [:simple, 'alt', @alt]
    ]
    PhyloXML::Writer.generate_xml(point_node, self, coordinates)
    #@todo check if characters are correctly generated, like Zuric
    point_node
  end

end
|
474
|
+
|
475
|
+
|
476
|
+
# == Description
|
477
|
+
#
|
478
|
+
# A polygon defined by a list of Points objects.
|
479
|
+
class Polygon
  # Array of Point objects.
  attr_accessor :points

  # Creates a polygon with an empty list of points.
  def initialize
    @points = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # ---
  # *Returns*:: the LibXML::XML::Node built for the 'polygon' element, or nil
  #             when the polygon has fewer than three points
  def to_xml
    return nil unless @points.length > 2

    @points.each_with_object(LibXML::XML::Node.new('polygon')) do |point, polygon_node|
      polygon_node << point.to_xml
    end
  end

end
|
501
|
+
|
502
|
+
# == Description
|
503
|
+
# Element Sequence is used to represent a molecular sequence (Protein, DNA,
|
504
|
+
# RNA) associated with a node.
|
505
|
+
class Sequence
  # Type of sequence (rna, dna, protein)
  attr_accessor :type

  # Full name (e.g. muscle Actin)
  attr_accessor :name

  # String. Used to link with other elements.
  attr_accessor :id_source

  # String. One intended use for 'id_ref' is to link a sequence to a taxonomy
  # (via the taxonomy's 'id_source') in the case of multiple sequences and
  # taxonomies per node.
  attr_accessor :id_ref

  # Short (maximal ten characters) symbol of the sequence (e.g. 'ACTM')
  attr_accessor :symbol

  # Accession object. Holds source and identifier for the sequence.
  attr_accessor :accession

  # String. Location of a sequence on a genome/chromosome
  attr_accessor :location

  # String. The actual sequence is stored here.
  attr_reader :mol_seq

  # Boolean. Used to indicate that this molecular sequence is aligned with
  # all other sequences in the same phylogeny for which 'is aligned' is true
  # as well (which, in most cases, means that gaps were introduced, and that
  # all sequences for which 'is aligned' is true must have the same length)
  attr_reader :is_aligned

  # Uri object
  attr_accessor :uri

  # Array of Annotation objects. Annotations of molecular sequence.
  attr_accessor :annotations

  # DomainArchitecture object. Describes domain architecture of a protein.
  attr_accessor :domain_architecture

  # Array of Other objects. Used to save additional information from other than
  # PhyloXML namespace.
  attr_accessor :other

  # Starts with empty annotation and other-element lists.
  def initialize
    @annotations = []
    @other = []
  end

  # Sets the alignment flag. Accepts the strings 'true'/'false' as well as
  # the booleans true/false; any other value resets the flag to nil.
  def is_aligned=(str)
    @is_aligned =
      case str
      when 'true', true then true
      when 'false', false then false
      end
  end

  # Returns the alignment flag (true, false or nil when unknown).
  def is_aligned?
    @is_aligned
  end

  # Sets the molecular sequence.
  # Raises RuntimeError when the string does not match the mol_seq pattern
  # (letters and the characters . - ? * _).
  def mol_seq=(str)
    unless str =~ /^[a-zA-Z\.\-\?\*_]+$/
      raise "mol_seq element of Sequence does not follow the pattern."
    end
    @mol_seq = str
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Raises RuntimeError when type is set to something other than dna, rna or
  # protein. The contents of #other are not written by this method.
  def to_xml
    # Validate before building anything so a bad type fails fast.
    unless @type.nil? || ["dna", "rna", "protein"].include?(@type)
      raise "Type attribute of Sequence has to be one of dna, rna or protein."
    end

    seq = LibXML::XML::Node.new('sequence')
    seq["type"] = @type unless @type.nil?

    PhyloXML::Writer.generate_xml(seq, self, [
        [:attr, 'id_source'],
        [:attr, 'id_ref'],
        [:pattern, 'symbol', @symbol, Regexp.new("^\\S{1,10}$")],
        [:complex, 'accession', @accession],
        [:simple, 'name', @name],
        [:simple, 'location', @location]])

    # mol_seq is written by hand because it carries the is_aligned attribute.
    unless @mol_seq.nil?
      molseq = LibXML::XML::Node.new('mol_seq', @mol_seq)
      molseq["is_aligned"] = @is_aligned.to_s unless @is_aligned.nil?
      seq << molseq
    end

    PhyloXML::Writer.generate_xml(seq, self, [
        [:complex, 'uri', @uri],
        [:objarr, 'annotation', 'annotations'],
        [:complex, 'domain_architecture', @domain_architecture]])
    #@todo test domain_architecture
    #any
    seq
  end

  # Converts Bio::PhyloXML::Sequence to Bio::Sequence object.
  # ---
  # *Returns*:: Bio::Sequence
  def to_biosequence
    #type is not a required attribute in phyloxml (nor any other Sequence
    #element) it might not hold any value, so we will not check what type it is.
    seq = Bio::Sequence.auto(@mol_seq)

    # accession is optional; only copy its parts when one is present.
    unless @accession.nil?
      seq.id_namespace = @accession.source
      seq.entry_id = @accession.value
      # seq.primary_accession = @accession.value could be this
    end
    seq.definition = @name
    #seq.comments = @name //this one?
    unless @uri.nil?
      h = {'url' => @uri.uri,
        'title' => @uri.desc }
      seq.references << Bio::Reference.new(h)
    end
    seq.molecule_type = 'RNA' if @type == 'rna'
    seq.molecule_type = 'DNA' if @type == 'dna'

    #@todo deal with the properties. There might be properties which look
    #like bio sequence attributes or features
    seq
  end
end
|
636
|
+
|
637
|
+
# == Description
|
638
|
+
# Element Accession is used to capture the local part in a sequence
|
639
|
+
# identifier.
|
640
|
+
class Accession
  # String. Source of the accession id. Example: "UniProtKB"
  attr_accessor :source

  # String. Value of the accession id. Example: "P17304"
  attr_accessor :value

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Raises RuntimeError when no source has been set.
  def to_xml
    if @source.nil?
      raise "Source attribute is required for Accession object."
    end

    node = LibXML::XML::Node.new('accession', @value)
    node['source'] = @source
    node
  end
end
|
656
|
+
|
657
|
+
# A uniform resource identifier. In general, this is expected to be an URL
|
658
|
+
# (for example, to link to an image on a website, in which case the 'type'
|
659
|
+
# attribute might be 'image' and 'desc' might be 'image of a California
|
660
|
+
# sea hare')
|
661
|
+
class Uri
  # String. Description of the uri. For example, 'image of a California sea hare'
  attr_accessor :desc
  # String. For example, image.
  attr_accessor :type
  # String. URL of the resource.
  attr_accessor :uri

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Returns nil when no uri has been set; otherwise a <uri> node whose
  # content is the uri, with desc and type passed to the writer as
  # attribute entries.
  def to_xml
    return nil if @uri.nil?

    uri_node = LibXML::XML::Node.new('uri', @uri)
    attribute_spec = [[:attr, 'desc'], [:attr, 'type']]
    Writer.generate_xml(uri_node, self, attribute_spec)
    uri_node
  end
end
|
680
|
+
|
681
|
+
# == Description
|
682
|
+
#
|
683
|
+
# The annotation of a molecular sequence. It is recommended to annotate by
|
684
|
+
# using the optional 'ref' attribute (some examples of acceptable values
|
685
|
+
# for the ref attribute: 'GO:0008270', 'KEGG:Tetrachloroethene degradation',
|
686
|
+
# 'EC:1.1.1.1').
|
687
|
+
class Annotation
  # String. For example, 'GO:0008270', 'KEGG:Tetrachloroethene degradation',
  # 'EC:1.1.1.1'
  attr_accessor :ref
  # String
  attr_accessor :source
  # String. Evidence for an annotation as free text (e.g. 'experimental')
  attr_accessor :evidence
  # String. Type of the annotation.
  attr_accessor :type
  # String. Free text description.
  attr_accessor :desc
  # Confidence object. Type and value of support for an annotation.
  attr_accessor :confidence
  # Array of Property objects. Allows for further, typed and referenced
  # annotations from external resources
  attr_accessor :properties
  # Uri object.
  attr_accessor :uri

  # Starts with an empty property list.
  def initialize
    #@todo add unit test for this, since didn't break anything when changed from property to properties
    @properties = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # The optional attributes ref, source, evidence and type are each written
  # only when set, so values read from a file are not lost on write.
  def to_xml
    annot = LibXML::XML::Node.new('annotation')
    [['ref', @ref], ['source', @source],
     ['evidence', @evidence], ['type', @type]].each do |attr_name, attr_value|
      annot[attr_name] = attr_value unless attr_value.nil?
    end
    PhyloXML::Writer.generate_xml(annot, self, [[:simple, 'desc', @desc],
      [:complex, 'confidence', @confidence],
      [:objarr, 'property', 'properties'],
      [:complex, 'uri', @uri]])
    annot
  end
end
|
723
|
+
|
724
|
+
class Id
  # The provider of Id, for example, NCBI.
  attr_accessor :provider
  # The value of Id.
  attr_accessor :value

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Returns an <id> node whose content is the value; the provider attribute
  # is only added when a provider has been set.
  def to_xml
    id_node = LibXML::XML::Node.new('id', @value)
    unless @provider.nil?
      id_node["provider"] = @provider
    end
    id_node
  end
end
|
737
|
+
|
738
|
+
# == Description
|
739
|
+
# This indicates the color of a node when rendered (the color applies
|
740
|
+
# to the whole node and its children unless overwritten by the
|
741
|
+
# color(s) of sub clades).
|
742
|
+
class BranchColor
  # Integer colour channels.
  attr_reader :red, :green, :blue

  # Stores the red channel, converted with to_i.
  def red=(str)
    @red = str.to_i
  end

  # Stores the green channel, converted with to_i.
  def green=(str)
    @green = str.to_i
  end

  # Stores the blue channel, converted with to_i.
  def blue=(str)
    @blue = str.to_i
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Raises RuntimeError for the first channel (checked in the order red,
  # green, blue) that has not been set.
  def to_xml
    #@todo add unit test
    raise "Subelement red of BranchColor element should not be nil" if @red.nil?
    raise "Subelement green of BranchColor element should not be nil" if @green.nil?
    raise "Subelement blue of BranchColor element should not be nil" if @blue.nil?

    color_node = LibXML::XML::Node.new('branch_color')
    channels = [
      [:simple, 'red', @red],
      [:simple, 'green', @green],
      [:simple, 'blue', @blue]
    ]
    PhyloXML::Writer.generate_xml(color_node, self, channels)
    color_node
  end
end
|
778
|
+
|
779
|
+
# == Description
|
780
|
+
# A date associated with a clade/node. Its value can be numerical by
|
781
|
+
# using the 'value' element and/or free text with the 'desc' element
|
782
|
+
# (e.g. 'Silurian'). If a numerical value is used, it is recommended to
|
783
|
+
# employ the 'unit' attribute to indicate the type of the numerical
|
784
|
+
# value (e.g. 'mya' for 'million years ago').
|
785
|
+
class Date
  # String. Units in which value is stored.
  attr_accessor :unit

  # Free text description of the date.
  attr_accessor :desc

  # Integer. Minimum and maximum of the value.
  attr_reader :minimum, :maximum

  # Integer. Value of the date.
  attr_reader :value

  # Stores the lower bound, converted with to_i.
  def minimum=(str)
    @minimum = str.to_i
  end

  # Stores the upper bound, converted with to_i.
  def maximum=(str)
    @maximum = str.to_i
  end

  # Stores the date value, converted with to_i.
  def value=(str)
    @value = str.to_i
  end

  # Returns value + unit, for example "7 mya"
  def to_s
    "#{value} #{unit}"
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # unit is passed to the writer as an attribute entry; desc, value, minimum
  # and maximum as simple entries, in that order.
  def to_xml
    date_node = LibXML::XML::Node.new('date')
    entries = [[:attr, 'unit']]
    entries << [:simple, 'desc', @desc]
    entries << [:simple, 'value', @value]
    entries << [:simple, 'minimum', @minimum]
    entries << [:simple, 'maximum', @maximum]
    PhyloXML::Writer.generate_xml(date_node, self, entries)
    date_node
  end
end
|
828
|
+
|
829
|
+
# == Description
|
830
|
+
# This is used describe the domain architecture of a protein. Attribute
|
831
|
+
# 'length' is the total length of the protein
|
832
|
+
class DomainArchitecture
  # Integer. Total length of the protein
  attr_reader :length

  # Array of ProteinDomain objects.
  attr_reader :domains

  # Starts with an empty list of domains.
  def initialize
    @domains = []
  end

  # Stores the protein length, converted with to_i.
  def length=(str)
    @length = str.to_i
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # length is passed to the writer as an attribute entry and the domains as
  # an object array named 'domain'.
  def to_xml
    architecture_node = LibXML::XML::Node.new('domain_architecture')
    layout = [[:attr, 'length'], [:objarr, 'domain', 'domains']]
    PhyloXML::Writer.generate_xml(architecture_node, self, layout)
    architecture_node
  end
end
|
856
|
+
|
857
|
+
|
858
|
+
# == Description
|
859
|
+
# To represent an individual domain in a domain architecture. The
|
860
|
+
# name/unique identifier is described via the 'id' attribute.
|
861
|
+
class ProteinDomain
  # Float, for example to store E-values 4.7E-14
  attr_reader :confidence

  # String
  attr_accessor :id, :value

  # Integer. Beginning of the domain.
  attr_reader :from

  # Integer. End of the domain.
  attr_reader :to

  # Stores the start position, converted with to_i.
  def from=(str)
    @from = str.to_i
  end

  # Stores the end position, converted with to_i.
  def to=(str)
    @to = str.to_i
  end

  # Stores the confidence, converted with to_f.
  def confidence=(str)
    @confidence = str.to_f
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Raises RuntimeError when from or to is missing. The id and confidence
  # attributes are optional and only written when they have been set, so an
  # unset confidence no longer produces an empty confidence="" attribute.
  def to_xml
    raise "from attribute of ProteinDomain class is required." if @from.nil?
    raise "to attribute of ProteinDomain class is required." if @to.nil?

    xml_node = LibXML::XML::Node.new('domain', @value)
    xml_node["from"] = @from.to_s
    xml_node["to"] = @to.to_s
    xml_node["id"] = @id unless @id.nil?
    xml_node["confidence"] = @confidence.to_s unless @confidence.nil?
    xml_node
  end
end
|
905
|
+
|
906
|
+
|
907
|
+
#Property allows for typed and referenced properties from external resources
|
908
|
+
#to be attached to 'Phylogeny', 'Clade', and 'Annotation'. The value of a
|
909
|
+
#property is its mixed (free text) content. Attribute 'datatype' indicates
|
910
|
+
#the type of a property and is limited to xsd-datatypes (e.g. 'xsd:string',
|
911
|
+
#'xsd:boolean', 'xsd:integer', 'xsd:decimal', 'xsd:float', 'xsd:double',
|
912
|
+
#'xsd:date', 'xsd:anyURI'). Attribute 'applies_to' indicates the item to
|
913
|
+
#which a property applies to (e.g. 'node' for the parent node of a clade,
|
914
|
+
#'parent_branch' for the parent branch of a clade). Attribute 'id_ref' allows
|
915
|
+
#a property to be attached specifically to one element (on the xml-level).
|
916
|
+
#Optional attribute 'unit' is used to indicate the unit of the property.
|
917
|
+
#An example: <property datatype="xsd:integer" ref="NOAA:depth" applies_to="clade" unit="METRIC:m"> 200 </property>
|
918
|
+
class Property
  # String
  attr_accessor :ref, :unit, :id_ref, :value

  # String
  attr_reader :datatype, :applies_to

  # Sets the datatype.
  # Raises RuntimeError when the value is not one of the listed xsd types.
  def datatype=(str)
    #@todo add unit test or maybe remove, if assume that xml is valid.
    permitted = %w[
      xsd:string xsd:boolean xsd:decimal xsd:float xsd:double
      xsd:duration xsd:dateTime xsd:time xsd:date xsd:gYearMonth
      xsd:gYear xsd:gMonthDay xsd:gDay xsd:gMonth xsd:hexBinary
      xsd:base64Binary xsd:anyURI xsd:normalizedString xsd:token
      xsd:integer xsd:nonPositiveInteger xsd:negativeInteger
      xsd:long xsd:int xsd:short xsd:byte xsd:nonNegativeInteger
      xsd:unsignedLong xsd:unsignedInt xsd:unsignedShort
      xsd:unsignedByte xsd:positiveInteger
    ]
    if !permitted.include?(str)
      raise "Warning: #{str} is not in the list of allowed values."
    end
    @datatype = str
  end

  # Sets applies_to. An unlisted value only prints a warning to standard
  # output; the value is stored either way.
  def applies_to=(str)
    known = %w[phylogeny clade node annotation parent_branch other]
    if !known.include?(str)
      puts "Warning: #{str} is not in the list of allowed values."
    end
    @applies_to = str
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Raises RuntimeError when ref, datatype or applies_to (checked in that
  # order) is missing. The value, when set, is appended to the node.
  def to_xml
    #@todo write unit test for this
    [
      [@ref, "ref is an required element of property"],
      [@datatype, "datatype is an required element of property"],
      [@applies_to, "applies_to is an required element of property"]
    ].each do |required, message|
      raise message if required.nil?
    end

    property_node = LibXML::XML::Node.new('property')
    attribute_spec = ['ref', 'unit', 'datatype', 'applies_to', 'id_ref'].map do |attr_name|
      [:attr, attr_name]
    end
    Writer.generate_xml(property_node, self, attribute_spec)

    property_node << @value unless @value.nil?
    property_node
  end
end
|
966
|
+
|
967
|
+
# == Description
|
968
|
+
# A literature reference for a clade. It is recommended to use the 'doi'
|
969
|
+
# attribute instead of the free text 'desc' element whenever possible.
|
970
|
+
class Reference
  # String. Digital Object Identifier.
  attr_accessor :doi

  # String. Free text description.
  attr_accessor :desc

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # doi is passed to the writer as an attribute entry and desc as a simple
  # entry.
  def to_xml
    reference_node = LibXML::XML::Node.new('reference')
    contents = [[:attr, 'doi'], [:simple, 'desc', @desc]]
    Writer.generate_xml(reference_node, self, contents)
    reference_node
  end
end
|
987
|
+
|
988
|
+
# == Description
|
989
|
+
#
|
990
|
+
# This is used to express a typed relationship between two clades.
|
991
|
+
# For example it could be used to describe multiple parents of a clade.
|
992
|
+
class CladeRelation
  # Float
  attr_reader :distance
  # String. Id of the referenced parents of a clade.
  attr_accessor :id_ref_0, :id_ref_1
  # String
  attr_accessor :type
  # Confidence object
  attr_accessor :confidence

  # Stores the distance as a Float. A nil argument is ignored (matching
  # SequenceRelation#distance=), so an absent distance is not turned into 0.0.
  def distance=(str)
    @distance = str.to_f unless str.nil?
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Raises RuntimeError when id_ref_0, id_ref_1 or type is missing.
  def to_xml
    if @id_ref_0.nil? || @id_ref_1.nil? || @type.nil?
      raise "Attributes id_ref_0, id_ref_1, type are required elements by CladeRelation element."
    end

    cr = LibXML::XML::Node.new('clade_relation')
    Writer.generate_xml(cr, self, [
        [:attr, 'id_ref_0'],
        [:attr, 'id_ref_1'],
        [:attr, 'distance'],
        [:attr, 'type'],
        # Pass the real confidence object; a misspelled ivar here would
        # always be nil and silently drop it.
        [:complex, 'confidence', @confidence]])
    cr
  end
end
|
1024
|
+
|
1025
|
+
|
1026
|
+
# == Description
|
1027
|
+
# The names and/or counts of binary characters present, gained, and
|
1028
|
+
# lost at the root of a clade.
|
1029
|
+
class BinaryCharacters
  # bc_type: String written as the 'type' attribute.
  # gained, lost, present, absent: Arrays of character names.
  attr_accessor :bc_type, :gained, :lost, :present, :absent
  # Integer counts matching the four lists above.
  attr_reader :gained_count, :lost_count, :present_count, :absent_count

  # Stores the gained count, converted with to_i.
  def gained_count=(str)
    @gained_count = str.to_i
  end

  # Stores the lost count, converted with to_i.
  def lost_count=(str)
    @lost_count = str.to_i
  end

  # Stores the present count, converted with to_i.
  def present_count=(str)
    @present_count = str.to_i
  end

  # Stores the absent count, converted with to_i.
  def absent_count=(str)
    @absent_count = str.to_i
  end

  # Starts with four empty character lists.
  def initialize
    @gained = []
    @lost = []
    @present = []
    @absent = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # The type attribute is only written when bc_type is set, like the other
  # optional attributes in this file. Each non-empty list becomes one child
  # element, in the order gained, lost, present, absent.
  def to_xml
    bc = LibXML::XML::Node.new('binary_characters')
    bc['type'] = @bc_type unless @bc_type.nil?
    PhyloXML::Writer.generate_xml(bc, self, [
        [:attr, 'gained_count'],
        [:attr, 'lost_count'],
        [:attr, 'present_count'],
        [:attr, 'absent_count']])

    [['gained', @gained], ['lost', @lost],
     ['present', @present], ['absent', @absent]].each do |tag, names|
      # A list replaced by nil is treated like an empty one.
      next if names.nil? || names.empty?

      list_xml = LibXML::XML::Node.new(tag)
      PhyloXML::Writer.generate_xml(list_xml, self, [[:simplearr, 'bc', names]])
      bc << list_xml
    end

    bc
  end
end
|
1095
|
+
|
1096
|
+
# == Description
|
1097
|
+
# This is used to express a typed relationship between two sequences.
|
1098
|
+
# For example it could be used to describe an orthology (in which case
|
1099
|
+
# attribute 'type' is 'orthology').
|
1100
|
+
class SequenceRelation
  # String
  attr_accessor :id_ref_0, :id_ref_1
  # String. One of the relation types accepted by #type=.
  attr_reader :type
  # Float
  attr_reader :distance

  #@todo it has Confidences objects.

  # Stores the distance as a Float; a nil argument is ignored.
  def distance=(str)
    return if str.nil?

    @distance = str.to_f
  end

  # Sets the relation type.
  # Raises RuntimeError when the value is not one of the allowed types.
  def type=(str)
    #@todo do warning instead?
    #@todo do validation at actually writing xml
    allowed_values = %w[orthology one_to_one_orthology super_orthology paralogy
                        ultra_paralogy xenology unknown other]
    unless allowed_values.include?(str)
      raise "SequenceRelation#type has to be one one of #{allowed_values.join("; ")}"
    end
    @type = str
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Raises RuntimeError when id_ref_0, id_ref_1 or type is missing. The
  # distance attribute is only written when a distance has been set.
  def to_xml
    if [@id_ref_0, @id_ref_1, @type].any? { |required| required == nil }
      raise "Attributes id_ref_0, id_ref_1, type are required elements by SequenceRelation element."
    end

    relation_node = LibXML::XML::Node.new('sequence_relation')
    relation_node['id_ref_0'] = @id_ref_0
    relation_node['id_ref_1'] = @id_ref_1
    relation_node['distance'] = @distance.to_s unless @distance.nil?
    relation_node['type'] = @type
    relation_node
  end
end
|
1139
|
+
|
1140
|
+
class Other
  # element_name: tag name used for the node.
  # attributes: Hash of attribute name => value.
  # children: Array of objects responding to to_xml.
  # value: content appended to the node when not nil.
  attr_accessor :element_name, :attributes, :children, :value

  # Starts with no children and an empty attribute hash.
  def initialize
    @children = []
    @attributes = Hash.new
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Copies every attribute onto the node, appends the value when present and
  # then appends the xml of each child in order.
  def to_xml
    node = LibXML::XML::Node.new(@element_name)
    @attributes.each_pair { |attr_name, attr_value| node[attr_name] = attr_value }
    content = value
    node << content unless content.nil?
    children.each { |child| node << child.to_xml }
    node
  end
end
|
1162
|
+
|
1163
|
+
|
1164
|
+
end #module PhyloXML
|
1165
|
+
|
1166
|
+
end #end module Bio
|