bio 1.5.2 → 1.6.0.pre.20181210
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +12 -11
- data/ChangeLog +14 -3106
- data/{gemfiles/Gemfile.travis-ruby2.2 → Gemfile} +0 -1
- data/KNOWN_ISSUES.rdoc +0 -5
- data/README.rdoc +11 -18
- data/RELEASE_NOTES.rdoc +34 -291
- data/Rakefile +13 -9
- data/appveyor.yml +21 -0
- data/bioruby.gemspec +7 -78
- data/bioruby.gemspec.erb +8 -27
- data/doc/ChangeLog-1.5.0 +2919 -0
- data/doc/RELEASE_NOTES-1.5.0.rdoc +285 -0
- data/doc/Tutorial.rd +6 -108
- data/doc/Tutorial.rd.html +19 -98
- data/gemfiles/Gemfile.travis-jruby1.8 +3 -5
- data/gemfiles/Gemfile.travis-jruby1.9 +0 -3
- data/gemfiles/Gemfile.travis-rbx +0 -1
- data/gemfiles/Gemfile.travis-ruby1.8 +4 -4
- data/gemfiles/Gemfile.travis-ruby1.9 +0 -1
- data/gemfiles/prepare-gemspec.rb +4 -0
- data/lib/bio.rb +0 -10
- data/lib/bio/data/codontable.rb +99 -3
- data/lib/bio/io/togows.rb +5 -5
- data/lib/bio/version.rb +6 -8
- data/sample/test_restriction_enzyme_long.rb +1 -1
- data/test/unit/bio/data/test_codontable.rb +3 -0
- metadata +11 -77
- data/bin/bioruby +0 -47
- data/bin/br_biofetch.rb +0 -71
- data/bin/br_bioflat.rb +0 -293
- data/bin/br_biogetseq.rb +0 -45
- data/bin/br_pmfetch.rb +0 -422
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +0 -78
- data/lib/bio/db/biosql/sequence.rb +0 -444
- data/lib/bio/db/phyloxml/phyloxml.xsd +0 -582
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +0 -1197
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +0 -1001
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +0 -227
- data/lib/bio/io/biosql/ar-biosql.rb +0 -257
- data/lib/bio/io/biosql/biosql.rb +0 -39
- data/lib/bio/io/biosql/config/database.yml +0 -21
- data/lib/bio/io/sql.rb +0 -79
- data/lib/bio/shell.rb +0 -44
- data/lib/bio/shell/core.rb +0 -578
- data/lib/bio/shell/demo.rb +0 -146
- data/lib/bio/shell/interface.rb +0 -217
- data/lib/bio/shell/irb.rb +0 -94
- data/lib/bio/shell/object.rb +0 -71
- data/lib/bio/shell/plugin/blast.rb +0 -42
- data/lib/bio/shell/plugin/codon.rb +0 -218
- data/lib/bio/shell/plugin/das.rb +0 -58
- data/lib/bio/shell/plugin/emboss.rb +0 -23
- data/lib/bio/shell/plugin/entry.rb +0 -137
- data/lib/bio/shell/plugin/flatfile.rb +0 -101
- data/lib/bio/shell/plugin/midi.rb +0 -430
- data/lib/bio/shell/plugin/ncbirest.rb +0 -68
- data/lib/bio/shell/plugin/obda.rb +0 -45
- data/lib/bio/shell/plugin/psort.rb +0 -56
- data/lib/bio/shell/plugin/seq.rb +0 -248
- data/lib/bio/shell/plugin/togows.rb +0 -40
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/bioruby_generator.rb +0 -29
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_classes.rhtml +0 -4
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_log.rhtml +0 -27
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_methods.rhtml +0 -11
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_modules.rhtml +0 -4
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_variables.rhtml +0 -7
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css +0 -368
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.rhtml +0 -47
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_controller.rb +0 -144
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_helper.rb +0 -47
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/commands.rhtml +0 -8
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/history.rhtml +0 -10
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/index.rhtml +0 -26
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/shell/script.rb +0 -25
- data/lib/bio/shell/setup.rb +0 -108
- data/lib/bio/shell/web.rb +0 -102
- data/sample/test_phyloxml_big.rb +0 -205
- data/test/data/phyloxml/apaf.xml +0 -666
- data/test/data/phyloxml/bcl_2.xml +0 -2097
- data/test/data/phyloxml/made_up.xml +0 -144
- data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +0 -65
- data/test/data/phyloxml/phyloxml_examples.xml +0 -415
- data/test/unit/bio/db/biosql/tc_biosql.rb +0 -114
- data/test/unit/bio/db/biosql/ts_suite_biosql.rb +0 -8
- data/test/unit/bio/db/test_phyloxml.rb +0 -821
- data/test/unit/bio/db/test_phyloxml_writer.rb +0 -334
- data/test/unit/bio/shell/plugin/test_seq.rb +0 -187
- data/test/unit/bio/test_shell.rb +0 -20
@@ -1,444 +0,0 @@
|
|
1
|
-
|
2
|
-
#TODO save on db reading from a genbank or embl object
|
3
|
-
module Bio
|
4
|
-
class SQL
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
class Sequence
|
9
|
-
private
|
10
|
-
# example
|
11
|
-
# bioentry_qualifier_anchor :molecule_type, :synonym=>'mol_type'
|
12
|
-
# this function creates other 3 functions, molecule_type, molecule_type=, molecule_type_update
|
13
|
-
#molecule_type => return an array of strings, where each string is the value associated with the qualifier, ordered by rank.
|
14
|
-
#molecule_type=value add a bioentry_qualifier value to the table
|
15
|
-
#molecule_type_update(value, rank) update an entry of the table with an existing rank
|
16
|
-
#the method infers the qualifier term from the name of the first symbol, or you can specify a synonym to use
|
17
|
-
|
18
|
-
#creating an object with to_biosql is transaction safe.
|
19
|
-
|
20
|
-
#TODO: implement setting more than one qualifier value at a time.
|
21
|
-
# Defines three instance methods backed by the entry's
# bioentry_qualifier_values collection:
#
# <sym>:: reader. Collects the values stored for the qualifier term. If the
#         collected data parses as a date it is returned as a single
#         upper-cased "DD-MON-YYYY" String, otherwise the Array of values
#         is returned.
# <sym>=(value):: appends a new value; its rank is 1 for the first value,
#                 otherwise the successor of the last stored rank.
# <sym>_update(value, rank):: overwrites the value stored at +rank+, or
#                             appends +value+ when no row has that rank.
#
# sym::  base name of the generated methods.
# args:: optional Hash; <tt>:synonym</tt> gives the Term name to use
#        instead of +sym+.
#
# Errors raised inside the generated methods are reported with +puts+ and
# the method then returns nil.
def self.bioentry_qualifier_anchor(sym, *args)
  options = args.first || Hash.new
  #options.assert_valid_keys(:rank,:synonym,:multi)
  method_reader = sym.to_s.to_sym
  method_writer_operator = (sym.to_s+"=").to_sym
  method_writer_modder = (sym.to_s+"_update").to_sym
  synonym = options[:synonym].nil? ? sym.to_s : options[:synonym]

  # Finds the qualifier Term by name, creating it under the
  # 'Annotation Tags' ontology when it does not exist yet.
  find_term = lambda do
    Term.first(:conditions=>["name = ?",synonym]) || Term.create({:name => synonym, :ontology=> Ontology.first(:conditions=>["name = ?",'Annotation Tags'])})
  end

  define_method(method_reader) do
    begin
      term = find_term.call
      rows = @entry.bioentry_qualifier_values.all(:conditions=>["term_id = ?",term.term_id])
      data = rows.map{|row| row.value} unless rows.nil?
      begin
        # Dates are normalised to the GenBank/EMBL style; anything that
        # does not parse as a date falls through to the raw Array.
        Date.parse(data.to_s).strftime("%d-%b-%Y").upcase
      rescue ArgumentError, TypeError, NoMethodError, NameError
        data
      end
    rescue StandardError => e
      # StandardError instead of Exception so that Interrupt/SystemExit
      # are no longer swallowed.
      puts "Reader Error: #{synonym} #{e.message}"
    end
  end

  define_method(method_writer_operator) do |value|
    begin
      term = find_term.call
      datas = @entry.bioentry_qualifier_values.all(:conditions=>["term_id = ?",term.term_id])
      #add an element incrementing the rank or setting the first to 1
      be_qu_va = @entry.bioentry_qualifier_values.build({:term=>term, :rank=>(datas.empty? ? 1 : datas.last.rank.succ), :value=>value})
      be_qu_va.save
    rescue StandardError => e
      puts "WriterOperator= Error: #{synonym} #{e.message}"
    end
  end

  define_method(method_writer_modder) do |value, rank|
    begin
      term = find_term.call
      # Fetch the single row at this rank. The previous code fetched a
      # collection with .all and tested it for nil, so the append branch
      # was unreachable and value= was sent to the collection itself.
      row = @entry.bioentry_qualifier_values.first(:conditions=>["term_id = ? and rank = ?", term.term_id, rank])
      if row.nil?
        send method_writer_operator, value
      else
        row.value = value
        row.save
      end
    rescue StandardError => e
      puts "WriterModder Error: #{synonym} #{e.message}"
    end
  end

end
|
81
|
-
|
82
|
-
public
|
83
|
-
attr_reader :entry
|
84
|
-
|
85
|
-
# Destroys the wrapped bioentry. Before that, every reference that is
# attached to exactly one bioentry is deleted as well.
#
# TODO: check whether the references connected to this bioentry are leaves;
# a more thorough check would look for other bioentries connected to each
# reference and delete the reference only when there are none.
def delete
  @entry.references.each do |ref|
    only_used_here = (ref.bioentries.size == 1)
    ref.delete if only_used_here
  end
  @entry.destroy
end
|
92
|
-
|
93
|
-
# Builds a Bio::Feature from a seqfeature record.
#
# sf:: record responding to +locations+, +type_term+ and
#      +seqfeature_qualifier_values+.
#
# The locations are rendered with +to_s+ and comma-joined; when there is
# more than one location the list is wrapped in "join(...)".
def get_seqfeature(sf)
  rendered = sf.locations.map { |loc| loc.to_s }
  locations_str = rendered.join(',')
  #pp sf.locations.inspect
  if sf.locations.count > 1
    locations_str = "join(#{locations_str})"
  end
  feature_key = sf.type_term.name
  qualifiers = sf.seqfeature_qualifier_values.collect do |sfqv|
    Bio::Feature::Qualifier.new(sfqv.term.name, sfqv.value)
  end
  Bio::Feature.new(feature_key, locations_str, qualifiers)
end
|
101
|
-
|
102
|
-
# Assigns +len+ to the length attribute of the entry's biosequence.
def length=(len)
  bioseq = @entry.biosequence
  bioseq.length = len
end
|
105
|
-
|
106
|
-
# Wraps an existing bioentry, or creates one from a biosequence.
#
# options:: Hash with either
#           * <tt>:entry</tt> - record to wrap, stored as @entry; or
#           * <tt>:biosequence</tt> and <tt>:biodatabase</tt> - both are
#             handed to #to_biosql.
#           When neither combination is complete nothing is done.
def initialize(options={})
  #options.assert_valid_keys(:entry, :biodatabase,:biosequence)
  existing = options[:entry]
  unless existing.nil?
    @entry = existing
    return @entry
  end

  biosequence = options[:biosequence]
  biodatabase = options[:biodatabase]
  return nil if biosequence.nil? or biodatabase.nil?

  to_biosql(biosequence, biodatabase)
end
|
113
|
-
|
114
|
-
# Builds a new bioentry in +biodatabase+ from the sequence object +bs+ and
# copies its annotation through this object's own writer methods.
#
# bs::          object responding to +entry_id+, +primary_accession+,
#               +definition+, +sequence_version+, +division+,
#               +other_seqids+, +secondary_accessions+, +seq+,
#               +molecule_type+, +data_class+, +topology+, +date_created+,
#               +date_modified+, +keywords+, +species+, +features+,
#               +references+ and +comments+.
# biodatabase:: object whose +bioentries+ collection responds to +build+.
#
# Returns self. When a StandardError is raised along the way it is printed
# and nil is returned; whatever was saved before the failure stays saved.
def to_biosql(bs,biodatabase)
  begin
    @entry = biodatabase.bioentries.build({:name=>bs.entry_id})

    puts "primary" if $DEBUG
    self.primary_accession = bs.primary_accession

    puts "def" if $DEBUG
    self.definition = bs.definition unless bs.definition.nil?

    puts "seqver" if $DEBUG
    self.sequence_version = bs.sequence_version || 0

    puts "divi" if $DEBUG
    self.division = bs.division unless bs.division.nil?

    puts "identifier" if $DEBUG
    self.identifier = bs.other_seqids.collect{|dblink| "#{dblink.database}:#{dblink.id}"}.join(';') unless bs.other_seqids.nil?
    # the entry is saved here, before the qualifier, sequence and feature writers below run
    @entry.save
    puts "secacc" if $DEBUG

    bs.secondary_accessions.each do |sa|
      puts "#{sa}" if $DEBUG
      #write every secondary accession as a qualifier
      self.secondary_accessions = sa
    end unless bs.secondary_accessions.nil?

    puts "seq" if $DEBUG
    puts bs.seq if $DEBUG
    self.seq = bs.seq unless bs.seq.nil?
    puts "mol" if $DEBUG

    self.molecule_type = bs.molecule_type unless bs.molecule_type.nil?
    puts "dc" if $DEBUG

    self.data_class = bs.data_class unless bs.data_class.nil?
    puts "top" if $DEBUG
    self.topology = bs.topology unless bs.topology.nil?
    puts "datec" if $DEBUG
    self.date_created = bs.date_created unless bs.date_created.nil?
    puts "datemod" if $DEBUG
    self.date_modified = bs.date_modified unless bs.date_modified.nil?
    puts "key" if $DEBUG

    bs.keywords.each do |kw|
      #write every keyword as a qualifier
      self.keywords = kw
    end unless bs.keywords.nil?

    puts "spec" if $DEBUG
    # A nil species used to raise NoMethodError on .empty? and abort the
    # import before features, references and comments were written.
    self.species = bs.species unless bs.species.nil? or bs.species.empty?
    puts "Debug: #{bs.species}" if $DEBUG
    puts "Debug: feat..start" if $DEBUG

    bs.features.each do |feat|
      self.feature=feat
    end unless bs.features.nil?

    puts "Debug: feat...end" if $DEBUG
    bs.references.each do |reference|
      self.reference=reference
    end unless bs.references.nil?

    bs.comments.each do |comment|
      self.comment=comment
    end unless bs.comments.nil?

    return self
  rescue StandardError => e
    # StandardError instead of Exception so that Interrupt/SystemExit
    # raised during a long import are no longer swallowed.
    puts "to_biosql exception: #{e}"
    puts $!
  end #rescue
end #to_biosql
|
193
|
-
|
194
|
-
|
195
|
-
# Returns the +name+ of the wrapped entry. Also available as #entry_id.
def name
  record = @entry
  record.name
end
alias entry_id name
|
199
|
-
|
200
|
-
# Assigns +value+ to the +name+ of the wrapped entry.
# Also available as #entry_id=.
def name=(value)
  record = @entry
  record.name = value
end
alias entry_id= name=
|
204
|
-
|
205
|
-
# Returns the +accession+ of the wrapped entry.
def primary_accession
  record = @entry
  record.accession
end
|
208
|
-
|
209
|
-
# Assigns +value+ to the +accession+ of the wrapped entry.
def primary_accession=(value)
  record = @entry
  record.accession = value
end
|
212
|
-
|
213
|
-
#TODO def secondary_accession
|
214
|
-
# @entry.bioentry_qualifier_values
|
215
|
-
# end
|
216
|
-
|
217
|
-
# Returns the organism label of the entry's taxon: the scientific name,
# directly followed by "(common name)" when a GenBank common name exists.
# Returns "" when the entry has no taxon. Also available as #species.
def organism
  return "" if @entry.taxon.nil?

  scientific = "#{@entry.taxon.taxon_scientific_name.name}"
  if @entry.taxon.taxon_genbank_common_name
    scientific + "(#{@entry.taxon.taxon_genbank_common_name.name})"
  else
    scientific + ''
  end
end
alias species organism
|
221
|
-
|
222
|
-
# Links the entry to the taxon whose scientific name matches +value+.
#
# A trailing " (…)" part of +value+ is stripped before TaxonName is
# searched with name_class 'scientific name'. On a match the entry's
# taxon_id is set and the entry saved; otherwise an error line is printed
# and nothing changes. Also available as #species=.
def organism=(value)
  #FIX there is a shortcut
  scientific = value.gsub(/\s+\(.+\)/,'')
  taxon_name = TaxonName.first(:conditions=>["name = ? and name_class = ?",scientific,'scientific name'])
  unless taxon_name.nil?
    @entry.taxon_id = taxon_name.taxon_id
    return @entry.save
  end
  puts "Error value doesn't exists in taxon_name table with scientific name constraint."
end
alias species= organism=
|
233
|
-
|
234
|
-
# Returns the name of the biodatabase the entry belongs to.
def database
  biodatabase = @entry.biodatabase
  biodatabase.name
end
|
237
|
-
|
238
|
-
# Returns the description of the biodatabase the entry belongs to.
def database_desc
  biodatabase = @entry.biodatabase
  biodatabase.description
end
|
241
|
-
|
242
|
-
# Returns the +version+ of the wrapped entry.
# Also available as #sequence_version.
def version
  record = @entry
  record.version
end
alias sequence_version version
|
246
|
-
|
247
|
-
# Assigns +value+ to the +version+ of the wrapped entry.
# Also available as #sequence_version=.
def version=(value)
  record = @entry
  record.version = value
end
alias sequence_version= version=
|
251
|
-
|
252
|
-
# Returns the +division+ of the wrapped entry.
def division
  record = @entry
  record.division
end
|
255
|
-
|
256
|
-
# Assigns +value+ to the +division+ of the wrapped entry.
def division=(value)
  record = @entry
  record.division = value
end
|
259
|
-
|
260
|
-
# Returns the +description+ of the wrapped entry.
# Also available as #definition.
def description
  record = @entry
  record.description
end
alias definition description
|
264
|
-
|
265
|
-
# Assigns +value+ to the +description+ of the wrapped entry.
# Also available as #definition=.
def description=(value)
  record = @entry
  record.description = value
end
alias definition= description=
|
269
|
-
|
270
|
-
# Returns the +identifier+ of the wrapped entry.
# Also available as #other_seqids.
def identifier
  record = @entry
  record.identifier
end
alias other_seqids identifier
|
274
|
-
|
275
|
-
# Assigns +value+ to the +identifier+ of the wrapped entry.
def identifier=(value)
  record = @entry
  record.identifier = value
end
|
278
|
-
|
279
|
-
# Qualifier-backed attributes. Each row is the argument list of one
# bioentry_qualifier_anchor call: the method base name, optionally followed
# by the term name to use in place of it.
[
  [:data_class],
  [:molecule_type, {:synonym=>'mol_type'}],
  [:topology],
  [:date_created],
  [:date_modified, {:synonym=>'date_changed'}],
  [:keywords, {:synonym=>'keyword'}],
  [:secondary_accessions, {:synonym=>'secondary_accession'}]
].each do |anchor_args|
  bioentry_qualifier_anchor(*anchor_args)
end
|
286
|
-
|
287
|
-
# Returns one converted feature per seqfeature of the entry, each produced
# by #get_seqfeature.
def features
  @entry.seqfeatures.map { |sf| get_seqfeature(sf) }
end
|
292
|
-
|
293
|
-
# Stores +feat+ as a new seqfeature of the entry, together with its
# locations and qualifiers.
#
# feat:: object responding to +feature+ (the feature key), +locations+
#        (items responding to +from+, +to+, +strand+) and +each+ (yielding
#        items responding to +qualifier+ and +value+).
#
# The feature-key term is looked up by name and created under the
# 'SeqFeature Keys' ontology when missing; qualifier terms likewise under
# 'Annotation Tags'. The source term 'EMBLGenBankSwit' is only looked up.
# Every rank is the current count of the owning collection plus one.
def feature=(feat)
  #ToDo: avoid Ontology find here, probably more efficient create class variables
  puts "feature:type_term = #{feat.feature}" if $DEBUG
  type_term = Term.first(:conditions=>["name = ?", feat.feature])
  type_term ||= Term.create({:name=>feat.feature, :ontology=>Ontology.first(:conditions=>["name = ?",'SeqFeature Keys'])})

  puts "feature:source_term" if $DEBUG
  source_term = Term.first(:conditions=>["name = ?",'EMBLGenBankSwit'])

  puts "feature:seqfeature" if $DEBUG
  seqfeature = @entry.seqfeatures.build({
    :source_term=>source_term,
    :type_term=>type_term,
    :rank=>@entry.seqfeatures.count.succ,
    :display_name=>''
  })
  seqfeature.save

  puts "feature:location" if $DEBUG
  feat.locations.each do |loc|
    location = seqfeature.locations.build({
      :seqfeature=>seqfeature,
      :start_pos=>loc.from,
      :end_pos=>loc.to,
      :strand=>loc.strand,
      :rank=>seqfeature.locations.count.succ
    })
    location.save
  end

  puts "feature:qualifier" if $DEBUG
  feat.each do |qualifier|
    qual_term = Term.first(:conditions=>["name = ?", qualifier.qualifier])
    qual_term ||= Term.create({:name=>qualifier.qualifier, :ontology=>Ontology.first(:conditions=>["name = ?", 'Annotation Tags'])})
    qual = seqfeature.seqfeature_qualifier_values.build({
      :seqfeature=>seqfeature,
      :term=>qual_term,
      :value=>qualifier.value.to_s,
      :rank=>seqfeature.seqfeature_qualifier_values.count.succ
    })
    qual.save
  end
end
|
321
|
-
|
322
|
-
#return the seqfeature mapped from BioSQL with a type_term like 'CDS'
|
323
|
-
# Returns the entry's +cdsfeatures+ unchanged.
def cdsfeatures
  record = @entry
  record.cdsfeatures
end
|
326
|
-
|
327
|
-
# Returns the sequence.
|
328
|
-
# Returns a Bio::Sequence::Generic object.
|
329
|
-
|
330
|
-
# Returns the entry's sequence wrapped in a Bio::Sequence::Generic; an
# entry without a biosequence yields an empty sequence.
def seq
  bioseq = @entry.biosequence
  residues = bioseq ? bioseq.seq : ''
  Bio::Sequence::Generic.new(residues)
end
|
334
|
-
|
335
|
-
# Stores +value+ as the sequence of the entry and records its length.
#
# value:: the sequence; must respond to +length+.
#
# A Biosequence is created when the entry has none, otherwise the existing
# one is updated. The biosequence is saved once, after both +seq+ and
# +length+ have been assigned. The earlier code saved a new biosequence
# before its length was set, and did not save an existing one at all.
# The entry itself is saved last.
def seq=(value)
  #TODO: revise this piece of code.
  #chk which type of alphabet is, NU/NA/nil
  if @entry.biosequence.nil?
    @entry.biosequence = Biosequence.new(:seq=>value)
  else
    @entry.biosequence.seq = value
  end
  self.length = value.length
  @entry.biosequence.save
  @entry.save
end
|
352
|
-
|
353
|
-
#Reports the scientific names of the parent taxa, walking upwards until a taxon with rank "no rank" is reached. Taxa with rank == "class" are currently skipped. Open question: should taxa with rank == "class" be reported?
|
354
|
-
# Returns the scientific names of the ancestors of the entry's taxon,
# outermost first.
#
# The walk starts at the parent of the entry's taxon and follows
# parent_taxon_id upwards. It stops when no taxon is found, when a taxon
# is its own parent, or at the first taxon whose node_rank is 'no rank'.
# Taxa with node_rank 'class' are walked through but not reported.
#
# Returns an empty Array when the entry has no taxon (previously this
# raised NoMethodError on nil).
def taxonomy
  return [] if @entry.taxon.nil?

  tax = []
  taxon = Taxon.first(:conditions=>["taxon_id = ?",@entry.taxon.parent_taxon_id])
  while taxon and taxon.taxon_id != taxon.parent_taxon_id and taxon.node_rank!='no rank'
    tax << taxon.taxon_scientific_name.name if taxon.node_rank!='class'
    #Note: I don't like this call very much, correct with a relationship in the ref class.
    taxon = Taxon.first(:conditions=>["taxon_id = ?",taxon.parent_taxon_id])
  end
  tax.reverse
end
|
364
|
-
|
365
|
-
# Returns the +length+ of the entry's biosequence.
def length
  bioseq = @entry.biosequence
  bioseq.length
end
|
368
|
-
|
369
|
-
# Returns one Bio::Reference per bioentry_reference of the entry.
#
# Each reference is built from a Hash holding:
# 'authors'::               the stored author string split after every
#                           ". " (only when a reference with authors exists)
# 'sequence_position'::     "start-end" (only when both positions are set)
# 'title'::                 the reference title
# 'embl_gb_record_number':: the rank of the bioentry_reference
# 'xrefs'::                 "dbname; accession." of the dbxref, or ''
# plus one entry per "key=value" element of the '|'-separated location
# string written by #reference=.
def references
  #probably it would be better to use a reference class to collect this information
  @entry.bioentry_references.collect do |bio_ref|
    hash = Hash.new
    hash['authors'] = bio_ref.reference.authors.gsub(/\.\s/, "\.\s\|").split(/\|/) if (bio_ref.reference and bio_ref.reference.authors)

    hash['sequence_position'] = "#{bio_ref.start_pos}-#{bio_ref.end_pos}" if (bio_ref.start_pos and bio_ref.end_pos)
    hash['title'] = bio_ref.reference.title
    hash['embl_gb_record_number'] = bio_ref.rank
    #TODO: solve the problem with specific comment per reference.
    #TODO: get dbxref
    #take a look when location is build up in def reference=(value)

    bio_ref.reference.location.split('|').each do |element|
      # Split on the first '=' only, so that values which themselves
      # contain '=' (URLs, abstracts) are kept whole.
      key,value=element.split('=', 2)
      hash[key]=value
    end unless bio_ref.reference.location.nil?

    hash['xrefs'] = bio_ref.reference.dbxref ? "#{bio_ref.reference.dbxref.dbname}; #{bio_ref.reference.dbxref.accession}." : ''
    Bio::Reference.new(hash)
  end
end
|
392
|
-
|
393
|
-
# Returns the comment_text of every comment attached to the entry.
def comments
  @entry.comments.map { |comment| comment.comment_text }
end
|
398
|
-
|
399
|
-
# Attaches the literature reference +value+ to the entry.
#
# value:: object responding to the reference accessors used below
#         (+journal+, +volume+, ..., +title+, +authors+,
#         +sequence_position+, +embl_gb_record_number+).
#
# The bibliographic fields are packed as "key=value" items joined with
# '|' into the location of the Reference. A Reference with the same title
# is reused when one exists, otherwise a new one is created. The link
# record carries the rank and, when +sequence_position+ is given, the
# start and end taken from its "start-end" form.
def reference=(value)
  # [accessor, predicate that marks the field as absent], in storage order
  packed_fields = [
    [:journal, :empty?], [:volume, :empty?], [:issue, :empty?],
    [:pages, :empty?], [:year, :empty?], [:pubmed, :empty?],
    [:medline, :empty?], [:doi, :nil?], [:abstract, :empty?],
    [:url, :nil?], [:mesh, :empty?], [:affiliations, :empty?]
  ]
  locations = []
  packed_fields.each do |field, absent_test|
    content = value.send(field)
    next if content.send(absent_test)
    locations << "#{field}=#{content}"
  end
  locations << "comments=#{value.comments.join('~')}" unless value.comments.nil?

  if value.sequence_position
    start_pos, end_pos = value.sequence_position.gsub(/\s*/,'').split('-')
  else
    start_pos, end_pos = nil, nil
  end

  reference = Reference.first(:conditions=>["title = ?",value.title])
  reference ||= Reference.create({:title=>value.title,:authors=>value.authors.join(' '), :location=>locations.join('|')})

  bio_reference = @entry.bioentry_references.build({:reference=>reference,:rank=>value.embl_gb_record_number, :start_pos=>start_pos, :end_pos=>end_pos})
  bio_reference.save
end
|
419
|
-
|
420
|
-
# Appends +value+ as a new comment of the entry and saves it. Its rank is
# the current number of comments plus one.
def comment=(value)
  next_rank = @entry.comments.count.succ
  new_comment = @entry.comments.build({:comment_text=>value, :rank=>next_rank})
  new_comment.save
end
|
425
|
-
|
426
|
-
# Saves the entry's biosequence, when it has one, and then the entry.
# Returns the result of saving the entry.
#
# An entry without a biosequence used to raise NoMethodError here, even
# though #seq treats a missing biosequence as a valid state.
def save
  #I should add chks for SQL errors
  bioseq = @entry.biosequence
  bioseq.save unless bioseq.nil?
  @entry.save
end
|
431
|
-
# Returns the sequence as FASTA text: a ">accession" header line followed
# by the sequence in lines of at most 60 characters.
#
# The header uses #primary_accession. The previous code called +accession+,
# a method that is not defined anywhere in the visible class.
def to_fasta
  ">" + primary_accession + "\n" + seq.gsub(/.{1,60}/, "\\0\n")
end
|
434
|
-
|
435
|
-
# Returns the reverse complement of the sequence as FASTA text: a
# ">accession" header line followed by lines of at most 60 characters.
#
# The header uses #primary_accession. The previous code called +accession+,
# a method that is not defined anywhere in the visible class.
# The misspelled method name is kept for existing callers; the correctly
# spelled #to_fasta_reverse_complement is an alias.
#
# NOTE(review): relies on the object returned by #seq responding to
# +reverse_complement+ - confirm this holds for Bio::Sequence::Generic.
def to_fasta_reverse_complememt
  ">" + primary_accession + "\n" + seq.reverse_complement.gsub(/.{1,60}/, "\\0\n")
end
alias to_fasta_reverse_complement to_fasta_reverse_complememt
|
438
|
-
|
439
|
-
# Converts this object into a Bio::Sequence through the BioSQL adapter.
def to_biosequence
  adapter_module = Bio::Sequence::Adapter::BioSQL
  Bio::Sequence.adapter(self, adapter_module)
end
|
442
|
-
end #Sequence
|
443
|
-
end #SQL
|
444
|
-
end #Bio
|
@@ -1,582 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<!-- -->
|
3
|
-
<!-- phyloXML -->
|
4
|
-
<!-- -->
|
5
|
-
<!-- schema in XMLSchema -->
|
6
|
-
<!-- -->
|
7
|
-
<!-- License: The phyloXML XML Schema Definition is -->
|
8
|
-
<!-- dual-licensed under the LGPL or Ruby's -->
|
9
|
-
<!-- License. -->
|
10
|
-
<!-- You can redistribute and/or modify -->
|
11
|
-
<!-- it under either the terms of the LGPL, -->
|
12
|
-
<!-- or Ruby's License. -->
|
13
|
-
<!-- see: http://www.ruby-lang.org/en/about/license.txt -->
|
14
|
-
<!-- -->
|
15
|
-
<!-- Copyright (c) 2008-2009 Christian M Zmasek -->
|
16
|
-
<!-- -->
|
17
|
-
<!-- www.phyloxml.org -->
|
18
|
-
<!-- Version: 1.10 -->
|
19
|
-
<!-- Last modified: 2009.11.17 by Christian M Zmasek -->
|
20
|
-
<!-- -->
|
21
|
-
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:phy="http://www.phyloxml.org"
|
22
|
-
targetNamespace="http://www.phyloxml.org" elementFormDefault="qualified" attributeFormDefault="unqualified">
|
23
|
-
<xs:annotation>
|
24
|
-
<xs:documentation> phyloXML is an XML language to describe evolutionary trees and associated data. Version: 1.10.
|
25
|
-
License: dual-licensed under the LGPL or Ruby's License. Copyright (c) 2008-2009 Christian M Zmasek.</xs:documentation>
|
26
|
-
</xs:annotation>
|
27
|
-
<!-- phyloxml is the root element:-->
|
28
|
-
<xs:element name="phyloxml" type="phy:Phyloxml"/>
|
29
|
-
<!-- phyloXML definition:-->
|
30
|
-
<xs:complexType name="Phyloxml">
|
31
|
-
<xs:annotation>
|
32
|
-
<xs:documentation> 'phyloxml' is the name of the root element. Phyloxml contains an arbitrary number of
|
33
|
-
'phylogeny' elements (each representing one phylogeny) possibly followed by elements from other namespaces.
|
34
|
-
</xs:documentation>
|
35
|
-
</xs:annotation>
|
36
|
-
<xs:sequence maxOccurs="unbounded">
|
37
|
-
<xs:element name="phylogeny" type="phy:Phylogeny" minOccurs="0" maxOccurs="unbounded"/>
|
38
|
-
<xs:any minOccurs="0" maxOccurs="unbounded" processContents="lax" namespace="##other"/>
|
39
|
-
</xs:sequence>
|
40
|
-
</xs:complexType>
|
41
|
-
<!-- Phylogeny:-->
|
42
|
-
<xs:complexType name="Phylogeny">
|
43
|
-
<xs:annotation>
|
44
|
-
<xs:documentation> Element Phylogeny is used to represent a phylogeny. The required attribute 'rooted' is used
|
45
|
-
to indicate whether the phylogeny is rooted or not. The attribute 'rerootable' can be used to indicate that
|
46
|
-
the phylogeny is not allowed to be rooted differently (i.e. because it is associated with root dependent
|
47
|
-
data, such as gene duplications). The attribute 'type' can be used to indicate the type of phylogeny (e.g.
|
48
|
-
'gene tree'). It is recommended to use the attribute 'branch_length_unit' if the phylogeny has branch
|
49
|
-
lengths. Element clade is used in a recursive manner to describe the topology of a phylogenetic
|
50
|
-
tree.</xs:documentation>
|
51
|
-
</xs:annotation>
|
52
|
-
<xs:sequence>
|
53
|
-
<xs:element name="name" type="xs:token" minOccurs="0"/>
|
54
|
-
<xs:element name="id" type="phy:Id" minOccurs="0"/>
|
55
|
-
<xs:element name="description" type="xs:token" minOccurs="0"/>
|
56
|
-
<xs:element name="date" type="xs:dateTime" minOccurs="0"/>
|
57
|
-
<xs:element name="confidence" type="phy:Confidence" minOccurs="0" maxOccurs="unbounded"/>
|
58
|
-
<xs:element name="clade" type="phy:Clade" minOccurs="0"/>
|
59
|
-
<xs:element name="clade_relation" type="phy:CladeRelation" minOccurs="0" maxOccurs="unbounded"/>
|
60
|
-
<xs:element name="sequence_relation" type="phy:SequenceRelation" minOccurs="0" maxOccurs="unbounded"/>
|
61
|
-
<xs:element name="property" type="phy:Property" minOccurs="0" maxOccurs="unbounded"/>
|
62
|
-
<xs:any minOccurs="0" maxOccurs="unbounded" processContents="lax" namespace="##other"/>
|
63
|
-
</xs:sequence>
|
64
|
-
<xs:attribute name="rooted" type="xs:boolean" use="required"/>
|
65
|
-
<xs:attribute name="rerootable" type="xs:boolean"/>
|
66
|
-
<xs:attribute name="branch_length_unit" type="xs:token"/>
|
67
|
-
<xs:attribute name="type" type="xs:token"/>
|
68
|
-
</xs:complexType>
|
69
|
-
<!-- Clade:-->
|
70
|
-
<xs:complexType name="Clade">
|
71
|
-
<xs:annotation>
|
72
|
-
<xs:documentation> Element Clade is used in a recursive manner to describe the topology of a phylogenetic tree.
|
73
|
-
The parent branch length of a clade can be described either with the 'branch_length' element or the
|
74
|
-
'branch_length' attribute (it is not recommended to use both at the same time, though). Usage of the
|
75
|
-
'branch_length' attribute allows for a less verbose description. Element 'confidence' is used to indicate
|
76
|
-
the support for a clade/parent branch. Element 'events' is used to describe such events as gene-duplications
|
77
|
-
at the root node/parent branch of a clade. Element 'width' is the branch width for this clade (including
|
78
|
-
parent branch). Both 'color' and 'width' elements apply for the whole clade unless overwritten in sub
|
79
|
-
clades. Attribute 'id_source' is used to link other elements to a clade (on the xml-level).
|
80
|
-
</xs:documentation>
|
81
|
-
</xs:annotation>
|
82
|
-
<xs:sequence>
|
83
|
-
<xs:element name="name" type="xs:token" minOccurs="0"/>
|
84
|
-
<xs:element name="branch_length" type="xs:double" minOccurs="0"/>
|
85
|
-
<xs:element name="confidence" type="phy:Confidence" minOccurs="0" maxOccurs="unbounded"/>
|
86
|
-
<xs:element name="width" type="xs:double" minOccurs="0"/>
|
87
|
-
<xs:element name="color" type="phy:BranchColor" minOccurs="0"/>
|
88
|
-
<xs:element name="node_id" type="phy:Id" minOccurs="0"/>
|
89
|
-
<xs:element name="taxonomy" type="phy:Taxonomy" minOccurs="0" maxOccurs="unbounded"/>
|
90
|
-
<xs:element name="sequence" type="phy:Sequence" minOccurs="0" maxOccurs="unbounded"/>
|
91
|
-
<xs:element name="events" type="phy:Events" minOccurs="0"/>
|
92
|
-
<xs:element name="binary_characters" type="phy:BinaryCharacters" minOccurs="0"/>
|
93
|
-
<xs:element name="distribution" type="phy:Distribution" minOccurs="0" maxOccurs="unbounded"/>
|
94
|
-
<xs:element name="date" type="phy:Date" minOccurs="0"/>
|
95
|
-
<xs:element name="reference" type="phy:Reference" minOccurs="0" maxOccurs="unbounded"/>
|
96
|
-
<xs:element name="property" type="phy:Property" minOccurs="0" maxOccurs="unbounded"/>
|
97
|
-
<xs:element name="clade" type="phy:Clade" minOccurs="0" maxOccurs="unbounded"/>
|
98
|
-
<xs:any minOccurs="0" maxOccurs="unbounded" processContents="lax" namespace="##other"/>
|
99
|
-
</xs:sequence>
|
100
|
-
<xs:attribute name="branch_length" type="xs:double"/>
|
101
|
-
<xs:attribute name="id_source" type="phy:id_source"/>
|
102
|
-
</xs:complexType>
|
103
|
-
<!-- Taxonomy:-->
|
104
|
-
<xs:complexType name="Taxonomy">
|
105
|
-
<xs:annotation>
|
106
|
-
<xs:documentation> Element Taxonomy is used to describe taxonomic information for a clade. Element 'code' is
|
107
|
-
intended to store UniProt/Swiss-Prot style organism codes (e.g. 'APLCA' for the California sea hare 'Aplysia
|
108
|
-
californica') or other styles of mnemonics (e.g. 'Aca'). Element 'authority' is used to keep the authority,
|
109
|
-
such as 'J. G. Cooper, 1863', associated with the 'scientific_name'. Element 'id' is used for a unique
|
110
|
-
identifier of a taxon (for example '6500' with 'ncbi_taxonomy' as 'provider' for the California sea hare).
|
111
|
-
Attribute 'id_source' is used to link other elements to a taxonomy (on the xml-level).</xs:documentation>
|
112
|
-
</xs:annotation>
|
113
|
-
<xs:sequence>
|
114
|
-
<xs:element name="id" type="phy:Id" minOccurs="0"/>
|
115
|
-
<xs:element name="code" type="phy:TaxonomyCode" minOccurs="0"/>
|
116
|
-
<xs:element name="scientific_name" type="xs:token" minOccurs="0"/>
|
117
|
-
<xs:element name="authority" type="xs:token" minOccurs="0"/>
|
118
|
-
<xs:element name="common_name" type="xs:token" minOccurs="0" maxOccurs="unbounded"/>
|
119
|
-
<xs:element name="synonym" type="xs:token" minOccurs="0" maxOccurs="unbounded"/>
|
120
|
-
<xs:element name="rank" type="phy:Rank" minOccurs="0"/>
|
121
|
-
<xs:element name="uri" type="phy:Uri" minOccurs="0"/>
|
122
|
-
<xs:any minOccurs="0" maxOccurs="unbounded" processContents="lax" namespace="##other"/>
|
123
|
-
</xs:sequence>
|
124
|
-
<xs:attribute name="id_source" type="phy:id_source"/>
|
125
|
-
</xs:complexType>
|
126
|
-
<xs:simpleType name="TaxonomyCode">
|
127
|
-
<xs:restriction base="xs:token">
|
128
|
-
<xs:pattern value="[a-zA-Z0-9_]{2,10}"/>
|
129
|
-
</xs:restriction>
|
130
|
-
</xs:simpleType>
|
131
|
-
<xs:simpleType name="Rank">
|
132
|
-
<xs:restriction base="xs:token">
|
133
|
-
<xs:enumeration value="domain"/>
|
134
|
-
<xs:enumeration value="kingdom"/>
|
135
|
-
<xs:enumeration value="subkingdom"/>
|
136
|
-
<xs:enumeration value="branch"/>
|
137
|
-
<xs:enumeration value="infrakingdom"/>
|
138
|
-
<xs:enumeration value="superphylum"/>
|
139
|
-
<xs:enumeration value="phylum"/>
|
140
|
-
<xs:enumeration value="subphylum"/>
|
141
|
-
<xs:enumeration value="infraphylum"/>
|
142
|
-
<xs:enumeration value="microphylum"/>
|
143
|
-
<xs:enumeration value="superdivision"/>
|
144
|
-
<xs:enumeration value="division"/>
|
145
|
-
<xs:enumeration value="subdivision"/>
|
146
|
-
<xs:enumeration value="infradivision"/>
|
147
|
-
<xs:enumeration value="superclass"/>
|
148
|
-
<xs:enumeration value="class"/>
|
149
|
-
<xs:enumeration value="subclass"/>
|
150
|
-
<xs:enumeration value="infraclass"/>
|
151
|
-
<xs:enumeration value="superlegion"/>
|
152
|
-
<xs:enumeration value="legion"/>
|
153
|
-
<xs:enumeration value="sublegion"/>
|
154
|
-
<xs:enumeration value="infralegion"/>
|
155
|
-
<xs:enumeration value="supercohort"/>
|
156
|
-
<xs:enumeration value="cohort"/>
|
157
|
-
<xs:enumeration value="subcohort"/>
|
158
|
-
<xs:enumeration value="infracohort"/>
|
159
|
-
<xs:enumeration value="superorder"/>
|
160
|
-
<xs:enumeration value="order"/>
|
161
|
-
<xs:enumeration value="suborder"/>
|
162
|
-
<xs:enumeration value="superfamily"/>
|
163
|
-
<xs:enumeration value="family"/>
|
164
|
-
<xs:enumeration value="subfamily"/>
|
165
|
-
<xs:enumeration value="supertribe"/>
|
166
|
-
<xs:enumeration value="tribe"/>
|
167
|
-
<xs:enumeration value="subtribe"/>
|
168
|
-
<xs:enumeration value="infratribe"/>
|
169
|
-
<xs:enumeration value="genus"/>
|
170
|
-
<xs:enumeration value="subgenus"/>
|
171
|
-
<xs:enumeration value="superspecies"/>
|
172
|
-
<xs:enumeration value="species"/>
|
173
|
-
<xs:enumeration value="subspecies"/>
|
174
|
-
<xs:enumeration value="variety"/>
|
175
|
-
<xs:enumeration value="subvariety"/>
|
176
|
-
<xs:enumeration value="form"/>
|
177
|
-
<xs:enumeration value="subform"/>
|
178
|
-
<xs:enumeration value="cultivar"/>
|
179
|
-
<xs:enumeration value="unknown"/>
|
180
|
-
<xs:enumeration value="other"/>
|
181
|
-
</xs:restriction>
|
182
|
-
</xs:simpleType>
|
183
|
-
<!-- Sequence:-->
|
184
|
-
<xs:complexType name="Sequence">
|
185
|
-
<xs:annotation>
|
186
|
-
<xs:documentation> Element Sequence is used to represent a molecular sequence (Protein, DNA, RNA) associated
|
187
|
-
with a node. 'symbol' is a short (maximal ten characters) symbol of the sequence (e.g. 'ACTM') whereas
|
188
|
-
'name' is used for the full name (e.g. 'muscle Actin'). 'location' is used for the location of a sequence on
|
189
|
-
a genome/chromosome. The actual sequence can be stored with the 'mol_seq' element. Attribute 'type' is used
|
190
|
-
to indicate the type of sequence ('dna', 'rna', or 'protein'). One intended use for 'id_ref' is to link a
|
191
|
-
sequence to a taxonomy (via the taxonomy's 'id_source') in case of multiple sequences and taxonomies per
|
192
|
-
node. </xs:documentation>
|
193
|
-
</xs:annotation>
|
194
|
-
<xs:sequence>
|
195
|
-
<xs:element name="symbol" type="phy:SequenceSymbol" minOccurs="0"/>
|
196
|
-
<xs:element name="accession" type="phy:Accession" minOccurs="0"/>
|
197
|
-
<xs:element name="name" type="xs:token" minOccurs="0"/>
|
198
|
-
<xs:element name="location" type="xs:token" minOccurs="0"/>
|
199
|
-
<xs:element name="mol_seq" type="phy:MolSeq" minOccurs="0"/>
|
200
|
-
<xs:element name="uri" type="phy:Uri" minOccurs="0"/>
|
201
|
-
<xs:element name="annotation" type="phy:Annotation" minOccurs="0" maxOccurs="unbounded"/>
|
202
|
-
<xs:element name="domain_architecture" type="phy:DomainArchitecture" minOccurs="0"/>
|
203
|
-
<xs:any minOccurs="0" maxOccurs="unbounded" processContents="lax" namespace="##other"/>
|
204
|
-
</xs:sequence>
|
205
|
-
<xs:attribute name="type" type="phy:SequenceType"/>
|
206
|
-
<xs:attribute name="id_source" type="phy:id_source"/>
|
207
|
-
<xs:attribute name="id_ref" type="phy:id_ref"/>
|
208
|
-
</xs:complexType>
|
209
|
-
<xs:simpleType name="SequenceSymbol">
|
210
|
-
<xs:restriction base="xs:token">
|
211
|
-
<xs:pattern value="\S{1,10}"/>
|
212
|
-
</xs:restriction>
|
213
|
-
</xs:simpleType>
|
214
|
-
<xs:complexType name="MolSeq">
|
215
|
-
<xs:annotation>
|
216
|
-
<xs:documentation> Element 'mol_seq' is used to store molecular sequences. The 'is_aligned' attribute is used
|
217
|
-
to indicate that this molecular sequence is aligned with all other sequences in the same phylogeny for
|
218
|
-
which 'is_aligned' is true as well (which, in most cases, means that gaps were introduced, and that all
|
219
|
-
sequences for which 'is_aligned' is true must have the same length).</xs:documentation>
|
220
|
-
</xs:annotation>
|
221
|
-
<xs:simpleContent>
|
222
|
-
<xs:extension base="phy:MolSeqChars">
|
223
|
-
<xs:attribute name="is_aligned" type="xs:boolean"/>
|
224
|
-
</xs:extension>
|
225
|
-
</xs:simpleContent>
|
226
|
-
</xs:complexType>
|
227
|
-
<xs:simpleType name="MolSeqChars">
|
228
|
-
<xs:restriction base="xs:token">
|
229
|
-
<xs:pattern value="[a-zA-Z\.\-\?\*_]+"/>
|
230
|
-
</xs:restriction>
|
231
|
-
</xs:simpleType>
|
232
|
-
<xs:simpleType name="SequenceType">
|
233
|
-
<xs:restriction base="xs:token">
|
234
|
-
<xs:enumeration value="rna"/>
|
235
|
-
<xs:enumeration value="dna"/>
|
236
|
-
<xs:enumeration value="protein"/>
|
237
|
-
</xs:restriction>
|
238
|
-
</xs:simpleType>
|
239
|
-
<!-- Accession:-->
|
240
|
-
<xs:complexType name="Accession">
|
241
|
-
<xs:annotation>
|
242
|
-
<xs:documentation> Element Accession is used to capture the local part in a sequence identifier (e.g. 'P17304'
|
243
|
-
in 'UniProtKB:P17304', in which case the 'source' attribute would be 'UniProtKB'). </xs:documentation>
|
244
|
-
</xs:annotation>
|
245
|
-
<xs:simpleContent>
|
246
|
-
<xs:extension base="xs:token">
|
247
|
-
<xs:attribute name="source" type="xs:token" use="required"/>
|
248
|
-
</xs:extension>
|
249
|
-
</xs:simpleContent>
|
250
|
-
</xs:complexType>
|
251
|
-
<!-- DomainArchitecture: -->
|
252
|
-
<xs:complexType name="DomainArchitecture">
|
253
|
-
<xs:annotation>
|
254
|
-
<xs:documentation> This is used to describe the domain architecture of a protein. Attribute 'length' is the total
|
255
|
-
length of the protein</xs:documentation>
|
256
|
-
</xs:annotation>
|
257
|
-
<xs:sequence>
|
258
|
-
<xs:element name="domain" type="phy:ProteinDomain" minOccurs="1" maxOccurs="unbounded"/>
|
259
|
-
</xs:sequence>
|
260
|
-
<xs:attribute name="length" type="xs:nonNegativeInteger"/>
|
261
|
-
</xs:complexType>
|
262
|
-
<xs:complexType name="ProteinDomain">
|
263
|
-
<xs:annotation>
|
264
|
-
<xs:documentation> To represent an individual domain in a domain architecture. The name/unique identifier is
|
265
|
-
described via the 'id' attribute. 'confidence' can be used to store (e.g.) E-values.</xs:documentation>
|
266
|
-
</xs:annotation>
|
267
|
-
<xs:simpleContent>
|
268
|
-
<xs:extension base="xs:token">
|
269
|
-
<xs:attribute name="from" type="xs:nonNegativeInteger" use="required"/>
|
270
|
-
<xs:attribute name="to" type="xs:nonNegativeInteger" use="required"/>
|
271
|
-
<xs:attribute name="confidence" type="xs:double"/>
|
272
|
-
<xs:attribute name="id" type="xs:token"/>
|
273
|
-
</xs:extension>
|
274
|
-
</xs:simpleContent>
|
275
|
-
</xs:complexType>
|
276
|
-
<!-- Events:-->
|
277
|
-
<xs:complexType name="Events">
|
278
|
-
<xs:annotation>
|
279
|
-
<xs:documentation> Events at the root node of a clade (e.g. one gene duplication). </xs:documentation>
|
280
|
-
</xs:annotation>
|
281
|
-
<xs:sequence>
|
282
|
-
<xs:element name="type" type="phy:EventType" minOccurs="0"/>
|
283
|
-
<xs:element name="duplications" type="xs:nonNegativeInteger" minOccurs="0"/>
|
284
|
-
<xs:element name="speciations" type="xs:nonNegativeInteger" minOccurs="0"/>
|
285
|
-
<xs:element name="losses" type="xs:nonNegativeInteger" minOccurs="0"/>
|
286
|
-
<xs:element name="confidence" type="phy:Confidence" minOccurs="0"/>
|
287
|
-
</xs:sequence>
|
288
|
-
</xs:complexType>
|
289
|
-
<xs:simpleType name="EventType">
|
290
|
-
<xs:restriction base="xs:token">
|
291
|
-
<xs:enumeration value="transfer"/>
|
292
|
-
<xs:enumeration value="fusion"/>
|
293
|
-
<xs:enumeration value="speciation_or_duplication"/>
|
294
|
-
<xs:enumeration value="other"/>
|
295
|
-
<xs:enumeration value="mixed"/>
|
296
|
-
<xs:enumeration value="unassigned"/>
|
297
|
-
</xs:restriction>
|
298
|
-
</xs:simpleType>
|
299
|
-
<!--BinaryCharacters:-->
|
300
|
-
<xs:complexType name="BinaryCharacters">
|
301
|
-
<xs:annotation>
|
302
|
-
<xs:documentation> The names and/or counts of binary characters present, gained, and lost at the root of a
|
303
|
-
clade. </xs:documentation>
|
304
|
-
</xs:annotation>
|
305
|
-
<xs:sequence>
|
306
|
-
<xs:element name="gained" type="phy:BinaryCharacterList" minOccurs="0"/>
|
307
|
-
<xs:element name="lost" type="phy:BinaryCharacterList" minOccurs="0"/>
|
308
|
-
<xs:element name="present" type="phy:BinaryCharacterList" minOccurs="0"/>
|
309
|
-
<xs:element name="absent" type="phy:BinaryCharacterList" minOccurs="0"/>
|
310
|
-
</xs:sequence>
|
311
|
-
<xs:attribute name="type" type="xs:token"/>
|
312
|
-
<xs:attribute name="gained_count" type="xs:nonNegativeInteger"/>
|
313
|
-
<xs:attribute name="lost_count" type="xs:nonNegativeInteger"/>
|
314
|
-
<xs:attribute name="present_count" type="xs:nonNegativeInteger"/>
|
315
|
-
<xs:attribute name="absent_count" type="xs:nonNegativeInteger"/>
|
316
|
-
</xs:complexType>
|
317
|
-
<xs:complexType name="BinaryCharacterList">
|
318
|
-
<xs:sequence>
|
319
|
-
<xs:element name="bc" type="xs:token" maxOccurs="unbounded"/>
|
320
|
-
</xs:sequence>
|
321
|
-
</xs:complexType>
|
322
|
-
<!-- Reference:-->
|
323
|
-
<xs:complexType name="Reference">
|
324
|
-
<xs:annotation>
|
325
|
-
<xs:documentation> A literature reference for a clade. It is recommended to use the 'doi' attribute instead of
|
326
|
-
the free text 'desc' element whenever possible. </xs:documentation>
|
327
|
-
</xs:annotation>
|
328
|
-
<xs:sequence>
|
329
|
-
<xs:element name="desc" type="xs:token" minOccurs="0"/>
|
330
|
-
</xs:sequence>
|
331
|
-
<xs:attribute name="doi" type="phy:Doi"/>
|
332
|
-
</xs:complexType>
|
333
|
-
<xs:simpleType name="Doi">
|
334
|
-
<xs:restriction base="xs:token">
|
335
|
-
<xs:pattern value="[a-zA-Z0-9_\.]+/\S+"/>
|
336
|
-
</xs:restriction>
|
337
|
-
</xs:simpleType>
|
338
|
-
<!-- Annotation:-->
|
339
|
-
<xs:complexType name="Annotation">
|
340
|
-
<xs:annotation>
|
341
|
-
<xs:documentation> The annotation of a molecular sequence. It is recommended to annotate by using the optional
|
342
|
-
'ref' attribute (some examples of acceptable values for the ref attribute: 'GO:0008270',
|
343
|
-
'KEGG:Tetrachloroethene degradation', 'EC:1.1.1.1'). Optional element 'desc' allows for a free text
|
344
|
-
description. Optional element 'confidence' is used to state the type and value of support for an annotation.
|
345
|
-
Similarly, optional attribute 'evidence' is used to describe the evidence for an annotation as free text
|
346
|
-
(e.g. 'experimental'). Optional element 'property' allows for further, typed and referenced annotations from
|
347
|
-
external resources.</xs:documentation>
|
348
|
-
</xs:annotation>
|
349
|
-
<xs:sequence>
|
350
|
-
<xs:element name="desc" type="xs:token" minOccurs="0"/>
|
351
|
-
<xs:element name="confidence" type="phy:Confidence" minOccurs="0"/>
|
352
|
-
<xs:element name="property" type="phy:Property" minOccurs="0" maxOccurs="unbounded"/>
|
353
|
-
<xs:element name="uri" type="phy:Uri" minOccurs="0"/>
|
354
|
-
</xs:sequence>
|
355
|
-
<xs:attribute name="ref" type="phy:ref"/>
|
356
|
-
<xs:attribute name="source" type="xs:token"/>
|
357
|
-
<xs:attribute name="evidence" type="xs:token"/>
|
358
|
-
<xs:attribute name="type" type="xs:token"/>
|
359
|
-
</xs:complexType>
|
360
|
-
<!-- Property:-->
|
361
|
-
<xs:complexType name="Property" mixed="true">
|
362
|
-
<xs:annotation>
|
363
|
-
<xs:documentation> Property allows for typed and referenced properties from external resources to be attached
|
364
|
-
to 'Phylogeny', 'Clade', and 'Annotation'. The value of a property is its mixed (free text) content.
|
365
|
-
Attribute 'datatype' indicates the type of a property and is limited to xsd-datatypes (e.g. 'xsd:string',
|
366
|
-
'xsd:boolean', 'xsd:integer', 'xsd:decimal', 'xsd:float', 'xsd:double', 'xsd:date', 'xsd:anyURI'). Attribute
|
367
|
-
'applies_to' indicates the item to which a property applies (e.g. 'node' for the parent node of a clade,
|
368
|
-
'parent_branch' for the parent branch of a clade). Attribute 'id_ref' allows a property to be attached
|
369
|
-
specifically to one element (on the xml-level). Optional attribute 'unit' is used to indicate the unit of
|
370
|
-
the property. An example: <property datatype="xsd:integer" ref="NOAA:depth" applies_to="clade"
|
371
|
-
unit="METRIC:m"> 200 </property> </xs:documentation>
|
372
|
-
</xs:annotation>
|
373
|
-
<xs:attribute name="ref" type="phy:ref" use="required"/>
|
374
|
-
<xs:attribute name="unit" type="phy:ref"/>
|
375
|
-
<xs:attribute name="datatype" type="phy:PropertyDataType" use="required"/>
|
376
|
-
<xs:attribute name="applies_to" type="phy:AppliesTo" use="required"/>
|
377
|
-
<xs:attribute name="id_ref" type="phy:id_ref"/>
|
378
|
-
</xs:complexType>
|
379
|
-
<xs:simpleType name="ref">
|
380
|
-
<xs:restriction base="xs:token">
|
381
|
-
<xs:pattern value="[a-zA-Z0-9_]+:[a-zA-Z0-9_\.\-\s]+"/>
|
382
|
-
</xs:restriction>
|
383
|
-
</xs:simpleType>
|
384
|
-
<xs:simpleType name="AppliesTo">
|
385
|
-
<xs:restriction base="xs:token">
|
386
|
-
<xs:enumeration value="phylogeny"/>
|
387
|
-
<xs:enumeration value="clade"/>
|
388
|
-
<xs:enumeration value="node"/>
|
389
|
-
<xs:enumeration value="annotation"/>
|
390
|
-
<xs:enumeration value="parent_branch"/>
|
391
|
-
<xs:enumeration value="other"/>
|
392
|
-
</xs:restriction>
|
393
|
-
</xs:simpleType>
|
394
|
-
<xs:simpleType name="PropertyDataType">
|
395
|
-
<xs:restriction base="xs:token">
|
396
|
-
<xs:enumeration value="xsd:string"/>
|
397
|
-
<xs:enumeration value="xsd:boolean"/>
|
398
|
-
<xs:enumeration value="xsd:decimal"/>
|
399
|
-
<xs:enumeration value="xsd:float"/>
|
400
|
-
<xs:enumeration value="xsd:double"/>
|
401
|
-
<xs:enumeration value="xsd:duration"/>
|
402
|
-
<xs:enumeration value="xsd:dateTime"/>
|
403
|
-
<xs:enumeration value="xsd:time"/>
|
404
|
-
<xs:enumeration value="xsd:date"/>
|
405
|
-
<xs:enumeration value="xsd:gYearMonth"/>
|
406
|
-
<xs:enumeration value="xsd:gYear"/>
|
407
|
-
<xs:enumeration value="xsd:gMonthDay"/>
|
408
|
-
<xs:enumeration value="xsd:gDay"/>
|
409
|
-
<xs:enumeration value="xsd:gMonth"/>
|
410
|
-
<xs:enumeration value="xsd:hexBinary"/>
|
411
|
-
<xs:enumeration value="xsd:base64Binary"/>
|
412
|
-
<xs:enumeration value="xsd:anyURI"/>
|
413
|
-
<xs:enumeration value="xsd:normalizedString"/>
|
414
|
-
<xs:enumeration value="xsd:token"/>
|
415
|
-
<xs:enumeration value="xsd:integer"/>
|
416
|
-
<xs:enumeration value="xsd:nonPositiveInteger"/>
|
417
|
-
<xs:enumeration value="xsd:negativeInteger"/>
|
418
|
-
<xs:enumeration value="xsd:long"/>
|
419
|
-
<xs:enumeration value="xsd:int"/>
|
420
|
-
<xs:enumeration value="xsd:short"/>
|
421
|
-
<xs:enumeration value="xsd:byte"/>
|
422
|
-
<xs:enumeration value="xsd:nonNegativeInteger"/>
|
423
|
-
<xs:enumeration value="xsd:unsignedLong"/>
|
424
|
-
<xs:enumeration value="xsd:unsignedInt"/>
|
425
|
-
<xs:enumeration value="xsd:unsignedShort"/>
|
426
|
-
<xs:enumeration value="xsd:unsignedByte"/>
|
427
|
-
<xs:enumeration value="xsd:positiveInteger"/>
|
428
|
-
</xs:restriction>
|
429
|
-
</xs:simpleType>
|
430
|
-
<!--Uri-->
|
431
|
-
<xs:complexType name="Uri">
|
432
|
-
<xs:annotation>
|
433
|
-
<xs:documentation> A uniform resource identifier. In general, this is expected to be a URL (for example, to
|
434
|
-
link to an image on a website, in which case the 'type' attribute might be 'image' and 'desc' might be
|
435
|
-
'image of a California sea hare'). </xs:documentation>
|
436
|
-
</xs:annotation>
|
437
|
-
<xs:simpleContent>
|
438
|
-
<xs:extension base="xs:anyURI">
|
439
|
-
<xs:attribute name="desc" type="xs:token"/>
|
440
|
-
<xs:attribute name="type" type="xs:token"/>
|
441
|
-
</xs:extension>
|
442
|
-
</xs:simpleContent>
|
443
|
-
</xs:complexType>
|
444
|
-
<!-- Confidence:-->
|
445
|
-
<xs:complexType name="Confidence">
|
446
|
-
<xs:annotation>
|
447
|
-
<xs:documentation> A general purpose confidence element. For example this can be used to express the bootstrap
|
448
|
-
support value of a clade (in which case the 'type' attribute is 'bootstrap').</xs:documentation>
|
449
|
-
</xs:annotation>
|
450
|
-
<xs:simpleContent>
|
451
|
-
<xs:extension base="xs:double">
|
452
|
-
<xs:attribute name="type" type="xs:token" use="required"/>
|
453
|
-
</xs:extension>
|
454
|
-
</xs:simpleContent>
|
455
|
-
</xs:complexType>
|
456
|
-
<!-- Identifier:-->
|
457
|
-
<xs:complexType name="Id">
|
458
|
-
<xs:annotation>
|
459
|
-
<xs:documentation> A general purpose identifier element. Allows to indicate the provider (or authority) of an
|
460
|
-
identifier. </xs:documentation>
|
461
|
-
</xs:annotation>
|
462
|
-
<xs:simpleContent>
|
463
|
-
<xs:extension base="xs:token">
|
464
|
-
<xs:attribute name="provider" type="xs:token"/>
|
465
|
-
</xs:extension>
|
466
|
-
</xs:simpleContent>
|
467
|
-
</xs:complexType>
|
468
|
-
<!-- Distribution:-->
|
469
|
-
<xs:complexType name="Distribution">
|
470
|
-
<xs:annotation>
|
471
|
-
<xs:documentation> The geographic distribution of the items of a clade (species, sequences), intended for
|
472
|
-
phylogeographic applications. The location can be described either by free text in the 'desc' element and/or
|
473
|
-
by the coordinates of one or more 'Points' (similar to the 'Point' element in Google's KML format) or by
|
474
|
-
'Polygons'. </xs:documentation>
|
475
|
-
</xs:annotation>
|
476
|
-
<xs:sequence>
|
477
|
-
<xs:element name="desc" type="xs:token" minOccurs="0"/>
|
478
|
-
<xs:element name="point" type="phy:Point" minOccurs="0" maxOccurs="unbounded"/>
|
479
|
-
<xs:element name="polygon" type="phy:Polygon" minOccurs="0" maxOccurs="unbounded"/>
|
480
|
-
</xs:sequence>
|
481
|
-
</xs:complexType>
|
482
|
-
<xs:complexType name="Point">
|
483
|
-
<xs:annotation>
|
484
|
-
<xs:documentation> The coordinates of a point with an optional altitude (used by element 'Distribution').
|
485
|
-
Required attributes are the 'geodetic_datum' used to indicate the geodetic datum (also called 'map datum',
|
486
|
-
for example Google's KML uses 'WGS84'). Attribute 'alt_unit' is the unit for the altitude (e.g. 'meter').
|
487
|
-
</xs:documentation>
|
488
|
-
</xs:annotation>
|
489
|
-
<xs:sequence>
|
490
|
-
<xs:element name="lat" type="xs:decimal"/>
|
491
|
-
<xs:element name="long" type="xs:decimal"/>
|
492
|
-
<xs:element name="alt" type="xs:decimal" minOccurs="0"/>
|
493
|
-
</xs:sequence>
|
494
|
-
<xs:attribute name="geodetic_datum" type="xs:token" use="required"/>
|
495
|
-
<xs:attribute name="alt_unit" type="xs:token"/>
|
496
|
-
</xs:complexType>
|
497
|
-
<xs:complexType name="Polygon">
|
498
|
-
<xs:annotation>
|
499
|
-
<xs:documentation> A polygon defined by a list of 'Points' (used by element 'Distribution').
|
500
|
-
</xs:documentation>
|
501
|
-
</xs:annotation>
|
502
|
-
<xs:sequence>
|
503
|
-
<xs:element name="point" type="phy:Point" minOccurs="3" maxOccurs="unbounded"/>
|
504
|
-
</xs:sequence>
|
505
|
-
</xs:complexType>
|
506
|
-
<!-- Date:-->
|
507
|
-
<xs:complexType name="Date">
|
508
|
-
<xs:annotation>
|
509
|
-
<xs:documentation> A date associated with a clade/node. Its value can be numerical by using the 'value' element
|
510
|
-
and/or free text with the 'desc' element (e.g. 'Silurian'). If a numerical value is used, it is recommended
|
511
|
-
to employ the 'unit' attribute to indicate the type of the numerical value (e.g. 'mya' for 'million years
|
512
|
-
ago'). The elements 'minimum' and 'maximum' are used to indicate a range/confidence
|
513
|
-
interval</xs:documentation>
|
514
|
-
</xs:annotation>
|
515
|
-
<xs:sequence>
|
516
|
-
<xs:element name="desc" type="xs:token" minOccurs="0"/>
|
517
|
-
<xs:element name="value" type="xs:decimal" minOccurs="0"/>
|
518
|
-
<xs:element name="minimum" type="xs:decimal" minOccurs="0"/>
|
519
|
-
<xs:element name="maximum" type="xs:decimal" minOccurs="0"/>
|
520
|
-
</xs:sequence>
|
521
|
-
<xs:attribute name="unit" type="xs:token"/>
|
522
|
-
</xs:complexType>
|
523
|
-
<!-- BranchColor:-->
|
524
|
-
<xs:complexType name="BranchColor">
|
525
|
-
<xs:annotation>
|
526
|
-
<xs:documentation> This indicates the color of a clade when rendered (the color applies to the whole clade
|
527
|
-
unless overwritten by the color(s) of sub clades).</xs:documentation>
|
528
|
-
</xs:annotation>
|
529
|
-
<xs:sequence>
|
530
|
-
<xs:element name="red" type="xs:unsignedByte"/>
|
531
|
-
<xs:element name="green" type="xs:unsignedByte"/>
|
532
|
-
<xs:element name="blue" type="xs:unsignedByte"/>
|
533
|
-
</xs:sequence>
|
534
|
-
</xs:complexType>
|
535
|
-
<!-- SequenceRelation:-->
|
536
|
-
<xs:complexType name="SequenceRelation">
|
537
|
-
<xs:annotation>
|
538
|
-
<xs:documentation> This is used to express a typed relationship between two sequences. For example it could be
|
539
|
-
used to describe an orthology (in which case attribute 'type' is 'orthology'). </xs:documentation>
|
540
|
-
</xs:annotation>
|
541
|
-
<xs:sequence>
|
542
|
-
<xs:element name="confidence" type="phy:Confidence" minOccurs="0"/>
|
543
|
-
</xs:sequence>
|
544
|
-
<xs:attribute name="id_ref_0" type="phy:id_ref" use="required"/>
|
545
|
-
<xs:attribute name="id_ref_1" type="phy:id_ref" use="required"/>
|
546
|
-
<xs:attribute name="distance" type="xs:double"/>
|
547
|
-
<xs:attribute name="type" type="phy:SequenceRelationType" use="required"/>
|
548
|
-
</xs:complexType>
|
549
|
-
<xs:simpleType name="SequenceRelationType">
|
550
|
-
<xs:restriction base="xs:token">
|
551
|
-
<xs:enumeration value="orthology"/>
|
552
|
-
<xs:enumeration value="one_to_one_orthology"/>
|
553
|
-
<xs:enumeration value="super_orthology"/>
|
554
|
-
<xs:enumeration value="paralogy"/>
|
555
|
-
<xs:enumeration value="ultra_paralogy"/>
|
556
|
-
<xs:enumeration value="xenology"/>
|
557
|
-
<xs:enumeration value="unknown"/>
|
558
|
-
<xs:enumeration value="other"/>
|
559
|
-
</xs:restriction>
|
560
|
-
</xs:simpleType>
|
561
|
-
<!-- CladeRelation:-->
|
562
|
-
<xs:complexType name="CladeRelation">
|
563
|
-
<xs:annotation>
|
564
|
-
<xs:documentation> This is used to express a typed relationship between two clades. For example it could be
|
565
|
-
used to describe multiple parents of a clade.</xs:documentation>
|
566
|
-
</xs:annotation>
|
567
|
-
<xs:sequence>
|
568
|
-
<xs:element name="confidence" type="phy:Confidence" minOccurs="0"/>
|
569
|
-
</xs:sequence>
|
570
|
-
<xs:attribute name="id_ref_0" type="phy:id_ref" use="required"/>
|
571
|
-
<xs:attribute name="id_ref_1" type="phy:id_ref" use="required"/>
|
572
|
-
<xs:attribute name="distance" type="xs:double"/>
|
573
|
-
<xs:attribute name="type" type="xs:token" use="required"/>
|
574
|
-
</xs:complexType>
|
575
|
-
<!-- Used to link elements together on the xml level:-->
|
576
|
-
<xs:simpleType name="id_source">
|
577
|
-
<xs:restriction base="xs:ID"/>
|
578
|
-
</xs:simpleType>
|
579
|
-
<xs:simpleType name="id_ref">
|
580
|
-
<xs:restriction base="xs:IDREF"/>
|
581
|
-
</xs:simpleType>
|
582
|
-
</xs:schema>
|