bio 1.4.3.0001 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.travis.yml +39 -33
- data/BSDL +22 -0
- data/COPYING +2 -2
- data/COPYING.ja +36 -36
- data/ChangeLog +2404 -1025
- data/KNOWN_ISSUES.rdoc +15 -55
- data/README.rdoc +17 -23
- data/RELEASE_NOTES.rdoc +246 -183
- data/Rakefile +3 -2
- data/bin/br_biofetch.rb +29 -5
- data/bioruby.gemspec +15 -32
- data/bioruby.gemspec.erb +10 -20
- data/doc/ChangeLog-1.4.3 +1478 -0
- data/doc/RELEASE_NOTES-1.4.3.rdoc +204 -0
- data/doc/Tutorial.rd +0 -6
- data/doc/Tutorial.rd.html +7 -12
- data/doc/Tutorial.rd.ja +960 -1064
- data/doc/Tutorial.rd.ja.html +977 -1067
- data/gemfiles/Gemfile.travis-jruby1.8 +2 -1
- data/gemfiles/Gemfile.travis-jruby1.9 +2 -4
- data/gemfiles/Gemfile.travis-rbx +13 -0
- data/gemfiles/Gemfile.travis-ruby1.8 +2 -1
- data/gemfiles/Gemfile.travis-ruby1.9 +2 -4
- data/gemfiles/Gemfile.travis-ruby2.2 +9 -0
- data/lib/bio.rb +10 -43
- data/lib/bio/alignment.rb +8 -14
- data/lib/bio/appl/blast.rb +1 -2
- data/lib/bio/appl/blast/format0.rb +18 -7
- data/lib/bio/appl/blast/remote.rb +0 -9
- data/lib/bio/appl/blast/report.rb +1 -1
- data/lib/bio/appl/clustalw/report.rb +3 -1
- data/lib/bio/appl/genscan/report.rb +1 -2
- data/lib/bio/appl/iprscan/report.rb +1 -2
- data/lib/bio/appl/meme/mast.rb +4 -4
- data/lib/bio/appl/meme/mast/report.rb +1 -1
- data/lib/bio/appl/paml/codeml.rb +2 -2
- data/lib/bio/appl/paml/codeml/report.rb +1 -0
- data/lib/bio/appl/paml/common.rb +1 -1
- data/lib/bio/appl/sosui/report.rb +1 -2
- data/lib/bio/command.rb +62 -2
- data/lib/bio/data/aa.rb +13 -31
- data/lib/bio/data/codontable.rb +1 -2
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +1 -0
- data/lib/bio/db/biosql/sequence.rb +1 -1
- data/lib/bio/db/embl/common.rb +1 -1
- data/lib/bio/db/embl/embl.rb +5 -4
- data/lib/bio/db/embl/format_embl.rb +3 -3
- data/lib/bio/db/embl/sptr.rb +9 -1444
- data/lib/bio/db/embl/swissprot.rb +12 -29
- data/lib/bio/db/embl/trembl.rb +13 -30
- data/lib/bio/db/embl/uniprot.rb +12 -29
- data/lib/bio/db/embl/uniprotkb.rb +1455 -0
- data/lib/bio/db/fasta.rb +17 -0
- data/lib/bio/db/fasta/defline.rb +1 -3
- data/lib/bio/db/fastq.rb +1 -1
- data/lib/bio/db/genbank/ddbj.rb +9 -5
- data/lib/bio/db/genbank/refseq.rb +11 -3
- data/lib/bio/db/gff.rb +3 -4
- data/lib/bio/db/go.rb +5 -6
- data/lib/bio/db/kegg/module.rb +4 -5
- data/lib/bio/db/kegg/pathway.rb +4 -5
- data/lib/bio/db/kegg/reaction.rb +1 -1
- data/lib/bio/db/nexus.rb +3 -2
- data/lib/bio/db/pdb/pdb.rb +2 -2
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +82 -59
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +2 -2
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +1 -2
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +1 -2
- data/lib/bio/db/transfac.rb +1 -1
- data/lib/bio/io/das.rb +40 -41
- data/lib/bio/io/fastacmd.rb +0 -16
- data/lib/bio/io/fetch.rb +111 -55
- data/lib/bio/io/flatfile/buffer.rb +4 -5
- data/lib/bio/io/hinv.rb +2 -3
- data/lib/bio/io/ncbirest.rb +43 -6
- data/lib/bio/io/pubmed.rb +76 -81
- data/lib/bio/io/togows.rb +33 -10
- data/lib/bio/map.rb +1 -1
- data/lib/bio/pathway.rb +1 -1
- data/lib/bio/sequence/compat.rb +1 -1
- data/lib/bio/sequence/na.rb +63 -12
- data/lib/bio/shell.rb +0 -2
- data/lib/bio/shell/core.rb +5 -6
- data/lib/bio/shell/interface.rb +3 -4
- data/lib/bio/shell/irb.rb +1 -2
- data/lib/bio/shell/plugin/entry.rb +2 -3
- data/lib/bio/shell/plugin/seq.rb +7 -6
- data/lib/bio/shell/setup.rb +1 -2
- data/lib/bio/tree.rb +2 -2
- data/lib/bio/util/contingency_table.rb +0 -2
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +2 -2
- data/lib/bio/util/sirna.rb +76 -16
- data/lib/bio/version.rb +8 -9
- data/sample/benchmark_clustalw_report.rb +47 -0
- data/sample/biofetch.rb +248 -151
- data/setup.rb +6 -7
- data/test/data/clustalw/example1-seqnos.aln +58 -0
- data/test/network/bio/appl/blast/test_remote.rb +1 -15
- data/test/network/bio/appl/test_blast.rb +0 -12
- data/test/network/bio/io/test_pubmed.rb +49 -0
- data/test/network/bio/io/test_togows.rb +0 -1
- data/test/network/bio/test_command.rb +65 -2
- data/test/unit/bio/appl/bl2seq/test_report.rb +0 -1
- data/test/unit/bio/appl/blast/test_report.rb +110 -48
- data/test/unit/bio/appl/clustalw/test_report.rb +67 -51
- data/test/unit/bio/appl/sim4/test_report.rb +46 -17
- data/test/unit/bio/appl/test_blast.rb +2 -2
- data/test/unit/bio/db/embl/test_embl.rb +0 -1
- data/test/unit/bio/db/embl/test_embl_rel89.rb +0 -1
- data/test/unit/bio/db/embl/{test_sptr.rb → test_uniprotkb.rb} +111 -115
- data/test/unit/bio/db/embl/{test_uniprot_new_part.rb → test_uniprotkb_new_part.rb} +11 -11
- data/test/unit/bio/db/genbank/test_genbank.rb +10 -4
- data/test/unit/bio/db/pdb/test_pdb.rb +14 -8
- data/test/unit/bio/db/test_fasta.rb +41 -1
- data/test/unit/bio/db/test_fastq.rb +14 -4
- data/test/unit/bio/db/test_gff.rb +2 -2
- data/test/unit/bio/db/test_phyloxml.rb +30 -30
- data/test/unit/bio/db/test_phyloxml_writer.rb +2 -2
- data/test/unit/bio/io/flatfile/test_autodetection.rb +1 -2
- data/test/unit/bio/io/flatfile/test_buffer.rb +7 -1
- data/test/unit/bio/io/flatfile/test_splitter.rb +1 -1
- data/test/unit/bio/io/test_togows.rb +3 -2
- data/test/unit/bio/sequence/test_dblink.rb +1 -1
- data/test/unit/bio/sequence/test_na.rb +3 -1
- data/test/unit/bio/test_alignment.rb +1 -2
- data/test/unit/bio/test_command.rb +5 -4
- data/test/unit/bio/test_db.rb +4 -2
- data/test/unit/bio/test_pathway.rb +25 -10
- data/test/unit/bio/util/test_sirna.rb +22 -22
- metadata +656 -1430
- data/doc/KEGG_API.rd +0 -1843
- data/doc/KEGG_API.rd.ja +0 -1834
- data/extconf.rb +0 -2
- data/lib/bio/appl/blast/ddbj.rb +0 -131
- data/lib/bio/db/kegg/taxonomy.rb +0 -280
- data/lib/bio/io/dbget.rb +0 -194
- data/lib/bio/io/ddbjrest.rb +0 -344
- data/lib/bio/io/ddbjxml.rb +0 -458
- data/lib/bio/io/ebisoap.rb +0 -158
- data/lib/bio/io/ensembl.rb +0 -229
- data/lib/bio/io/higet.rb +0 -73
- data/lib/bio/io/keggapi.rb +0 -363
- data/lib/bio/io/ncbisoap.rb +0 -156
- data/lib/bio/io/soapwsdl.rb +0 -119
- data/lib/bio/shell/plugin/keggapi.rb +0 -181
- data/lib/bio/shell/plugin/soap.rb +0 -87
- data/sample/dbget +0 -37
- data/sample/demo_ddbjxml.rb +0 -212
- data/sample/demo_kegg_taxonomy.rb +0 -92
- data/sample/demo_keggapi.rb +0 -502
- data/sample/psortplot_html.rb +0 -214
- data/test/network/bio/io/test_ddbjrest.rb +0 -47
- data/test/network/bio/io/test_ensembl.rb +0 -230
- data/test/network/bio/io/test_soapwsdl.rb +0 -53
- data/test/unit/bio/io/test_ddbjxml.rb +0 -81
- data/test/unit/bio/io/test_ensembl.rb +0 -111
- data/test/unit/bio/io/test_soapwsdl.rb +0 -33
data/lib/bio/db/fasta.rb
CHANGED
@@ -68,6 +68,7 @@ module Bio
|
|
68
68
|
# A larger range of methods for dealing with Fasta definition lines can be found in FastaDefline, accessed through the FastaFormat#identifiers method.
|
69
69
|
#
|
70
70
|
# f.entry_id #=> "gi|398365175"
|
71
|
+
# f.first_name #=> "gi|398365175|ref|NP_009718.3|"
|
71
72
|
# f.definition #=> "gi|398365175|ref|NP_009718.3| Cdc28p [Saccharomyces cerevisiae S288c]"
|
72
73
|
# f.identifiers #=> Bio::FastaDefline instance
|
73
74
|
# f.accession #=> "NP_009718"
|
@@ -90,6 +91,7 @@ module Bio
|
|
90
91
|
# f.entry #=> ">abc 123 456\nASDF"
|
91
92
|
#
|
92
93
|
# f.entry_id #=> "abc"
|
94
|
+
# f.first_name #=> "abc"
|
93
95
|
# f.definition #=> "abc 123 456"
|
94
96
|
# f.comment #=> nil
|
95
97
|
# f.accession #=> nil
|
@@ -282,6 +284,21 @@ module Bio
|
|
282
284
|
def locus
|
283
285
|
identifiers.locus
|
284
286
|
end
|
287
|
+
|
288
|
+
# Returns the first name (word) of the definition line - everything
|
289
|
+
# before the first whitespace.
|
290
|
+
#
|
291
|
+
# >abc def #=> 'abc'
|
292
|
+
# >gi|398365175|ref|NP_009718.3| Cdc28p [Saccharomyces cerevisiae S288c] #=> 'gi|398365175|ref|NP_009718.3|'
|
293
|
+
# >abc #=> 'abc'
|
294
|
+
def first_name
|
295
|
+
index = definition.index(/\s/)
|
296
|
+
if index.nil?
|
297
|
+
return @definition
|
298
|
+
else
|
299
|
+
return @definition[0...index]
|
300
|
+
end
|
301
|
+
end
|
285
302
|
|
286
303
|
end #class FastaFormat
|
287
304
|
|
data/lib/bio/db/fasta/defline.rb
CHANGED
@@ -6,7 +6,6 @@
|
|
6
6
|
# Toshiaki Katayama <k@bioruby.org>
|
7
7
|
# License:: The Ruby License
|
8
8
|
#
|
9
|
-
# $Id: defline.rb,v 1.1.2.1 2008/06/20 13:22:32 ngoto Exp $
|
10
9
|
#
|
11
10
|
# == Description
|
12
11
|
#
|
@@ -292,7 +291,6 @@ module Bio
|
|
292
291
|
while token = ary.shift
|
293
292
|
if labels = self.class::NSIDs[token] then
|
294
293
|
di = [ token ]
|
295
|
-
idtype = token
|
296
294
|
labels.each do |x|
|
297
295
|
token = ary.shift
|
298
296
|
break unless token
|
@@ -391,7 +389,7 @@ module Bio
|
|
391
389
|
# Shows words used in the defline. Returns an Array.
|
392
390
|
def words(case_sensitive = nil, kill_regexp = self.class::KillRegexpArray,
|
393
391
|
kwhash = self.class::KillWordsHash)
|
394
|
-
a = descriptions.join(' ').split(/[\.\,\;\:\(\)\[\]\{\}\<\>\"\'
|
392
|
+
a = descriptions.join(' ').split(/[\.\,\;\:\(\)\[\]\{\}\<\>\"\'\`\~\/\|\?\!\&\@\# \x00-\x1f\x7f]+/)
|
395
393
|
a.collect! do |x|
|
396
394
|
x.sub!(/\A[\$\*\-\+]+/, '')
|
397
395
|
x.sub!(/[\$\*\-\=]+\z/, '')
|
data/lib/bio/db/fastq.rb
CHANGED
data/lib/bio/db/genbank/ddbj.rb
CHANGED
@@ -5,16 +5,20 @@
|
|
5
5
|
# License:: The Ruby License
|
6
6
|
#
|
7
7
|
|
8
|
-
|
8
|
+
warn "Bio::DDBJ is deprecated. Use Bio::GenBank."
|
9
9
|
|
10
10
|
module Bio
|
11
11
|
|
12
|
-
|
12
|
+
require 'bio/db/genbank/genbank' unless const_defined?(:GenBank)
|
13
13
|
|
14
|
-
|
15
|
-
|
14
|
+
# Bio::DDBJ is deprecated. Use Bio::GenBank.
|
15
|
+
class DDBJ < GenBank
|
16
16
|
|
17
|
-
#
|
17
|
+
# Bio::DDBJ is deprecated. Use Bio::GenBank.
|
18
|
+
def initialize(str)
|
19
|
+
warn "Bio::DDBJ is deprecated. Use Bio::GenBank."
|
20
|
+
super(str)
|
21
|
+
end
|
18
22
|
|
19
23
|
end # DDBJ
|
20
24
|
|
@@ -4,15 +4,23 @@
|
|
4
4
|
# Copyright:: Copyright (C) 2000-2004 Toshiaki Katayama <k@bioruby.org>
|
5
5
|
# License:: The Ruby License
|
6
6
|
#
|
7
|
-
# $Id: refseq.rb,v 1.8 2007/04/05 23:35:40 trevor Exp $
|
8
7
|
#
|
9
8
|
|
10
|
-
|
9
|
+
warn "Bio::RefSeq is deprecated. Use Bio::GenBank."
|
11
10
|
|
12
11
|
module Bio
|
13
12
|
|
13
|
+
require 'bio/db/genbank/genbank' unless const_defined?(:GenBank)
|
14
|
+
|
15
|
+
# Bio::RefSeq is deprecated. Use Bio::GenBank.
|
14
16
|
class RefSeq < GenBank
|
15
|
-
|
17
|
+
|
18
|
+
# Bio::RefSeq is deprecated. Use Bio::GenBank.
|
19
|
+
def initialize(str)
|
20
|
+
warn "Bio::RefSeq is deprecated. Use Bio::GenBank."
|
21
|
+
super(str)
|
22
|
+
end
|
23
|
+
|
16
24
|
end
|
17
25
|
|
18
26
|
end # Bio
|
data/lib/bio/db/gff.rb
CHANGED
@@ -8,7 +8,6 @@
|
|
8
8
|
# 2008 Naohisa Goto <ng@bioruby.org>
|
9
9
|
# License:: The Ruby License
|
10
10
|
#
|
11
|
-
# $Id:$
|
12
11
|
#
|
13
12
|
require 'uri'
|
14
13
|
require 'strscan'
|
@@ -236,10 +235,10 @@ module Bio
|
|
236
235
|
CHAR2BACKSLASH.merge({ '"' => '"', "\\" => "\\" }).freeze
|
237
236
|
|
238
237
|
# prohibited characters in GFF2 columns
|
239
|
-
PROHIBITED_GFF2_COLUMNS = /[\t\r\n\x00-\x1f\x7f\xfe\xff]/
|
238
|
+
PROHIBITED_GFF2_COLUMNS = /[\t\r\n\x00-\x08\x0b\x0c\x0e-\x1f\x7f\xfe\xff]/
|
240
239
|
|
241
240
|
# prohibited characters in GFF2 attribute tags
|
242
|
-
PROHIBITED_GFF2_TAGS = /[\s\"\;\
|
241
|
+
PROHIBITED_GFF2_TAGS = /[\s\"\;\x00-\x08\x0e-\x1f\x7f\xfe\xff]/
|
243
242
|
|
244
243
|
private
|
245
244
|
# (private) escapes GFF2 free text string
|
@@ -1066,7 +1065,7 @@ module Bio
|
|
1066
1065
|
|
1067
1066
|
# parses given string and returns SequenceRegion class
|
1068
1067
|
def self.parse(str)
|
1069
|
-
|
1068
|
+
_, seqid, start, endpos =
|
1070
1069
|
str.chomp.split(/\s+/, 4).collect { |x| unescape(x) }
|
1071
1070
|
self.new(seqid, start, endpos)
|
1072
1071
|
end
|
data/lib/bio/db/go.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Mitsuteru C. Nakao <n@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id:$
|
9
8
|
#
|
10
9
|
# == Gene Ontology
|
11
10
|
#
|
@@ -104,12 +103,12 @@ class GO
|
|
104
103
|
depth = $1.length.to_i
|
105
104
|
rel = $2
|
106
105
|
term = $3
|
107
|
-
goid1 =
|
106
|
+
goid1 = $4
|
108
107
|
en = $5
|
109
108
|
goids = parse_goids(line) # GO:ID[ ; GO:ID...]
|
110
|
-
|
109
|
+
parse_synonyms(line) # synonym:Term[ ; synonym:Term...]
|
111
110
|
stack[depth] = goids.first
|
112
|
-
@id2term[
|
111
|
+
@id2term[goid1] = term
|
113
112
|
|
114
113
|
next if depth == 0
|
115
114
|
|
@@ -128,8 +127,8 @@ class GO
|
|
128
127
|
rel1 = $1
|
129
128
|
term1 = $2
|
130
129
|
goid1 = $3
|
131
|
-
|
132
|
-
|
130
|
+
parse_goids(line)
|
131
|
+
parse_synonyms(line)
|
133
132
|
|
134
133
|
@id2term[goid1] = term1
|
135
134
|
goids.each {|goid|
|
data/lib/bio/db/kegg/module.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Copyright:: Copyright (C) 2010 Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id:$
|
9
8
|
#
|
10
9
|
|
11
10
|
require 'bio/db'
|
@@ -81,7 +80,7 @@ class MODULE < KEGGDB
|
|
81
80
|
# ---
|
82
81
|
# *Returns*:: Hash of pathway ID and its definition
|
83
82
|
def pathways_as_hash
|
84
|
-
unless @pathways_as_hash
|
83
|
+
unless (defined? @pathways_as_hash) && @pathways_as_hash
|
85
84
|
@pathways_as_hash = strings_as_hash(pathways_as_strings)
|
86
85
|
end
|
87
86
|
@pathways_as_hash
|
@@ -100,7 +99,7 @@ class MODULE < KEGGDB
|
|
100
99
|
# ---
|
101
100
|
# *Returns*:: Hash of orthology ID and its definition
|
102
101
|
def orthologs_as_hash
|
103
|
-
unless @orthologs_as_hash
|
102
|
+
unless (defined? @orthologs_as_hash) && @orthologs_as_hash
|
104
103
|
@orthologs_as_hash = strings_as_hash(orthologs_as_strings)
|
105
104
|
end
|
106
105
|
@orthologs_as_hash
|
@@ -126,7 +125,7 @@ class MODULE < KEGGDB
|
|
126
125
|
# ---
|
127
126
|
# *Returns*:: Hash of reaction ID and its definition
|
128
127
|
def reactions_as_hash
|
129
|
-
unless @reactions_as_hash
|
128
|
+
unless (defined? @reactions_as_hash) && @reactions_as_hash
|
130
129
|
@reactions_as_hash = strings_as_hash(reactions_as_strings)
|
131
130
|
end
|
132
131
|
@reactions_as_hash
|
@@ -145,7 +144,7 @@ class MODULE < KEGGDB
|
|
145
144
|
# ---
|
146
145
|
# *Returns*:: Hash of compound ID and its definition
|
147
146
|
def compounds_as_hash
|
148
|
-
unless @compounds_as_hash
|
147
|
+
unless (defined? @compounds_as_hash) && @compounds_as_hash
|
149
148
|
@compounds_as_hash = strings_as_hash(compounds_as_strings)
|
150
149
|
end
|
151
150
|
@compounds_as_hash
|
data/lib/bio/db/kegg/pathway.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Copyright:: Copyright (C) 2010 Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id:$
|
9
8
|
#
|
10
9
|
|
11
10
|
require 'bio/db'
|
@@ -127,7 +126,7 @@ class PATHWAY < KEGGDB
|
|
127
126
|
# ---
|
128
127
|
# *Returns*:: Hash of disease ID and its definition
|
129
128
|
def diseases_as_hash
|
130
|
-
unless @diseases_as_hash
|
129
|
+
unless (defined? @diseases_as_hash) && @diseases_as_hash
|
131
130
|
@diseases_as_hash = strings_as_hash(diseases_as_strings)
|
132
131
|
end
|
133
132
|
@diseases_as_hash
|
@@ -166,7 +165,7 @@ class PATHWAY < KEGGDB
|
|
166
165
|
# ---
|
167
166
|
# *Returns*:: Hash of gene ID and its definition
|
168
167
|
def genes_as_hash
|
169
|
-
unless @genes_as_hash
|
168
|
+
unless (defined? @genes_as_hash) && @genes_as_hash
|
170
169
|
@genes_as_hash = strings_as_hash(genes_as_strings)
|
171
170
|
end
|
172
171
|
@genes_as_hash
|
@@ -192,7 +191,7 @@ class PATHWAY < KEGGDB
|
|
192
191
|
# ---
|
193
192
|
# *Returns*:: Hash of reaction ID and its definition
|
194
193
|
def reactions_as_hash
|
195
|
-
unless @reactions_as_hash
|
194
|
+
unless (defined? @reactions_as_hash) && @reactions_as_hash
|
196
195
|
@reactions_as_hash = strings_as_hash(reactions_as_strings)
|
197
196
|
end
|
198
197
|
@reactions_as_hash
|
@@ -210,7 +209,7 @@ class PATHWAY < KEGGDB
|
|
210
209
|
# ---
|
211
210
|
# *Returns*:: Hash of compound ID and its definition
|
212
211
|
def compounds_as_hash
|
213
|
-
unless @compounds_as_hash
|
212
|
+
unless (defined? @compounds_as_hash) && @compounds_as_hash
|
214
213
|
@compounds_as_hash = strings_as_hash(compounds_as_strings)
|
215
214
|
end
|
216
215
|
@compounds_as_hash
|
data/lib/bio/db/kegg/reaction.rb
CHANGED
@@ -86,7 +86,7 @@ class REACTION < KEGGDB
|
|
86
86
|
unless defined? @rpairs_as_hash
|
87
87
|
rps = {}
|
88
88
|
rpairs_as_strings.each do |line|
|
89
|
-
|
89
|
+
_, entry_id, name, rptype = line.split(/\s+/)
|
90
90
|
rps[entry_id] = [ name, rptype ]
|
91
91
|
end
|
92
92
|
@rpairs_as_hash = rps
|
data/lib/bio/db/nexus.rb
CHANGED
@@ -299,7 +299,7 @@ module Bio
|
|
299
299
|
ary = str.split(/[\s+=]/)
|
300
300
|
ary.collect! { |x| x.strip!; x.empty? ? nil : x }
|
301
301
|
ary.compact!
|
302
|
-
in_comment = false
|
302
|
+
#in_comment = false
|
303
303
|
comment_level = 0
|
304
304
|
|
305
305
|
# Main loop
|
@@ -807,6 +807,7 @@ module Bio
|
|
807
807
|
# *Returns*:: String
|
808
808
|
def to_nexus
|
809
809
|
str = "generic block \"" + get_name + "\" [do not know how to write in nexus format]"
|
810
|
+
str
|
810
811
|
end
|
811
812
|
|
812
813
|
# Adds a token to this.
|
@@ -1716,7 +1717,7 @@ module Bio
|
|
1716
1717
|
return "empty"
|
1717
1718
|
end
|
1718
1719
|
str = String.new
|
1719
|
-
row_array = to_nexus_row_array(
|
1720
|
+
row_array = to_nexus_row_array( " ", false )
|
1720
1721
|
row_array.each do | row |
|
1721
1722
|
str << row << END_OF_LINE
|
1722
1723
|
end
|
data/lib/bio/db/pdb/pdb.rb
CHANGED
@@ -608,8 +608,8 @@ module Bio
|
|
608
608
|
|
609
609
|
# SEQRES record class
|
610
610
|
SEQRES =
|
611
|
-
def_rec(#[
|
612
|
-
[
|
611
|
+
def_rec(#[ 8, 10, Pdb_Integer, :serNum ],
|
612
|
+
[ 8, 10, Pdb_Continuation, nil ], # PDB v3.2 (2008)
|
613
613
|
[ 12, 12, Pdb_Character, :chainID ],
|
614
614
|
[ 14, 17, Pdb_Integer, :numRes ],
|
615
615
|
[ 20, 22, Pdb_Residue_name, :resName ],
|
@@ -5,7 +5,6 @@
|
|
5
5
|
# Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id:$
|
9
8
|
#
|
10
9
|
# == Description
|
11
10
|
#
|
@@ -91,17 +90,17 @@ module PhyloXML
|
|
91
90
|
# Converts elements to xml representation. Called by PhyloXML::Writer class.
|
92
91
|
def to_xml
|
93
92
|
taxonomy = LibXML::XML::Node.new('taxonomy')
|
94
|
-
taxonomy["type"] = @type if @type
|
95
|
-
taxonomy["id_source"] = @id_source if @id_source
|
96
|
-
|
97
|
-
PhyloXML::Writer.generate_xml(taxonomy, self, [[:complex, 'id', @taxonomy_id],
|
98
|
-
[:pattern, 'code', @code, Regexp.new("^[a-zA-Z0-9_]{2,10}$")],
|
99
|
-
[:simple, 'scientific_name', @scientific_name],
|
100
|
-
[:simple, 'authority', @authority],
|
101
|
-
[:simplearr, 'common_name', @common_names],
|
102
|
-
[:simplearr, 'synonym', @synonyms],
|
103
|
-
[:simple, 'rank', @rank],
|
104
|
-
[:complex, 'uri'
|
93
|
+
taxonomy["type"] = @type if (defined? @type) && @type
|
94
|
+
taxonomy["id_source"] = @id_source if (defined? @id_source) && @id_source
|
95
|
+
|
96
|
+
PhyloXML::Writer.generate_xml(taxonomy, self, [[:complex, 'id', (defined? @taxonomy_id) ? @taxonomy_id : nil],
|
97
|
+
[:pattern, 'code', (defined? @code) ? @code : nil, Regexp.new("^[a-zA-Z0-9_]{2,10}$")],
|
98
|
+
[:simple, 'scientific_name', (defined? @scientific_name) ? @scientific_name : nil],
|
99
|
+
[:simple, 'authority', (defined? @authority) ? @authority : nil],
|
100
|
+
[:simplearr, 'common_name', (defined? @common_names) ? @common_names : nil],
|
101
|
+
[:simplearr, 'synonym', (defined? @synonyms) ? @synonyms : nil],
|
102
|
+
[:simple, 'rank', (defined? @rank) ? @rank : nil],
|
103
|
+
[:complex, 'uri',(defined? @uri) ? @uri : nil]])
|
105
104
|
#@todo anything else
|
106
105
|
|
107
106
|
|
@@ -161,7 +160,7 @@ module PhyloXML
|
|
161
160
|
@other = []
|
162
161
|
end
|
163
162
|
|
164
|
-
|
163
|
+
end
|
165
164
|
|
166
165
|
|
167
166
|
# == Description
|
@@ -287,7 +286,7 @@ module PhyloXML
|
|
287
286
|
def to_xml(branch_length, write_branch_length_as_subelement)
|
288
287
|
clade = LibXML::XML::Node.new('clade')
|
289
288
|
|
290
|
-
PhyloXML::Writer.generate_xml(clade, self, [[:simple, 'name', @name]])
|
289
|
+
PhyloXML::Writer.generate_xml(clade, self, [[:simple, 'name', (defined? @name) ? @name : nil]])
|
291
290
|
|
292
291
|
if branch_length != nil
|
293
292
|
if write_branch_length_as_subelement
|
@@ -301,15 +300,15 @@ module PhyloXML
|
|
301
300
|
PhyloXML::Writer.generate_xml(clade, self, [
|
302
301
|
[:attr, "id_source"],
|
303
302
|
[:objarr, 'confidence', 'confidences'],
|
304
|
-
[:simple, 'width', @width],
|
305
|
-
[:complex, 'branch_color', @branch_color],
|
306
|
-
[:simple, 'node_id', @node_id],
|
303
|
+
[:simple, 'width', (defined? @width) ? @width : nil],
|
304
|
+
[:complex, 'branch_color', (defined? @branch_color) ? @branch_color : nil],
|
305
|
+
[:simple, 'node_id', (defined? @node_id) ? @node_id : nil],
|
307
306
|
[:objarr, 'taxonomy', 'taxonomies'],
|
308
307
|
[:objarr, 'sequence', 'sequences'],
|
309
|
-
[:complex, 'events', @events],
|
310
|
-
[:complex, 'binary_characters', @binary_characters],
|
308
|
+
[:complex, 'events', (defined? @events) ? @events : nil],
|
309
|
+
[:complex, 'binary_characters', (defined? @binary_characters) ? @binary_characters : nil],
|
311
310
|
[:objarr, 'distribution', 'distributions'],
|
312
|
-
[:complex, 'date', @date],
|
311
|
+
[:complex, 'date', (defined? @date) ? @date : nil],
|
313
312
|
[:objarr, 'reference', 'references'],
|
314
313
|
[:objarr, 'propery', 'properties']])
|
315
314
|
|
@@ -322,7 +321,7 @@ module PhyloXML
|
|
322
321
|
# Events at the root node of a clade (e.g. one gene duplication).
|
323
322
|
class Events
|
324
323
|
#value comes from list: transfer, fusion, speciation_or_duplication, other, mixed, unassigned
|
325
|
-
|
324
|
+
attr_reader :type
|
326
325
|
|
327
326
|
# Integer
|
328
327
|
attr_reader :duplications, :speciations, :losses
|
@@ -330,26 +329,33 @@ module PhyloXML
|
|
330
329
|
# Confidence object
|
331
330
|
attr_reader :confidence
|
332
331
|
|
333
|
-
|
334
|
-
|
335
|
-
|
332
|
+
#---
|
333
|
+
#def confidence=(type, value)
|
334
|
+
# @confidence = Confidence.new(type, value)
|
335
|
+
#end
|
336
|
+
#+++
|
336
337
|
|
338
|
+
# Confidence object
|
337
339
|
def confidence=(conf)
|
338
340
|
@confidence = conf
|
339
341
|
end
|
340
342
|
|
343
|
+
# Integer
|
341
344
|
def duplications=(str)
|
342
345
|
@duplications = str.to_i
|
343
346
|
end
|
344
347
|
|
348
|
+
# Integer
|
345
349
|
def losses=(str)
|
346
350
|
@losses = str.to_i
|
347
351
|
end
|
348
352
|
|
353
|
+
# Integer
|
349
354
|
def speciations=(str)
|
350
355
|
@speciations=str.to_i
|
351
356
|
end
|
352
357
|
|
358
|
+
#value comes from list: transfer, fusion, speciation_or_duplication, other, mixed, unassigned
|
353
359
|
def type=(str)
|
354
360
|
@type = str
|
355
361
|
#@todo add unit test for this
|
@@ -363,11 +369,11 @@ module PhyloXML
|
|
363
369
|
#@todo add unit test
|
364
370
|
events = LibXML::XML::Node.new('events')
|
365
371
|
PhyloXML::Writer.generate_xml(events, self, [
|
366
|
-
[:simple, 'type', @type],
|
367
|
-
[:simple, 'duplications', @duplications],
|
368
|
-
[:simple, 'speciations', @speciations],
|
369
|
-
[:simple, 'losses', @losses],
|
370
|
-
[:complex, 'confidence', @confidence]])
|
372
|
+
[:simple, 'type', (defined? @type) ? @type : nil],
|
373
|
+
[:simple, 'duplications', (defined? @duplications) ? @duplications : nil],
|
374
|
+
[:simple, 'speciations', (defined? @speciations) ? @speciations : nil],
|
375
|
+
[:simple, 'losses', (defined? @losses) ? @losses : nil],
|
376
|
+
[:complex, 'confidence', (defined? @confidence) ? @confidence : nil]])
|
371
377
|
return events
|
372
378
|
end
|
373
379
|
|
@@ -437,13 +443,13 @@ module PhyloXML
|
|
437
443
|
# 'map datum'), for example Google's KML uses 'WGS84'.
|
438
444
|
class Point
|
439
445
|
# Float. Latitude
|
440
|
-
|
446
|
+
attr_reader :lat
|
441
447
|
|
442
448
|
# Float. Longitute
|
443
|
-
|
449
|
+
attr_reader :long
|
444
450
|
|
445
451
|
# Float. Altitude
|
446
|
-
|
452
|
+
attr_reader :alt
|
447
453
|
|
448
454
|
# String. Altitude unit.
|
449
455
|
attr_accessor :alt_unit
|
@@ -451,14 +457,17 @@ module PhyloXML
|
|
451
457
|
# Geodedic datum / map datum
|
452
458
|
attr_accessor :geodetic_datum
|
453
459
|
|
460
|
+
# Float. Latitude
|
454
461
|
def lat=(str)
|
455
462
|
@lat = str.to_f unless str.nil?
|
456
463
|
end
|
457
464
|
|
465
|
+
# Float. Longitute
|
458
466
|
def long=(str)
|
459
467
|
@long = str.to_f unless str.nil?
|
460
468
|
end
|
461
469
|
|
470
|
+
# Float. Altitude
|
462
471
|
def alt=(str)
|
463
472
|
@alt = str.to_f unless str.nil?
|
464
473
|
end
|
@@ -581,7 +590,7 @@ module PhyloXML
|
|
581
590
|
def to_xml
|
582
591
|
|
583
592
|
seq = LibXML::XML::Node.new('sequence')
|
584
|
-
if @type
|
593
|
+
if (defined? @type) && @type
|
585
594
|
if ["dna", "rna", "protein"].include?(@type)
|
586
595
|
seq["type"] = @type
|
587
596
|
else
|
@@ -592,22 +601,22 @@ module PhyloXML
|
|
592
601
|
PhyloXML::Writer.generate_xml(seq, self, [
|
593
602
|
[:attr, 'id_source'],
|
594
603
|
[:attr, 'id_ref'],
|
595
|
-
[:pattern, 'symbol', @symbol, Regexp.new("^\\S{1,10}$")],
|
596
|
-
[:complex, 'accession', @accession],
|
597
|
-
[:simple, 'name', @name],
|
598
|
-
[:simple, 'location', @location]])
|
604
|
+
[:pattern, 'symbol', (defined? @symbol) ? @symbol : nil, Regexp.new("^\\S{1,10}$")],
|
605
|
+
[:complex, 'accession', (defined? @accession) ? @accession : nil],
|
606
|
+
[:simple, 'name', (defined? @name) ? @name : nil],
|
607
|
+
[:simple, 'location', (defined? @location) ? @location : nil]])
|
599
608
|
|
600
|
-
if @mol_seq
|
609
|
+
if (defined? @mol_seq) && @mol_seq
|
601
610
|
molseq = LibXML::XML::Node.new('mol_seq', @mol_seq)
|
602
|
-
molseq["is_aligned"] = @is_aligned.to_s if @is_aligned != nil
|
611
|
+
molseq["is_aligned"] = @is_aligned.to_s if (defined? @is_aligned) && @is_aligned != nil
|
603
612
|
seq << molseq
|
604
613
|
end
|
605
614
|
|
606
615
|
PhyloXML::Writer.generate_xml(seq, self, [
|
607
616
|
#[:pattern, 'mol_seq', @mol_seq, Regexp.new("^[a-zA-Z\.\-\?\*_]+$")],
|
608
|
-
[:complex, 'uri', @uri],
|
617
|
+
[:complex, 'uri', (defined? @uri) ? @uri : nil],
|
609
618
|
[:objarr, 'annotation', 'annotations'],
|
610
|
-
[:complex, 'domain_architecture', @domain_architecture]])
|
619
|
+
[:complex, 'domain_architecture', (defined? @domain_architecture) ? @domain_architecture : nil]])
|
611
620
|
#@todo test domain_architecture
|
612
621
|
#any
|
613
622
|
return seq
|
@@ -626,7 +635,7 @@ module PhyloXML
|
|
626
635
|
# seq.primary_accession = @accession.value could be this
|
627
636
|
seq.definition = @name
|
628
637
|
#seq.comments = @name //this one?
|
629
|
-
if @uri
|
638
|
+
if (defined? @uri) && @uri
|
630
639
|
h = {'url' => @uri.uri,
|
631
640
|
'title' => @uri.desc }
|
632
641
|
ref = Bio::Reference.new(h)
|
@@ -720,11 +729,11 @@ module PhyloXML
|
|
720
729
|
# Converts elements to xml representation. Called by PhyloXML::Writer class.
|
721
730
|
def to_xml
|
722
731
|
annot = LibXML::XML::Node.new('annotation')
|
723
|
-
annot["ref"] = @ref if @ref
|
724
|
-
PhyloXML::Writer.generate_xml(annot, self, [[:simple, 'desc', @desc],
|
725
|
-
[:complex, 'confidence', @confidence],
|
732
|
+
annot["ref"] = @ref if (defined? @ref) && @ref
|
733
|
+
PhyloXML::Writer.generate_xml(annot, self, [[:simple, 'desc', (defined? @desc) ? @desc : nil],
|
734
|
+
[:complex, 'confidence', (defined? @confidence) ? @confidence : nil],
|
726
735
|
[:objarr, 'property', 'properties'],
|
727
|
-
[:complex, 'uri', @uri]])
|
736
|
+
[:complex, 'uri', (defined? @uri) ? @uri : nil]])
|
728
737
|
return annot
|
729
738
|
end
|
730
739
|
end
|
@@ -825,10 +834,10 @@ module PhyloXML
|
|
825
834
|
date = LibXML::XML::Node.new('date')
|
826
835
|
PhyloXML::Writer.generate_xml(date, self, [
|
827
836
|
[:attr, 'unit'],
|
828
|
-
[:simple, 'desc', @desc],
|
829
|
-
[:simple, 'value', @value],
|
830
|
-
[:simple, 'minimum', @minimum],
|
831
|
-
[:simple, 'maximum', @maximum]])
|
837
|
+
[:simple, 'desc', (defined? @desc) ? @desc : nil],
|
838
|
+
[:simple, 'value', (defined? @value) ? @value : nil],
|
839
|
+
[:simple, 'minimum', (defined? @minimum) ? @minimum : nil],
|
840
|
+
[:simple, 'maximum', (defined? @maximum) ? @maximum : nil]])
|
832
841
|
return date
|
833
842
|
end
|
834
843
|
|
@@ -839,11 +848,12 @@ module PhyloXML
|
|
839
848
|
# 'length' is the total length of the protein
|
840
849
|
class DomainArchitecture
|
841
850
|
# Integer. Total length of the protein
|
842
|
-
|
851
|
+
attr_reader :length
|
843
852
|
|
844
853
|
# Array of ProteinDomain objects.
|
845
854
|
attr_reader :domains
|
846
855
|
|
856
|
+
# Integer. Total length of the protein
|
847
857
|
def length=(str)
|
848
858
|
@length = str.to_i
|
849
859
|
end
|
@@ -868,7 +878,7 @@ module PhyloXML
|
|
868
878
|
# name/unique identifier is described via the 'id' attribute.
|
869
879
|
class ProteinDomain
|
870
880
|
#Float, for example to store E-values 4.7E-14
|
871
|
-
|
881
|
+
attr_reader :confidence
|
872
882
|
|
873
883
|
# String
|
874
884
|
attr_accessor :id, :value
|
@@ -879,14 +889,17 @@ module PhyloXML
|
|
879
889
|
# Integer. End of the domain.
|
880
890
|
attr_reader :to
|
881
891
|
|
892
|
+
# Integer. Beginning of the domain.
|
882
893
|
def from=(str)
|
883
894
|
@from = str.to_i
|
884
895
|
end
|
885
896
|
|
897
|
+
# Integer. End of the domain.
|
886
898
|
def to=(str)
|
887
899
|
@to = str.to_i
|
888
900
|
end
|
889
901
|
|
902
|
+
#Float, for example to store E-values 4.7E-14
|
890
903
|
def confidence=(str)
|
891
904
|
@confidence = str.to_f
|
892
905
|
end
|
@@ -901,7 +914,7 @@ module PhyloXML
|
|
901
914
|
xml_node = LibXML::XML::Node.new('domain', @value)
|
902
915
|
xml_node["from"] = @from.to_s
|
903
916
|
xml_node["to"] = @to.to_s
|
904
|
-
xml_node["id"] = @id if @id
|
917
|
+
xml_node["id"] = @id if (defined? @id) && @id
|
905
918
|
xml_node["confidence"] = @confidence.to_s
|
906
919
|
|
907
920
|
return xml_node
|
@@ -987,7 +1000,7 @@ module PhyloXML
|
|
987
1000
|
ref = LibXML::XML::Node.new('reference')
|
988
1001
|
Writer.generate_xml(ref, self, [
|
989
1002
|
[:attr, 'doi'],
|
990
|
-
[:simple, 'desc', @desc]])
|
1003
|
+
[:simple, 'desc', (defined? @desc) ? @desc : nil]])
|
991
1004
|
return ref
|
992
1005
|
end
|
993
1006
|
|
@@ -999,7 +1012,7 @@ module PhyloXML
|
|
999
1012
|
# For example it could be used to describe multiple parents of a clade.
|
1000
1013
|
class CladeRelation
|
1001
1014
|
# Float
|
1002
|
-
|
1015
|
+
attr_reader :distance
|
1003
1016
|
# String. Id of the referenced parents of a clade.
|
1004
1017
|
attr_accessor :id_ref_0, :id_ref_1
|
1005
1018
|
# String
|
@@ -1007,6 +1020,7 @@ module PhyloXML
|
|
1007
1020
|
# Confidence object
|
1008
1021
|
attr_accessor :confidence
|
1009
1022
|
|
1023
|
+
# Float
|
1010
1024
|
def distance=(str)
|
1011
1025
|
@distance = str.to_f
|
1012
1026
|
end
|
@@ -1022,7 +1036,7 @@ module PhyloXML
|
|
1022
1036
|
[:attr, 'id_ref_1'],
|
1023
1037
|
[:attr, 'distance'],
|
1024
1038
|
[:attr, 'type'],
|
1025
|
-
[:complex, 'confidence', @confidnece]])
|
1039
|
+
[:complex, 'confidence', (defined? @confidnece) ? @confidnece : nil]])
|
1026
1040
|
|
1027
1041
|
return cr
|
1028
1042
|
end
|
@@ -1107,7 +1121,13 @@ module PhyloXML
|
|
1107
1121
|
# attribute 'type' is 'orthology').
|
1108
1122
|
class SequenceRelation
|
1109
1123
|
# String
|
1110
|
-
attr_accessor :id_ref_0, :id_ref_1
|
1124
|
+
attr_accessor :id_ref_0, :id_ref_1
|
1125
|
+
|
1126
|
+
# String. Allowed values: "orthology", "one_to_one_orthology",
|
1127
|
+
# "super_orthology", "paralogy", "ultra_paralogy", "xenology",
|
1128
|
+
# "unknown", "other"
|
1129
|
+
attr_reader :type
|
1130
|
+
|
1111
1131
|
# Float
|
1112
1132
|
attr_reader :distance
|
1113
1133
|
|
@@ -1117,6 +1137,9 @@ module PhyloXML
|
|
1117
1137
|
@distance = str.to_f if str != nil
|
1118
1138
|
end
|
1119
1139
|
|
1140
|
+
# String. Allowed values: "orthology", "one_to_one_orthology",
|
1141
|
+
# "super_orthology", "paralogy", "ultra_paralogy", "xenology",
|
1142
|
+
# "unknown", "other"
|
1120
1143
|
def type=(str)
|
1121
1144
|
#@todo do warning instead?
|
1122
1145
|
#@todo do validation at actually writing xml
|
@@ -1137,7 +1160,7 @@ module PhyloXML
|
|
1137
1160
|
sr = LibXML::XML::Node.new('sequence_relation')
|
1138
1161
|
sr['id_ref_0'] = @id_ref_0
|
1139
1162
|
sr['id_ref_1'] = @id_ref_1
|
1140
|
-
sr['distance'] = @distance.to_s if @distance
|
1163
|
+
sr['distance'] = @distance.to_s if (defined? @distance) && @distance
|
1141
1164
|
sr['type'] = @type
|
1142
1165
|
return sr
|
1143
1166
|
end
|
@@ -1145,7 +1168,7 @@ module PhyloXML
|
|
1145
1168
|
|
1146
1169
|
end
|
1147
1170
|
|
1148
|
-
|
1171
|
+
class Other
|
1149
1172
|
attr_accessor :element_name, :attributes, :children, :value
|
1150
1173
|
|
1151
1174
|
def initialize
|