bio 1.4.3.0001 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.travis.yml +39 -33
- data/BSDL +22 -0
- data/COPYING +2 -2
- data/COPYING.ja +36 -36
- data/ChangeLog +2404 -1025
- data/KNOWN_ISSUES.rdoc +15 -55
- data/README.rdoc +17 -23
- data/RELEASE_NOTES.rdoc +246 -183
- data/Rakefile +3 -2
- data/bin/br_biofetch.rb +29 -5
- data/bioruby.gemspec +15 -32
- data/bioruby.gemspec.erb +10 -20
- data/doc/ChangeLog-1.4.3 +1478 -0
- data/doc/RELEASE_NOTES-1.4.3.rdoc +204 -0
- data/doc/Tutorial.rd +0 -6
- data/doc/Tutorial.rd.html +7 -12
- data/doc/Tutorial.rd.ja +960 -1064
- data/doc/Tutorial.rd.ja.html +977 -1067
- data/gemfiles/Gemfile.travis-jruby1.8 +2 -1
- data/gemfiles/Gemfile.travis-jruby1.9 +2 -4
- data/gemfiles/Gemfile.travis-rbx +13 -0
- data/gemfiles/Gemfile.travis-ruby1.8 +2 -1
- data/gemfiles/Gemfile.travis-ruby1.9 +2 -4
- data/gemfiles/Gemfile.travis-ruby2.2 +9 -0
- data/lib/bio.rb +10 -43
- data/lib/bio/alignment.rb +8 -14
- data/lib/bio/appl/blast.rb +1 -2
- data/lib/bio/appl/blast/format0.rb +18 -7
- data/lib/bio/appl/blast/remote.rb +0 -9
- data/lib/bio/appl/blast/report.rb +1 -1
- data/lib/bio/appl/clustalw/report.rb +3 -1
- data/lib/bio/appl/genscan/report.rb +1 -2
- data/lib/bio/appl/iprscan/report.rb +1 -2
- data/lib/bio/appl/meme/mast.rb +4 -4
- data/lib/bio/appl/meme/mast/report.rb +1 -1
- data/lib/bio/appl/paml/codeml.rb +2 -2
- data/lib/bio/appl/paml/codeml/report.rb +1 -0
- data/lib/bio/appl/paml/common.rb +1 -1
- data/lib/bio/appl/sosui/report.rb +1 -2
- data/lib/bio/command.rb +62 -2
- data/lib/bio/data/aa.rb +13 -31
- data/lib/bio/data/codontable.rb +1 -2
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +1 -0
- data/lib/bio/db/biosql/sequence.rb +1 -1
- data/lib/bio/db/embl/common.rb +1 -1
- data/lib/bio/db/embl/embl.rb +5 -4
- data/lib/bio/db/embl/format_embl.rb +3 -3
- data/lib/bio/db/embl/sptr.rb +9 -1444
- data/lib/bio/db/embl/swissprot.rb +12 -29
- data/lib/bio/db/embl/trembl.rb +13 -30
- data/lib/bio/db/embl/uniprot.rb +12 -29
- data/lib/bio/db/embl/uniprotkb.rb +1455 -0
- data/lib/bio/db/fasta.rb +17 -0
- data/lib/bio/db/fasta/defline.rb +1 -3
- data/lib/bio/db/fastq.rb +1 -1
- data/lib/bio/db/genbank/ddbj.rb +9 -5
- data/lib/bio/db/genbank/refseq.rb +11 -3
- data/lib/bio/db/gff.rb +3 -4
- data/lib/bio/db/go.rb +5 -6
- data/lib/bio/db/kegg/module.rb +4 -5
- data/lib/bio/db/kegg/pathway.rb +4 -5
- data/lib/bio/db/kegg/reaction.rb +1 -1
- data/lib/bio/db/nexus.rb +3 -2
- data/lib/bio/db/pdb/pdb.rb +2 -2
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +82 -59
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +2 -2
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +1 -2
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +1 -2
- data/lib/bio/db/transfac.rb +1 -1
- data/lib/bio/io/das.rb +40 -41
- data/lib/bio/io/fastacmd.rb +0 -16
- data/lib/bio/io/fetch.rb +111 -55
- data/lib/bio/io/flatfile/buffer.rb +4 -5
- data/lib/bio/io/hinv.rb +2 -3
- data/lib/bio/io/ncbirest.rb +43 -6
- data/lib/bio/io/pubmed.rb +76 -81
- data/lib/bio/io/togows.rb +33 -10
- data/lib/bio/map.rb +1 -1
- data/lib/bio/pathway.rb +1 -1
- data/lib/bio/sequence/compat.rb +1 -1
- data/lib/bio/sequence/na.rb +63 -12
- data/lib/bio/shell.rb +0 -2
- data/lib/bio/shell/core.rb +5 -6
- data/lib/bio/shell/interface.rb +3 -4
- data/lib/bio/shell/irb.rb +1 -2
- data/lib/bio/shell/plugin/entry.rb +2 -3
- data/lib/bio/shell/plugin/seq.rb +7 -6
- data/lib/bio/shell/setup.rb +1 -2
- data/lib/bio/tree.rb +2 -2
- data/lib/bio/util/contingency_table.rb +0 -2
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +2 -2
- data/lib/bio/util/sirna.rb +76 -16
- data/lib/bio/version.rb +8 -9
- data/sample/benchmark_clustalw_report.rb +47 -0
- data/sample/biofetch.rb +248 -151
- data/setup.rb +6 -7
- data/test/data/clustalw/example1-seqnos.aln +58 -0
- data/test/network/bio/appl/blast/test_remote.rb +1 -15
- data/test/network/bio/appl/test_blast.rb +0 -12
- data/test/network/bio/io/test_pubmed.rb +49 -0
- data/test/network/bio/io/test_togows.rb +0 -1
- data/test/network/bio/test_command.rb +65 -2
- data/test/unit/bio/appl/bl2seq/test_report.rb +0 -1
- data/test/unit/bio/appl/blast/test_report.rb +110 -48
- data/test/unit/bio/appl/clustalw/test_report.rb +67 -51
- data/test/unit/bio/appl/sim4/test_report.rb +46 -17
- data/test/unit/bio/appl/test_blast.rb +2 -2
- data/test/unit/bio/db/embl/test_embl.rb +0 -1
- data/test/unit/bio/db/embl/test_embl_rel89.rb +0 -1
- data/test/unit/bio/db/embl/{test_sptr.rb → test_uniprotkb.rb} +111 -115
- data/test/unit/bio/db/embl/{test_uniprot_new_part.rb → test_uniprotkb_new_part.rb} +11 -11
- data/test/unit/bio/db/genbank/test_genbank.rb +10 -4
- data/test/unit/bio/db/pdb/test_pdb.rb +14 -8
- data/test/unit/bio/db/test_fasta.rb +41 -1
- data/test/unit/bio/db/test_fastq.rb +14 -4
- data/test/unit/bio/db/test_gff.rb +2 -2
- data/test/unit/bio/db/test_phyloxml.rb +30 -30
- data/test/unit/bio/db/test_phyloxml_writer.rb +2 -2
- data/test/unit/bio/io/flatfile/test_autodetection.rb +1 -2
- data/test/unit/bio/io/flatfile/test_buffer.rb +7 -1
- data/test/unit/bio/io/flatfile/test_splitter.rb +1 -1
- data/test/unit/bio/io/test_togows.rb +3 -2
- data/test/unit/bio/sequence/test_dblink.rb +1 -1
- data/test/unit/bio/sequence/test_na.rb +3 -1
- data/test/unit/bio/test_alignment.rb +1 -2
- data/test/unit/bio/test_command.rb +5 -4
- data/test/unit/bio/test_db.rb +4 -2
- data/test/unit/bio/test_pathway.rb +25 -10
- data/test/unit/bio/util/test_sirna.rb +22 -22
- metadata +656 -1430
- data/doc/KEGG_API.rd +0 -1843
- data/doc/KEGG_API.rd.ja +0 -1834
- data/extconf.rb +0 -2
- data/lib/bio/appl/blast/ddbj.rb +0 -131
- data/lib/bio/db/kegg/taxonomy.rb +0 -280
- data/lib/bio/io/dbget.rb +0 -194
- data/lib/bio/io/ddbjrest.rb +0 -344
- data/lib/bio/io/ddbjxml.rb +0 -458
- data/lib/bio/io/ebisoap.rb +0 -158
- data/lib/bio/io/ensembl.rb +0 -229
- data/lib/bio/io/higet.rb +0 -73
- data/lib/bio/io/keggapi.rb +0 -363
- data/lib/bio/io/ncbisoap.rb +0 -156
- data/lib/bio/io/soapwsdl.rb +0 -119
- data/lib/bio/shell/plugin/keggapi.rb +0 -181
- data/lib/bio/shell/plugin/soap.rb +0 -87
- data/sample/dbget +0 -37
- data/sample/demo_ddbjxml.rb +0 -212
- data/sample/demo_kegg_taxonomy.rb +0 -92
- data/sample/demo_keggapi.rb +0 -502
- data/sample/psortplot_html.rb +0 -214
- data/test/network/bio/io/test_ddbjrest.rb +0 -47
- data/test/network/bio/io/test_ensembl.rb +0 -230
- data/test/network/bio/io/test_soapwsdl.rb +0 -53
- data/test/unit/bio/io/test_ddbjxml.rb +0 -81
- data/test/unit/bio/io/test_ensembl.rb +0 -111
- data/test/unit/bio/io/test_soapwsdl.rb +0 -33
data/lib/bio/db/fasta.rb
CHANGED
@@ -68,6 +68,7 @@ module Bio
|
|
68
68
|
# A larger range of methods for dealing with Fasta definition lines can be found in FastaDefline, accessed through the FastaFormat#identifiers method.
|
69
69
|
#
|
70
70
|
# f.entry_id #=> "gi|398365175"
|
71
|
+
# f.first_name #=> "gi|398365175|ref|NP_009718.3|"
|
71
72
|
# f.definition #=> "gi|398365175|ref|NP_009718.3| Cdc28p [Saccharomyces cerevisiae S288c]"
|
72
73
|
# f.identifiers #=> Bio::FastaDefline instance
|
73
74
|
# f.accession #=> "NP_009718"
|
@@ -90,6 +91,7 @@ module Bio
|
|
90
91
|
# f.entry #=> ">abc 123 456\nASDF"
|
91
92
|
#
|
92
93
|
# f.entry_id #=> "abc"
|
94
|
+
# f.first_name #=> "abc"
|
93
95
|
# f.definition #=> "abc 123 456"
|
94
96
|
# f.comment #=> nil
|
95
97
|
# f.accession #=> nil
|
@@ -282,6 +284,21 @@ module Bio
|
|
282
284
|
def locus
|
283
285
|
identifiers.locus
|
284
286
|
end
|
287
|
+
|
288
|
+
# Returns the first name (word) of the definition line - everything
|
289
|
+
# before the first whitespace.
|
290
|
+
#
|
291
|
+
# >abc def #=> 'abc'
|
292
|
+
# >gi|398365175|ref|NP_009718.3| Cdc28p [Saccharomyces cerevisiae S288c] #=> 'gi|398365175|ref|NP_009718.3|'
|
293
|
+
# >abc #=> 'abc'
|
294
|
+
def first_name
|
295
|
+
index = definition.index(/\s/)
|
296
|
+
if index.nil?
|
297
|
+
return @definition
|
298
|
+
else
|
299
|
+
return @definition[0...index]
|
300
|
+
end
|
301
|
+
end
|
285
302
|
|
286
303
|
end #class FastaFormat
|
287
304
|
|
data/lib/bio/db/fasta/defline.rb
CHANGED
@@ -6,7 +6,6 @@
|
|
6
6
|
# Toshiaki Katayama <k@bioruby.org>
|
7
7
|
# License:: The Ruby License
|
8
8
|
#
|
9
|
-
# $Id: defline.rb,v 1.1.2.1 2008/06/20 13:22:32 ngoto Exp $
|
10
9
|
#
|
11
10
|
# == Description
|
12
11
|
#
|
@@ -292,7 +291,6 @@ module Bio
|
|
292
291
|
while token = ary.shift
|
293
292
|
if labels = self.class::NSIDs[token] then
|
294
293
|
di = [ token ]
|
295
|
-
idtype = token
|
296
294
|
labels.each do |x|
|
297
295
|
token = ary.shift
|
298
296
|
break unless token
|
@@ -391,7 +389,7 @@ module Bio
|
|
391
389
|
# Shows words used in the defline. Returns an Array.
|
392
390
|
def words(case_sensitive = nil, kill_regexp = self.class::KillRegexpArray,
|
393
391
|
kwhash = self.class::KillWordsHash)
|
394
|
-
a = descriptions.join(' ').split(/[\.\,\;\:\(\)\[\]\{\}\<\>\"\'
|
392
|
+
a = descriptions.join(' ').split(/[\.\,\;\:\(\)\[\]\{\}\<\>\"\'\`\~\/\|\?\!\&\@\# \x00-\x1f\x7f]+/)
|
395
393
|
a.collect! do |x|
|
396
394
|
x.sub!(/\A[\$\*\-\+]+/, '')
|
397
395
|
x.sub!(/[\$\*\-\=]+\z/, '')
|
data/lib/bio/db/fastq.rb
CHANGED
data/lib/bio/db/genbank/ddbj.rb
CHANGED
@@ -5,16 +5,20 @@
|
|
5
5
|
# License:: The Ruby License
|
6
6
|
#
|
7
7
|
|
8
|
-
|
8
|
+
warn "Bio::DDBJ is deprecated. Use Bio::GenBank."
|
9
9
|
|
10
10
|
module Bio
|
11
11
|
|
12
|
-
|
12
|
+
require 'bio/db/genbank/genbank' unless const_defined?(:GenBank)
|
13
13
|
|
14
|
-
|
15
|
-
|
14
|
+
# Bio::DDBJ is deprecated. Use Bio::GenBank.
|
15
|
+
class DDBJ < GenBank
|
16
16
|
|
17
|
-
#
|
17
|
+
# Bio::DDBJ is deprecated. Use Bio::GenBank.
|
18
|
+
def initialize(str)
|
19
|
+
warn "Bio::DDBJ is deprecated. Use Bio::GenBank."
|
20
|
+
super(str)
|
21
|
+
end
|
18
22
|
|
19
23
|
end # DDBJ
|
20
24
|
|
@@ -4,15 +4,23 @@
|
|
4
4
|
# Copyright:: Copyright (C) 2000-2004 Toshiaki Katayama <k@bioruby.org>
|
5
5
|
# License:: The Ruby License
|
6
6
|
#
|
7
|
-
# $Id: refseq.rb,v 1.8 2007/04/05 23:35:40 trevor Exp $
|
8
7
|
#
|
9
8
|
|
10
|
-
|
9
|
+
warn "Bio::RefSeq is deprecated. Use Bio::GenBank."
|
11
10
|
|
12
11
|
module Bio
|
13
12
|
|
13
|
+
require 'bio/db/genbank/genbank' unless const_defined?(:GenBank)
|
14
|
+
|
15
|
+
# Bio::RefSeq is deprecated. Use Bio::GenBank.
|
14
16
|
class RefSeq < GenBank
|
15
|
-
|
17
|
+
|
18
|
+
# Bio::RefSeq is deprecated. Use Bio::GenBank.
|
19
|
+
def initialize(str)
|
20
|
+
warn "Bio::RefSeq is deprecated. Use Bio::GenBank."
|
21
|
+
super(str)
|
22
|
+
end
|
23
|
+
|
16
24
|
end
|
17
25
|
|
18
26
|
end # Bio
|
data/lib/bio/db/gff.rb
CHANGED
@@ -8,7 +8,6 @@
|
|
8
8
|
# 2008 Naohisa Goto <ng@bioruby.org>
|
9
9
|
# License:: The Ruby License
|
10
10
|
#
|
11
|
-
# $Id:$
|
12
11
|
#
|
13
12
|
require 'uri'
|
14
13
|
require 'strscan'
|
@@ -236,10 +235,10 @@ module Bio
|
|
236
235
|
CHAR2BACKSLASH.merge({ '"' => '"', "\\" => "\\" }).freeze
|
237
236
|
|
238
237
|
# prohibited characters in GFF2 columns
|
239
|
-
PROHIBITED_GFF2_COLUMNS = /[\t\r\n\x00-\x1f\x7f\xfe\xff]/
|
238
|
+
PROHIBITED_GFF2_COLUMNS = /[\t\r\n\x00-\x08\x0b\x0c\x0e-\x1f\x7f\xfe\xff]/
|
240
239
|
|
241
240
|
# prohibited characters in GFF2 attribute tags
|
242
|
-
PROHIBITED_GFF2_TAGS = /[\s\"\;\
|
241
|
+
PROHIBITED_GFF2_TAGS = /[\s\"\;\x00-\x08\x0e-\x1f\x7f\xfe\xff]/
|
243
242
|
|
244
243
|
private
|
245
244
|
# (private) escapes GFF2 free text string
|
@@ -1066,7 +1065,7 @@ module Bio
|
|
1066
1065
|
|
1067
1066
|
# parses given string and returns SequenceRegion class
|
1068
1067
|
def self.parse(str)
|
1069
|
-
|
1068
|
+
_, seqid, start, endpos =
|
1070
1069
|
str.chomp.split(/\s+/, 4).collect { |x| unescape(x) }
|
1071
1070
|
self.new(seqid, start, endpos)
|
1072
1071
|
end
|
data/lib/bio/db/go.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Mitsuteru C. Nakao <n@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id:$
|
9
8
|
#
|
10
9
|
# == Gene Ontology
|
11
10
|
#
|
@@ -104,12 +103,12 @@ class GO
|
|
104
103
|
depth = $1.length.to_i
|
105
104
|
rel = $2
|
106
105
|
term = $3
|
107
|
-
goid1 =
|
106
|
+
goid1 = $4
|
108
107
|
en = $5
|
109
108
|
goids = parse_goids(line) # GO:ID[ ; GO:ID...]
|
110
|
-
|
109
|
+
parse_synonyms(line) # synonym:Term[ ; synonym:Term...]
|
111
110
|
stack[depth] = goids.first
|
112
|
-
@id2term[
|
111
|
+
@id2term[goid1] = term
|
113
112
|
|
114
113
|
next if depth == 0
|
115
114
|
|
@@ -128,8 +127,8 @@ class GO
|
|
128
127
|
rel1 = $1
|
129
128
|
term1 = $2
|
130
129
|
goid1 = $3
|
131
|
-
|
132
|
-
|
130
|
+
parse_goids(line)
|
131
|
+
parse_synonyms(line)
|
133
132
|
|
134
133
|
@id2term[goid1] = term1
|
135
134
|
goids.each {|goid|
|
data/lib/bio/db/kegg/module.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Copyright:: Copyright (C) 2010 Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id:$
|
9
8
|
#
|
10
9
|
|
11
10
|
require 'bio/db'
|
@@ -81,7 +80,7 @@ class MODULE < KEGGDB
|
|
81
80
|
# ---
|
82
81
|
# *Returns*:: Hash of pathway ID and its definition
|
83
82
|
def pathways_as_hash
|
84
|
-
unless @pathways_as_hash
|
83
|
+
unless (defined? @pathways_as_hash) && @pathways_as_hash
|
85
84
|
@pathways_as_hash = strings_as_hash(pathways_as_strings)
|
86
85
|
end
|
87
86
|
@pathways_as_hash
|
@@ -100,7 +99,7 @@ class MODULE < KEGGDB
|
|
100
99
|
# ---
|
101
100
|
# *Returns*:: Hash of orthology ID and its definition
|
102
101
|
def orthologs_as_hash
|
103
|
-
unless @orthologs_as_hash
|
102
|
+
unless (defined? @orthologs_as_hash) && @orthologs_as_hash
|
104
103
|
@orthologs_as_hash = strings_as_hash(orthologs_as_strings)
|
105
104
|
end
|
106
105
|
@orthologs_as_hash
|
@@ -126,7 +125,7 @@ class MODULE < KEGGDB
|
|
126
125
|
# ---
|
127
126
|
# *Returns*:: Hash of reaction ID and its definition
|
128
127
|
def reactions_as_hash
|
129
|
-
unless @reactions_as_hash
|
128
|
+
unless (defined? @reactions_as_hash) && @reactions_as_hash
|
130
129
|
@reactions_as_hash = strings_as_hash(reactions_as_strings)
|
131
130
|
end
|
132
131
|
@reactions_as_hash
|
@@ -145,7 +144,7 @@ class MODULE < KEGGDB
|
|
145
144
|
# ---
|
146
145
|
# *Returns*:: Hash of compound ID and its definition
|
147
146
|
def compounds_as_hash
|
148
|
-
unless @compounds_as_hash
|
147
|
+
unless (defined? @compounds_as_hash) && @compounds_as_hash
|
149
148
|
@compounds_as_hash = strings_as_hash(compounds_as_strings)
|
150
149
|
end
|
151
150
|
@compounds_as_hash
|
data/lib/bio/db/kegg/pathway.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Copyright:: Copyright (C) 2010 Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id:$
|
9
8
|
#
|
10
9
|
|
11
10
|
require 'bio/db'
|
@@ -127,7 +126,7 @@ class PATHWAY < KEGGDB
|
|
127
126
|
# ---
|
128
127
|
# *Returns*:: Hash of disease ID and its definition
|
129
128
|
def diseases_as_hash
|
130
|
-
unless @diseases_as_hash
|
129
|
+
unless (defined? @diseases_as_hash) && @diseases_as_hash
|
131
130
|
@diseases_as_hash = strings_as_hash(diseases_as_strings)
|
132
131
|
end
|
133
132
|
@diseases_as_hash
|
@@ -166,7 +165,7 @@ class PATHWAY < KEGGDB
|
|
166
165
|
# ---
|
167
166
|
# *Returns*:: Hash of gene ID and its definition
|
168
167
|
def genes_as_hash
|
169
|
-
unless @genes_as_hash
|
168
|
+
unless (defined? @genes_as_hash) && @genes_as_hash
|
170
169
|
@genes_as_hash = strings_as_hash(genes_as_strings)
|
171
170
|
end
|
172
171
|
@genes_as_hash
|
@@ -192,7 +191,7 @@ class PATHWAY < KEGGDB
|
|
192
191
|
# ---
|
193
192
|
# *Returns*:: Hash of reaction ID and its definition
|
194
193
|
def reactions_as_hash
|
195
|
-
unless @reactions_as_hash
|
194
|
+
unless (defined? @reactions_as_hash) && @reactions_as_hash
|
196
195
|
@reactions_as_hash = strings_as_hash(reactions_as_strings)
|
197
196
|
end
|
198
197
|
@reactions_as_hash
|
@@ -210,7 +209,7 @@ class PATHWAY < KEGGDB
|
|
210
209
|
# ---
|
211
210
|
# *Returns*:: Hash of compound ID and its definition
|
212
211
|
def compounds_as_hash
|
213
|
-
unless @compounds_as_hash
|
212
|
+
unless (defined? @compounds_as_hash) && @compounds_as_hash
|
214
213
|
@compounds_as_hash = strings_as_hash(compounds_as_strings)
|
215
214
|
end
|
216
215
|
@compounds_as_hash
|
data/lib/bio/db/kegg/reaction.rb
CHANGED
@@ -86,7 +86,7 @@ class REACTION < KEGGDB
|
|
86
86
|
unless defined? @rpairs_as_hash
|
87
87
|
rps = {}
|
88
88
|
rpairs_as_strings.each do |line|
|
89
|
-
|
89
|
+
_, entry_id, name, rptype = line.split(/\s+/)
|
90
90
|
rps[entry_id] = [ name, rptype ]
|
91
91
|
end
|
92
92
|
@rpairs_as_hash = rps
|
data/lib/bio/db/nexus.rb
CHANGED
@@ -299,7 +299,7 @@ module Bio
|
|
299
299
|
ary = str.split(/[\s+=]/)
|
300
300
|
ary.collect! { |x| x.strip!; x.empty? ? nil : x }
|
301
301
|
ary.compact!
|
302
|
-
in_comment = false
|
302
|
+
#in_comment = false
|
303
303
|
comment_level = 0
|
304
304
|
|
305
305
|
# Main loop
|
@@ -807,6 +807,7 @@ module Bio
|
|
807
807
|
# *Returns*:: String
|
808
808
|
def to_nexus
|
809
809
|
str = "generic block \"" + get_name + "\" [do not know how to write in nexus format]"
|
810
|
+
str
|
810
811
|
end
|
811
812
|
|
812
813
|
# Adds a token to this.
|
@@ -1716,7 +1717,7 @@ module Bio
|
|
1716
1717
|
return "empty"
|
1717
1718
|
end
|
1718
1719
|
str = String.new
|
1719
|
-
row_array = to_nexus_row_array(
|
1720
|
+
row_array = to_nexus_row_array( " ", false )
|
1720
1721
|
row_array.each do | row |
|
1721
1722
|
str << row << END_OF_LINE
|
1722
1723
|
end
|
data/lib/bio/db/pdb/pdb.rb
CHANGED
@@ -608,8 +608,8 @@ module Bio
|
|
608
608
|
|
609
609
|
# SEQRES record class
|
610
610
|
SEQRES =
|
611
|
-
def_rec(#[
|
612
|
-
[
|
611
|
+
def_rec(#[ 8, 10, Pdb_Integer, :serNum ],
|
612
|
+
[ 8, 10, Pdb_Continuation, nil ], # PDB v3.2 (2008)
|
613
613
|
[ 12, 12, Pdb_Character, :chainID ],
|
614
614
|
[ 14, 17, Pdb_Integer, :numRes ],
|
615
615
|
[ 20, 22, Pdb_Residue_name, :resName ],
|
@@ -5,7 +5,6 @@
|
|
5
5
|
# Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id:$
|
9
8
|
#
|
10
9
|
# == Description
|
11
10
|
#
|
@@ -91,17 +90,17 @@ module PhyloXML
|
|
91
90
|
# Converts elements to xml representation. Called by PhyloXML::Writer class.
|
92
91
|
def to_xml
|
93
92
|
taxonomy = LibXML::XML::Node.new('taxonomy')
|
94
|
-
taxonomy["type"] = @type if @type
|
95
|
-
taxonomy["id_source"] = @id_source if @id_source
|
96
|
-
|
97
|
-
PhyloXML::Writer.generate_xml(taxonomy, self, [[:complex, 'id', @taxonomy_id],
|
98
|
-
[:pattern, 'code', @code, Regexp.new("^[a-zA-Z0-9_]{2,10}$")],
|
99
|
-
[:simple, 'scientific_name', @scientific_name],
|
100
|
-
[:simple, 'authority', @authority],
|
101
|
-
[:simplearr, 'common_name', @common_names],
|
102
|
-
[:simplearr, 'synonym', @synonyms],
|
103
|
-
[:simple, 'rank', @rank],
|
104
|
-
[:complex, 'uri'
|
93
|
+
taxonomy["type"] = @type if (defined? @type) && @type
|
94
|
+
taxonomy["id_source"] = @id_source if (defined? @id_source) && @id_source
|
95
|
+
|
96
|
+
PhyloXML::Writer.generate_xml(taxonomy, self, [[:complex, 'id', (defined? @taxonomy_id) ? @taxonomy_id : nil],
|
97
|
+
[:pattern, 'code', (defined? @code) ? @code : nil, Regexp.new("^[a-zA-Z0-9_]{2,10}$")],
|
98
|
+
[:simple, 'scientific_name', (defined? @scientific_name) ? @scientific_name : nil],
|
99
|
+
[:simple, 'authority', (defined? @authority) ? @authority : nil],
|
100
|
+
[:simplearr, 'common_name', (defined? @common_names) ? @common_names : nil],
|
101
|
+
[:simplearr, 'synonym', (defined? @synonyms) ? @synonyms : nil],
|
102
|
+
[:simple, 'rank', (defined? @rank) ? @rank : nil],
|
103
|
+
[:complex, 'uri',(defined? @uri) ? @uri : nil]])
|
105
104
|
#@todo anything else
|
106
105
|
|
107
106
|
|
@@ -161,7 +160,7 @@ module PhyloXML
|
|
161
160
|
@other = []
|
162
161
|
end
|
163
162
|
|
164
|
-
|
163
|
+
end
|
165
164
|
|
166
165
|
|
167
166
|
# == Description
|
@@ -287,7 +286,7 @@ module PhyloXML
|
|
287
286
|
def to_xml(branch_length, write_branch_length_as_subelement)
|
288
287
|
clade = LibXML::XML::Node.new('clade')
|
289
288
|
|
290
|
-
PhyloXML::Writer.generate_xml(clade, self, [[:simple, 'name', @name]])
|
289
|
+
PhyloXML::Writer.generate_xml(clade, self, [[:simple, 'name', (defined? @name) ? @name : nil]])
|
291
290
|
|
292
291
|
if branch_length != nil
|
293
292
|
if write_branch_length_as_subelement
|
@@ -301,15 +300,15 @@ module PhyloXML
|
|
301
300
|
PhyloXML::Writer.generate_xml(clade, self, [
|
302
301
|
[:attr, "id_source"],
|
303
302
|
[:objarr, 'confidence', 'confidences'],
|
304
|
-
[:simple, 'width', @width],
|
305
|
-
[:complex, 'branch_color', @branch_color],
|
306
|
-
[:simple, 'node_id', @node_id],
|
303
|
+
[:simple, 'width', (defined? @width) ? @width : nil],
|
304
|
+
[:complex, 'branch_color', (defined? @branch_color) ? @branch_color : nil],
|
305
|
+
[:simple, 'node_id', (defined? @node_id) ? @node_id : nil],
|
307
306
|
[:objarr, 'taxonomy', 'taxonomies'],
|
308
307
|
[:objarr, 'sequence', 'sequences'],
|
309
|
-
[:complex, 'events', @events],
|
310
|
-
[:complex, 'binary_characters', @binary_characters],
|
308
|
+
[:complex, 'events', (defined? @events) ? @events : nil],
|
309
|
+
[:complex, 'binary_characters', (defined? @binary_characters) ? @binary_characters : nil],
|
311
310
|
[:objarr, 'distribution', 'distributions'],
|
312
|
-
[:complex, 'date', @date],
|
311
|
+
[:complex, 'date', (defined? @date) ? @date : nil],
|
313
312
|
[:objarr, 'reference', 'references'],
|
314
313
|
[:objarr, 'propery', 'properties']])
|
315
314
|
|
@@ -322,7 +321,7 @@ module PhyloXML
|
|
322
321
|
# Events at the root node of a clade (e.g. one gene duplication).
|
323
322
|
class Events
|
324
323
|
#value comes from list: transfer, fusion, speciation_or_duplication, other, mixed, unassigned
|
325
|
-
|
324
|
+
attr_reader :type
|
326
325
|
|
327
326
|
# Integer
|
328
327
|
attr_reader :duplications, :speciations, :losses
|
@@ -330,26 +329,33 @@ module PhyloXML
|
|
330
329
|
# Confidence object
|
331
330
|
attr_reader :confidence
|
332
331
|
|
333
|
-
|
334
|
-
|
335
|
-
|
332
|
+
#---
|
333
|
+
#def confidence=(type, value)
|
334
|
+
# @confidence = Confidence.new(type, value)
|
335
|
+
#end
|
336
|
+
#+++
|
336
337
|
|
338
|
+
# Confidence object
|
337
339
|
def confidence=(conf)
|
338
340
|
@confidence = conf
|
339
341
|
end
|
340
342
|
|
343
|
+
# Integer
|
341
344
|
def duplications=(str)
|
342
345
|
@duplications = str.to_i
|
343
346
|
end
|
344
347
|
|
348
|
+
# Integer
|
345
349
|
def losses=(str)
|
346
350
|
@losses = str.to_i
|
347
351
|
end
|
348
352
|
|
353
|
+
# Integer
|
349
354
|
def speciations=(str)
|
350
355
|
@speciations=str.to_i
|
351
356
|
end
|
352
357
|
|
358
|
+
#value comes from list: transfer, fusion, speciation_or_duplication, other, mixed, unassigned
|
353
359
|
def type=(str)
|
354
360
|
@type = str
|
355
361
|
#@todo add unit test for this
|
@@ -363,11 +369,11 @@ module PhyloXML
|
|
363
369
|
#@todo add unit test
|
364
370
|
events = LibXML::XML::Node.new('events')
|
365
371
|
PhyloXML::Writer.generate_xml(events, self, [
|
366
|
-
[:simple, 'type', @type],
|
367
|
-
[:simple, 'duplications', @duplications],
|
368
|
-
[:simple, 'speciations', @speciations],
|
369
|
-
[:simple, 'losses', @losses],
|
370
|
-
[:complex, 'confidence', @confidence]])
|
372
|
+
[:simple, 'type', (defined? @type) ? @type : nil],
|
373
|
+
[:simple, 'duplications', (defined? @duplications) ? @duplications : nil],
|
374
|
+
[:simple, 'speciations', (defined? @speciations) ? @speciations : nil],
|
375
|
+
[:simple, 'losses', (defined? @losses) ? @losses : nil],
|
376
|
+
[:complex, 'confidence', (defined? @confidence) ? @confidence : nil]])
|
371
377
|
return events
|
372
378
|
end
|
373
379
|
|
@@ -437,13 +443,13 @@ module PhyloXML
|
|
437
443
|
# 'map datum'), for example Google's KML uses 'WGS84'.
|
438
444
|
class Point
|
439
445
|
# Float. Latitude
|
440
|
-
|
446
|
+
attr_reader :lat
|
441
447
|
|
442
448
|
# Float. Longitute
|
443
|
-
|
449
|
+
attr_reader :long
|
444
450
|
|
445
451
|
# Float. Altitude
|
446
|
-
|
452
|
+
attr_reader :alt
|
447
453
|
|
448
454
|
# String. Altitude unit.
|
449
455
|
attr_accessor :alt_unit
|
@@ -451,14 +457,17 @@ module PhyloXML
|
|
451
457
|
# Geodedic datum / map datum
|
452
458
|
attr_accessor :geodetic_datum
|
453
459
|
|
460
|
+
# Float. Latitude
|
454
461
|
def lat=(str)
|
455
462
|
@lat = str.to_f unless str.nil?
|
456
463
|
end
|
457
464
|
|
465
|
+
# Float. Longitute
|
458
466
|
def long=(str)
|
459
467
|
@long = str.to_f unless str.nil?
|
460
468
|
end
|
461
469
|
|
470
|
+
# Float. Altitude
|
462
471
|
def alt=(str)
|
463
472
|
@alt = str.to_f unless str.nil?
|
464
473
|
end
|
@@ -581,7 +590,7 @@ module PhyloXML
|
|
581
590
|
def to_xml
|
582
591
|
|
583
592
|
seq = LibXML::XML::Node.new('sequence')
|
584
|
-
if @type
|
593
|
+
if (defined? @type) && @type
|
585
594
|
if ["dna", "rna", "protein"].include?(@type)
|
586
595
|
seq["type"] = @type
|
587
596
|
else
|
@@ -592,22 +601,22 @@ module PhyloXML
|
|
592
601
|
PhyloXML::Writer.generate_xml(seq, self, [
|
593
602
|
[:attr, 'id_source'],
|
594
603
|
[:attr, 'id_ref'],
|
595
|
-
[:pattern, 'symbol', @symbol, Regexp.new("^\\S{1,10}$")],
|
596
|
-
[:complex, 'accession', @accession],
|
597
|
-
[:simple, 'name', @name],
|
598
|
-
[:simple, 'location', @location]])
|
604
|
+
[:pattern, 'symbol', (defined? @symbol) ? @symbol : nil, Regexp.new("^\\S{1,10}$")],
|
605
|
+
[:complex, 'accession', (defined? @accession) ? @accession : nil],
|
606
|
+
[:simple, 'name', (defined? @name) ? @name : nil],
|
607
|
+
[:simple, 'location', (defined? @location) ? @location : nil]])
|
599
608
|
|
600
|
-
if @mol_seq
|
609
|
+
if (defined? @mol_seq) && @mol_seq
|
601
610
|
molseq = LibXML::XML::Node.new('mol_seq', @mol_seq)
|
602
|
-
molseq["is_aligned"] = @is_aligned.to_s if @is_aligned != nil
|
611
|
+
molseq["is_aligned"] = @is_aligned.to_s if (defined? @is_aligned) && @is_aligned != nil
|
603
612
|
seq << molseq
|
604
613
|
end
|
605
614
|
|
606
615
|
PhyloXML::Writer.generate_xml(seq, self, [
|
607
616
|
#[:pattern, 'mol_seq', @mol_seq, Regexp.new("^[a-zA-Z\.\-\?\*_]+$")],
|
608
|
-
[:complex, 'uri', @uri],
|
617
|
+
[:complex, 'uri', (defined? @uri) ? @uri : nil],
|
609
618
|
[:objarr, 'annotation', 'annotations'],
|
610
|
-
[:complex, 'domain_architecture', @domain_architecture]])
|
619
|
+
[:complex, 'domain_architecture', (defined? @domain_architecture) ? @domain_architecture : nil]])
|
611
620
|
#@todo test domain_architecture
|
612
621
|
#any
|
613
622
|
return seq
|
@@ -626,7 +635,7 @@ module PhyloXML
|
|
626
635
|
# seq.primary_accession = @accession.value could be this
|
627
636
|
seq.definition = @name
|
628
637
|
#seq.comments = @name //this one?
|
629
|
-
if @uri
|
638
|
+
if (defined? @uri) && @uri
|
630
639
|
h = {'url' => @uri.uri,
|
631
640
|
'title' => @uri.desc }
|
632
641
|
ref = Bio::Reference.new(h)
|
@@ -720,11 +729,11 @@ module PhyloXML
|
|
720
729
|
# Converts elements to xml representation. Called by PhyloXML::Writer class.
|
721
730
|
def to_xml
|
722
731
|
annot = LibXML::XML::Node.new('annotation')
|
723
|
-
annot["ref"] = @ref if @ref
|
724
|
-
PhyloXML::Writer.generate_xml(annot, self, [[:simple, 'desc', @desc],
|
725
|
-
[:complex, 'confidence', @confidence],
|
732
|
+
annot["ref"] = @ref if (defined? @ref) && @ref
|
733
|
+
PhyloXML::Writer.generate_xml(annot, self, [[:simple, 'desc', (defined? @desc) ? @desc : nil],
|
734
|
+
[:complex, 'confidence', (defined? @confidence) ? @confidence : nil],
|
726
735
|
[:objarr, 'property', 'properties'],
|
727
|
-
[:complex, 'uri', @uri]])
|
736
|
+
[:complex, 'uri', (defined? @uri) ? @uri : nil]])
|
728
737
|
return annot
|
729
738
|
end
|
730
739
|
end
|
@@ -825,10 +834,10 @@ module PhyloXML
|
|
825
834
|
date = LibXML::XML::Node.new('date')
|
826
835
|
PhyloXML::Writer.generate_xml(date, self, [
|
827
836
|
[:attr, 'unit'],
|
828
|
-
[:simple, 'desc', @desc],
|
829
|
-
[:simple, 'value', @value],
|
830
|
-
[:simple, 'minimum', @minimum],
|
831
|
-
[:simple, 'maximum', @maximum]])
|
837
|
+
[:simple, 'desc', (defined? @desc) ? @desc : nil],
|
838
|
+
[:simple, 'value', (defined? @value) ? @value : nil],
|
839
|
+
[:simple, 'minimum', (defined? @minimum) ? @minimum : nil],
|
840
|
+
[:simple, 'maximum', (defined? @maximum) ? @maximum : nil]])
|
832
841
|
return date
|
833
842
|
end
|
834
843
|
|
@@ -839,11 +848,12 @@ module PhyloXML
|
|
839
848
|
# 'length' is the total length of the protein
|
840
849
|
class DomainArchitecture
|
841
850
|
# Integer. Total length of the protein
|
842
|
-
|
851
|
+
attr_reader :length
|
843
852
|
|
844
853
|
# Array of ProteinDomain objects.
|
845
854
|
attr_reader :domains
|
846
855
|
|
856
|
+
# Integer. Total length of the protein
|
847
857
|
def length=(str)
|
848
858
|
@length = str.to_i
|
849
859
|
end
|
@@ -868,7 +878,7 @@ module PhyloXML
|
|
868
878
|
# name/unique identifier is described via the 'id' attribute.
|
869
879
|
class ProteinDomain
|
870
880
|
#Float, for example to store E-values 4.7E-14
|
871
|
-
|
881
|
+
attr_reader :confidence
|
872
882
|
|
873
883
|
# String
|
874
884
|
attr_accessor :id, :value
|
@@ -879,14 +889,17 @@ module PhyloXML
|
|
879
889
|
# Integer. End of the domain.
|
880
890
|
attr_reader :to
|
881
891
|
|
892
|
+
# Integer. Beginning of the domain.
|
882
893
|
def from=(str)
|
883
894
|
@from = str.to_i
|
884
895
|
end
|
885
896
|
|
897
|
+
# Integer. End of the domain.
|
886
898
|
def to=(str)
|
887
899
|
@to = str.to_i
|
888
900
|
end
|
889
901
|
|
902
|
+
#Float, for example to store E-values 4.7E-14
|
890
903
|
def confidence=(str)
|
891
904
|
@confidence = str.to_f
|
892
905
|
end
|
@@ -901,7 +914,7 @@ module PhyloXML
|
|
901
914
|
xml_node = LibXML::XML::Node.new('domain', @value)
|
902
915
|
xml_node["from"] = @from.to_s
|
903
916
|
xml_node["to"] = @to.to_s
|
904
|
-
xml_node["id"] = @id if @id
|
917
|
+
xml_node["id"] = @id if (defined? @id) && @id
|
905
918
|
xml_node["confidence"] = @confidence.to_s
|
906
919
|
|
907
920
|
return xml_node
|
@@ -987,7 +1000,7 @@ module PhyloXML
|
|
987
1000
|
ref = LibXML::XML::Node.new('reference')
|
988
1001
|
Writer.generate_xml(ref, self, [
|
989
1002
|
[:attr, 'doi'],
|
990
|
-
[:simple, 'desc', @desc]])
|
1003
|
+
[:simple, 'desc', (defined? @desc) ? @desc : nil]])
|
991
1004
|
return ref
|
992
1005
|
end
|
993
1006
|
|
@@ -999,7 +1012,7 @@ module PhyloXML
|
|
999
1012
|
# For example it could be used to describe multiple parents of a clade.
|
1000
1013
|
class CladeRelation
|
1001
1014
|
# Float
|
1002
|
-
|
1015
|
+
attr_reader :distance
|
1003
1016
|
# String. Id of the referenced parents of a clade.
|
1004
1017
|
attr_accessor :id_ref_0, :id_ref_1
|
1005
1018
|
# String
|
@@ -1007,6 +1020,7 @@ module PhyloXML
|
|
1007
1020
|
# Confidence object
|
1008
1021
|
attr_accessor :confidence
|
1009
1022
|
|
1023
|
+
# Float
|
1010
1024
|
def distance=(str)
|
1011
1025
|
@distance = str.to_f
|
1012
1026
|
end
|
@@ -1022,7 +1036,7 @@ module PhyloXML
|
|
1022
1036
|
[:attr, 'id_ref_1'],
|
1023
1037
|
[:attr, 'distance'],
|
1024
1038
|
[:attr, 'type'],
|
1025
|
-
[:complex, 'confidence', @confidnece]])
|
1039
|
+
[:complex, 'confidence', (defined? @confidnece) ? @confidnece : nil]])
|
1026
1040
|
|
1027
1041
|
return cr
|
1028
1042
|
end
|
@@ -1107,7 +1121,13 @@ module PhyloXML
|
|
1107
1121
|
# attribute 'type' is 'orthology').
|
1108
1122
|
class SequenceRelation
|
1109
1123
|
# String
|
1110
|
-
attr_accessor :id_ref_0, :id_ref_1
|
1124
|
+
attr_accessor :id_ref_0, :id_ref_1
|
1125
|
+
|
1126
|
+
# String. Allowed values: "orthology", "one_to_one_orthology",
|
1127
|
+
# "super_orthology", "paralogy", "ultra_paralogy", "xenology",
|
1128
|
+
# "unknown", "other"
|
1129
|
+
attr_reader :type
|
1130
|
+
|
1111
1131
|
# Float
|
1112
1132
|
attr_reader :distance
|
1113
1133
|
|
@@ -1117,6 +1137,9 @@ module PhyloXML
|
|
1117
1137
|
@distance = str.to_f if str != nil
|
1118
1138
|
end
|
1119
1139
|
|
1140
|
+
# String. Allowed values: "orthology", "one_to_one_orthology",
|
1141
|
+
# "super_orthology", "paralogy", "ultra_paralogy", "xenology",
|
1142
|
+
# "unknown", "other"
|
1120
1143
|
def type=(str)
|
1121
1144
|
#@todo do warning instead?
|
1122
1145
|
#@todo do validation at actually writing xml
|
@@ -1137,7 +1160,7 @@ module PhyloXML
|
|
1137
1160
|
sr = LibXML::XML::Node.new('sequence_relation')
|
1138
1161
|
sr['id_ref_0'] = @id_ref_0
|
1139
1162
|
sr['id_ref_1'] = @id_ref_1
|
1140
|
-
sr['distance'] = @distance.to_s if @distance
|
1163
|
+
sr['distance'] = @distance.to_s if (defined? @distance) && @distance
|
1141
1164
|
sr['type'] = @type
|
1142
1165
|
return sr
|
1143
1166
|
end
|
@@ -1145,7 +1168,7 @@ module PhyloXML
|
|
1145
1168
|
|
1146
1169
|
end
|
1147
1170
|
|
1148
|
-
|
1171
|
+
class Other
|
1149
1172
|
attr_accessor :element_name, :attributes, :children, :value
|
1150
1173
|
|
1151
1174
|
def initialize
|