bio 1.4.3.0001 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.travis.yml +39 -33
- data/BSDL +22 -0
- data/COPYING +2 -2
- data/COPYING.ja +36 -36
- data/ChangeLog +2404 -1025
- data/KNOWN_ISSUES.rdoc +15 -55
- data/README.rdoc +17 -23
- data/RELEASE_NOTES.rdoc +246 -183
- data/Rakefile +3 -2
- data/bin/br_biofetch.rb +29 -5
- data/bioruby.gemspec +15 -32
- data/bioruby.gemspec.erb +10 -20
- data/doc/ChangeLog-1.4.3 +1478 -0
- data/doc/RELEASE_NOTES-1.4.3.rdoc +204 -0
- data/doc/Tutorial.rd +0 -6
- data/doc/Tutorial.rd.html +7 -12
- data/doc/Tutorial.rd.ja +960 -1064
- data/doc/Tutorial.rd.ja.html +977 -1067
- data/gemfiles/Gemfile.travis-jruby1.8 +2 -1
- data/gemfiles/Gemfile.travis-jruby1.9 +2 -4
- data/gemfiles/Gemfile.travis-rbx +13 -0
- data/gemfiles/Gemfile.travis-ruby1.8 +2 -1
- data/gemfiles/Gemfile.travis-ruby1.9 +2 -4
- data/gemfiles/Gemfile.travis-ruby2.2 +9 -0
- data/lib/bio.rb +10 -43
- data/lib/bio/alignment.rb +8 -14
- data/lib/bio/appl/blast.rb +1 -2
- data/lib/bio/appl/blast/format0.rb +18 -7
- data/lib/bio/appl/blast/remote.rb +0 -9
- data/lib/bio/appl/blast/report.rb +1 -1
- data/lib/bio/appl/clustalw/report.rb +3 -1
- data/lib/bio/appl/genscan/report.rb +1 -2
- data/lib/bio/appl/iprscan/report.rb +1 -2
- data/lib/bio/appl/meme/mast.rb +4 -4
- data/lib/bio/appl/meme/mast/report.rb +1 -1
- data/lib/bio/appl/paml/codeml.rb +2 -2
- data/lib/bio/appl/paml/codeml/report.rb +1 -0
- data/lib/bio/appl/paml/common.rb +1 -1
- data/lib/bio/appl/sosui/report.rb +1 -2
- data/lib/bio/command.rb +62 -2
- data/lib/bio/data/aa.rb +13 -31
- data/lib/bio/data/codontable.rb +1 -2
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +1 -0
- data/lib/bio/db/biosql/sequence.rb +1 -1
- data/lib/bio/db/embl/common.rb +1 -1
- data/lib/bio/db/embl/embl.rb +5 -4
- data/lib/bio/db/embl/format_embl.rb +3 -3
- data/lib/bio/db/embl/sptr.rb +9 -1444
- data/lib/bio/db/embl/swissprot.rb +12 -29
- data/lib/bio/db/embl/trembl.rb +13 -30
- data/lib/bio/db/embl/uniprot.rb +12 -29
- data/lib/bio/db/embl/uniprotkb.rb +1455 -0
- data/lib/bio/db/fasta.rb +17 -0
- data/lib/bio/db/fasta/defline.rb +1 -3
- data/lib/bio/db/fastq.rb +1 -1
- data/lib/bio/db/genbank/ddbj.rb +9 -5
- data/lib/bio/db/genbank/refseq.rb +11 -3
- data/lib/bio/db/gff.rb +3 -4
- data/lib/bio/db/go.rb +5 -6
- data/lib/bio/db/kegg/module.rb +4 -5
- data/lib/bio/db/kegg/pathway.rb +4 -5
- data/lib/bio/db/kegg/reaction.rb +1 -1
- data/lib/bio/db/nexus.rb +3 -2
- data/lib/bio/db/pdb/pdb.rb +2 -2
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +82 -59
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +2 -2
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +1 -2
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +1 -2
- data/lib/bio/db/transfac.rb +1 -1
- data/lib/bio/io/das.rb +40 -41
- data/lib/bio/io/fastacmd.rb +0 -16
- data/lib/bio/io/fetch.rb +111 -55
- data/lib/bio/io/flatfile/buffer.rb +4 -5
- data/lib/bio/io/hinv.rb +2 -3
- data/lib/bio/io/ncbirest.rb +43 -6
- data/lib/bio/io/pubmed.rb +76 -81
- data/lib/bio/io/togows.rb +33 -10
- data/lib/bio/map.rb +1 -1
- data/lib/bio/pathway.rb +1 -1
- data/lib/bio/sequence/compat.rb +1 -1
- data/lib/bio/sequence/na.rb +63 -12
- data/lib/bio/shell.rb +0 -2
- data/lib/bio/shell/core.rb +5 -6
- data/lib/bio/shell/interface.rb +3 -4
- data/lib/bio/shell/irb.rb +1 -2
- data/lib/bio/shell/plugin/entry.rb +2 -3
- data/lib/bio/shell/plugin/seq.rb +7 -6
- data/lib/bio/shell/setup.rb +1 -2
- data/lib/bio/tree.rb +2 -2
- data/lib/bio/util/contingency_table.rb +0 -2
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +2 -2
- data/lib/bio/util/sirna.rb +76 -16
- data/lib/bio/version.rb +8 -9
- data/sample/benchmark_clustalw_report.rb +47 -0
- data/sample/biofetch.rb +248 -151
- data/setup.rb +6 -7
- data/test/data/clustalw/example1-seqnos.aln +58 -0
- data/test/network/bio/appl/blast/test_remote.rb +1 -15
- data/test/network/bio/appl/test_blast.rb +0 -12
- data/test/network/bio/io/test_pubmed.rb +49 -0
- data/test/network/bio/io/test_togows.rb +0 -1
- data/test/network/bio/test_command.rb +65 -2
- data/test/unit/bio/appl/bl2seq/test_report.rb +0 -1
- data/test/unit/bio/appl/blast/test_report.rb +110 -48
- data/test/unit/bio/appl/clustalw/test_report.rb +67 -51
- data/test/unit/bio/appl/sim4/test_report.rb +46 -17
- data/test/unit/bio/appl/test_blast.rb +2 -2
- data/test/unit/bio/db/embl/test_embl.rb +0 -1
- data/test/unit/bio/db/embl/test_embl_rel89.rb +0 -1
- data/test/unit/bio/db/embl/{test_sptr.rb → test_uniprotkb.rb} +111 -115
- data/test/unit/bio/db/embl/{test_uniprot_new_part.rb → test_uniprotkb_new_part.rb} +11 -11
- data/test/unit/bio/db/genbank/test_genbank.rb +10 -4
- data/test/unit/bio/db/pdb/test_pdb.rb +14 -8
- data/test/unit/bio/db/test_fasta.rb +41 -1
- data/test/unit/bio/db/test_fastq.rb +14 -4
- data/test/unit/bio/db/test_gff.rb +2 -2
- data/test/unit/bio/db/test_phyloxml.rb +30 -30
- data/test/unit/bio/db/test_phyloxml_writer.rb +2 -2
- data/test/unit/bio/io/flatfile/test_autodetection.rb +1 -2
- data/test/unit/bio/io/flatfile/test_buffer.rb +7 -1
- data/test/unit/bio/io/flatfile/test_splitter.rb +1 -1
- data/test/unit/bio/io/test_togows.rb +3 -2
- data/test/unit/bio/sequence/test_dblink.rb +1 -1
- data/test/unit/bio/sequence/test_na.rb +3 -1
- data/test/unit/bio/test_alignment.rb +1 -2
- data/test/unit/bio/test_command.rb +5 -4
- data/test/unit/bio/test_db.rb +4 -2
- data/test/unit/bio/test_pathway.rb +25 -10
- data/test/unit/bio/util/test_sirna.rb +22 -22
- metadata +656 -1430
- data/doc/KEGG_API.rd +0 -1843
- data/doc/KEGG_API.rd.ja +0 -1834
- data/extconf.rb +0 -2
- data/lib/bio/appl/blast/ddbj.rb +0 -131
- data/lib/bio/db/kegg/taxonomy.rb +0 -280
- data/lib/bio/io/dbget.rb +0 -194
- data/lib/bio/io/ddbjrest.rb +0 -344
- data/lib/bio/io/ddbjxml.rb +0 -458
- data/lib/bio/io/ebisoap.rb +0 -158
- data/lib/bio/io/ensembl.rb +0 -229
- data/lib/bio/io/higet.rb +0 -73
- data/lib/bio/io/keggapi.rb +0 -363
- data/lib/bio/io/ncbisoap.rb +0 -156
- data/lib/bio/io/soapwsdl.rb +0 -119
- data/lib/bio/shell/plugin/keggapi.rb +0 -181
- data/lib/bio/shell/plugin/soap.rb +0 -87
- data/sample/dbget +0 -37
- data/sample/demo_ddbjxml.rb +0 -212
- data/sample/demo_kegg_taxonomy.rb +0 -92
- data/sample/demo_keggapi.rb +0 -502
- data/sample/psortplot_html.rb +0 -214
- data/test/network/bio/io/test_ddbjrest.rb +0 -47
- data/test/network/bio/io/test_ensembl.rb +0 -230
- data/test/network/bio/io/test_soapwsdl.rb +0 -53
- data/test/unit/bio/io/test_ddbjxml.rb +0 -81
- data/test/unit/bio/io/test_ensembl.rb +0 -111
- data/test/unit/bio/io/test_soapwsdl.rb +0 -33
data/lib/bio/version.rb
CHANGED
@@ -10,20 +10,19 @@
|
|
10
10
|
module Bio
|
11
11
|
|
12
12
|
# BioRuby version (Array containing Integer)
|
13
|
-
BIORUBY_VERSION = [1,
|
13
|
+
BIORUBY_VERSION = [1, 5, 0].extend(Comparable).freeze
|
14
14
|
|
15
15
|
# Extra version specifier (String or nil).
|
16
|
-
# Existance of the value indicates
|
16
|
+
# Existence of the value indicates development version.
|
17
17
|
#
|
18
18
|
# nil :: Release version.
|
19
|
-
# "
|
20
|
-
# ".
|
21
|
-
# ".5001"..".8999" :: Pre-alpha version.
|
22
|
-
# "-alphaN" (N=0..99) :: Alpha version.
|
23
|
-
# "-preN" (N=0..99) :: Pre-release test version.
|
24
|
-
# "-rcN" (N=0..99) :: Release candidate version.
|
19
|
+
# "-dev" :: Development version (with YYYYMMDD digits).
|
20
|
+
# ".20150630" :: Development version (specify the date digits).
|
25
21
|
#
|
26
|
-
|
22
|
+
# By default, if the third digit (teeny) of BIORUBY_VERSION is 0,
|
23
|
+
# the version is regarded as a development version.
|
24
|
+
BIORUBY_EXTRA_VERSION =
|
25
|
+
nil #(BIORUBY_VERSION[2] == 0) ? "-dev" : nil
|
27
26
|
|
28
27
|
# Version identifier, including extra version string (String)
|
29
28
|
# Unlike BIORUBY_VERSION, it is not comparable.
|
@@ -0,0 +1,47 @@
|
|
1
|
+
#
|
2
|
+
# = sample/benchmark_clustalw_report.rb - Benchmark tests for Bio::ClustalW::Report
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2013
|
5
|
+
# Andrew Grimm <andrew.j.grimm@gmail.com>
|
6
|
+
# License:: The Ruby License
|
7
|
+
|
8
|
+
require 'pathname'
|
9
|
+
load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 1, "test",
|
10
|
+
'bioruby_test_helper.rb')).cleanpath.to_s
|
11
|
+
|
12
|
+
require 'benchmark'
|
13
|
+
require 'bio'
|
14
|
+
|
15
|
+
class BenchmarkClustalWReport
|
16
|
+
|
17
|
+
DataDir = File.join(BioRubyTestDataPath, 'clustalw')
|
18
|
+
Filenames = [ 'example1.aln', 'example1-seqnos.aln' ]
|
19
|
+
|
20
|
+
def self.benchmark_clustalw_report
|
21
|
+
Filenames.each do |fn|
|
22
|
+
print "\n", fn, "\n"
|
23
|
+
fullpath = File.join(DataDir, fn)
|
24
|
+
self.new(fullpath).benchmark
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def initialize(aln_filename)
|
29
|
+
@text = File.open(aln_filename, 'rb') { |f| f.read }
|
30
|
+
@text.freeze
|
31
|
+
end
|
32
|
+
|
33
|
+
def benchmark
|
34
|
+
GC.start
|
35
|
+
Benchmark.bmbm do |x|
|
36
|
+
x.report do
|
37
|
+
for i in 1...10_000
|
38
|
+
aln = Bio::ClustalW::Report.new(@text)
|
39
|
+
aln.alignment
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
end #class BenchmarkClustalWReport
|
46
|
+
|
47
|
+
BenchmarkClustalWReport.benchmark_clustalw_report
|
data/sample/biofetch.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
|
-
#!/usr/
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
2
3
|
#
|
3
|
-
# biofetch.rb : BioFetch server (interface to
|
4
|
+
# biofetch.rb : BioFetch server (interface to TogoWS)
|
4
5
|
#
|
5
6
|
# Copyright (C) 2002-2004 KATAYAMA Toshiaki <k@bioruby.org>
|
7
|
+
# 2013 GOTO Naohisa <ng@bioruby.org>
|
6
8
|
#
|
7
9
|
# This program is free software; you can redistribute it and/or modify
|
8
10
|
# it under the terms of the GNU General Public License as published by
|
@@ -18,15 +20,97 @@
|
|
18
20
|
# along with this program; if not, write to the Free Software
|
19
21
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
22
|
#
|
21
|
-
# $Id: biofetch.rb,v 1.16 2005/08/07 10:02:41 k Exp $
|
22
23
|
#
|
23
24
|
|
24
25
|
require 'cgi'
|
25
|
-
require '
|
26
|
-
require '
|
26
|
+
require 'erb'
|
27
|
+
require 'open-uri'
|
28
|
+
require 'fileutils'
|
29
|
+
require 'tempfile'
|
27
30
|
|
28
31
|
MAX_ID_NUM = 50
|
29
32
|
|
33
|
+
# script name
|
34
|
+
SCRIPT_NAME = File.basename(__FILE__)
|
35
|
+
|
36
|
+
# full URL for this CGI
|
37
|
+
BASE_URL = "http://bioruby.org/cgi-bin/#{SCRIPT_NAME}"
|
38
|
+
|
39
|
+
# cache directory for metadata
|
40
|
+
# Note: The cache is only for metadata (database list and format list).
|
41
|
+
# Data entries are NOT cached.
|
42
|
+
CACHE_DIR = '/tmp/biofetch_rb.cache'
|
43
|
+
|
44
|
+
# cache lifetime
|
45
|
+
CACHE_LIFETIME = 60 * 60 # 1 hour
|
46
|
+
|
47
|
+
module TogoWS
|
48
|
+
TOGOWS_URL = 'http://togows.dbcls.jp/'
|
49
|
+
|
50
|
+
def togows_database_complete_list
|
51
|
+
result = togows_get_cached('/entry/')
|
52
|
+
result.to_s.split(/\n/).collect {|x| x.split(/\t/) }
|
53
|
+
end
|
54
|
+
|
55
|
+
def togows_database_formats(db)
|
56
|
+
db = CGI.escape(db)
|
57
|
+
result = togows_get_cached("/entry/#{db}/?formats")
|
58
|
+
end
|
59
|
+
|
60
|
+
def togows_get(path)
|
61
|
+
uristr = TOGOWS_URL + path
|
62
|
+
begin
|
63
|
+
result = OpenURI.open_uri(uristr).read
|
64
|
+
rescue OpenURI::HTTPError
|
65
|
+
result = nil
|
66
|
+
end
|
67
|
+
result
|
68
|
+
end
|
69
|
+
|
70
|
+
private
|
71
|
+
|
72
|
+
def togows_get_cached(path)
|
73
|
+
filepath = path.sub(/\A\//, '').sub(/\/\z/, '')
|
74
|
+
filepath = filepath.gsub(/\//, " ")
|
75
|
+
filepath = filepath.sub(/\?/, '_')
|
76
|
+
filepath = File.join(CACHE_DIR, filepath)
|
77
|
+
result = nil
|
78
|
+
begin
|
79
|
+
if Time.now - File.mtime(filepath) > CACHE_LIFETIME
|
80
|
+
# delete expired cache file
|
81
|
+
File.delete(filepath)
|
82
|
+
end
|
83
|
+
result = File.read(filepath)
|
84
|
+
rescue IOError, SystemCallError
|
85
|
+
result = nil
|
86
|
+
end
|
87
|
+
unless result then
|
88
|
+
# valid cache is not found
|
89
|
+
result = togows_get(path)
|
90
|
+
if result then
|
91
|
+
# create cache directory if not found
|
92
|
+
FileUtils.mkdir_p(CACHE_DIR, :mode => 0700)
|
93
|
+
# simple security check for the cache dir
|
94
|
+
if File.stat(CACHE_DIR).mode & 0022 != 0 then
|
95
|
+
raise SecurityError, "CACHE_DIR #{CACHE_DIR} is writeable by others"
|
96
|
+
end
|
97
|
+
|
98
|
+
# write to temporary file
|
99
|
+
tmp = Tempfile.open('temp', CACHE_DIR)
|
100
|
+
tmp.print result
|
101
|
+
tmp.close
|
102
|
+
# create a hard link from the temporary to the cache file
|
103
|
+
begin
|
104
|
+
File.link(tmp.path, filepath)
|
105
|
+
rescue IOError, SystemCallError
|
106
|
+
end
|
107
|
+
# the temporary file will be automatically removed at exit
|
108
|
+
end
|
109
|
+
end
|
110
|
+
result
|
111
|
+
end
|
112
|
+
|
113
|
+
end #module TogoWS
|
30
114
|
|
31
115
|
module BioFetchError
|
32
116
|
|
@@ -36,32 +120,50 @@ module BioFetchError
|
|
36
120
|
exit
|
37
121
|
end
|
38
122
|
|
123
|
+
def print_html_page(str)
|
124
|
+
print "Content-type: text/html; charset=UTF-8\n\n"
|
125
|
+
print "<pre>", CGI.escapeHTML(str), "</pre>\n"
|
126
|
+
exit
|
127
|
+
end
|
128
|
+
|
39
129
|
def error1(db)
|
130
|
+
db = CGI.escapeHTML(db.to_s) # to avoid potential XSS with old IE
|
40
131
|
str = "ERROR 1 Unknown database [#{db}]."
|
41
132
|
print_text_page(str)
|
42
133
|
end
|
43
134
|
|
44
135
|
def error2(style)
|
136
|
+
style = CGI.escapeHTML(style.to_s) # to avoid potential XSS with old IE
|
45
137
|
str = "ERROR 2 Unknown style [#{style}]."
|
46
138
|
print_text_page(str)
|
47
139
|
end
|
48
140
|
|
49
141
|
def error3(format, db)
|
142
|
+
# to avoid potential XSS with old IE which ignores Content-Type
|
143
|
+
db = CGI.escapeHTML(db.to_s)
|
144
|
+
format = CGI.escapeHTML(format.to_s)
|
50
145
|
str = "ERROR 3 Format [#{format}] not known for database [#{db}]."
|
51
146
|
print_text_page(str)
|
52
147
|
end
|
53
148
|
|
54
149
|
def error4(entry_id, db)
|
150
|
+
# to avoid potential XSS with old IE which ignores Content-Type
|
151
|
+
entry_id = CGI.escapeHTML(entry_id.to_s)
|
152
|
+
db = CGI.escapeHTML(db.to_s)
|
55
153
|
str = "ERROR 4 ID [#{entry_id}] not found in database [#{db}]."
|
56
154
|
print_text_page(str)
|
57
155
|
end
|
58
156
|
|
59
157
|
def error5(count)
|
158
|
+
# to avoid potential XSS with old IE which ignores Content-Type
|
159
|
+
count = CGI.escapeHTML(count.to_s)
|
60
160
|
str = "ERROR 5 Too many IDs [#{count}]. Max [#{MAX_ID_NUM}] allowed."
|
61
161
|
print_text_page(str)
|
62
162
|
end
|
63
163
|
|
64
164
|
# Reports BioFetch "ERROR 6" (illegal information request).
#
# info :: the requested "info" value; any object, converted with to_s.
#
# The value is HTML-escaped before it is embedded in the message, to
# avoid potential XSS with old IE which ignores Content-Type. The
# resulting message is handed to print_text_page.
def error6(info)
  # Assign the escaped text back to +info+: that is the variable the
  # message interpolates (the same pattern error1..error5 use).
  info = CGI.escapeHTML(info.to_s)
  str = "ERROR 6 Illegal information request [#{info}]."
  print_text_page(str)
end
|
@@ -70,23 +172,35 @@ end
|
|
70
172
|
|
71
173
|
|
72
174
|
|
73
|
-
module
|
175
|
+
module ApiBridge
|
74
176
|
|
75
177
|
include BioFetchError
|
178
|
+
include TogoWS
|
179
|
+
|
180
|
+
def list_databases_with_synonyms
|
181
|
+
togows_database_complete_list
|
182
|
+
end
|
76
183
|
|
77
184
|
def list_databases
|
78
|
-
|
79
|
-
results = serv.list_databases
|
80
|
-
results.collect {|x| x.entry_id}
|
185
|
+
list_databases_with_synonyms.flatten
|
81
186
|
end
|
82
187
|
|
83
188
|
def bget(db, id_list, format)
|
84
|
-
|
189
|
+
case format
|
190
|
+
when 'fasta'
|
191
|
+
format = '.fasta'
|
192
|
+
else
|
193
|
+
format = ''
|
194
|
+
end
|
195
|
+
db = CGI.escape(db)
|
196
|
+
|
85
197
|
results = ''
|
86
198
|
id_list.each do |query_id|
|
87
|
-
|
88
|
-
|
89
|
-
|
199
|
+
query_id = CGI.escape(query_id)
|
200
|
+
path = "/entry/#{db}/#{query_id}#{format}"
|
201
|
+
result = togows_get(path)
|
202
|
+
|
203
|
+
if !result or result.empty? or /\AError\: / =~ result then
|
90
204
|
error4(query_id, db)
|
91
205
|
else
|
92
206
|
results << result
|
@@ -95,78 +209,41 @@ module KeggAPI
|
|
95
209
|
return results
|
96
210
|
end
|
97
211
|
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
class BioFetch
|
104
|
-
|
105
|
-
include BioFetchError
|
106
|
-
include KeggAPI
|
107
|
-
|
108
|
-
def initialize(db, id_list, style, format)
|
109
|
-
check_style(style)
|
110
|
-
check_format(format, db)
|
111
|
-
check_number_of_id(id_list.length)
|
112
|
-
check_dbname(db)
|
212
|
+
def check_fasta_ok?(db)
|
213
|
+
result = togows_database_formats(db)
|
214
|
+
/^fasta$/ =~ result.to_s
|
215
|
+
end
|
113
216
|
|
114
|
-
|
115
|
-
goto_html_style_page(db, id_list, format)
|
116
|
-
end
|
217
|
+
end #module ApiBridge
|
117
218
|
|
118
|
-
|
219
|
+
module BioFetchCheck
|
119
220
|
|
120
|
-
|
121
|
-
entries = convert_to_fasta_format(entries, db)
|
122
|
-
end
|
123
|
-
|
124
|
-
print_text_page(entries)
|
125
|
-
end
|
221
|
+
include ApiBridge
|
126
222
|
|
127
223
|
private
|
128
224
|
|
129
|
-
def convert_to_fasta_format(str, db)
|
130
|
-
require 'bio'
|
131
|
-
require 'stringio'
|
132
|
-
|
133
|
-
fasta = Array.new
|
134
|
-
|
135
|
-
entries = StringIO.new(str)
|
136
|
-
Bio::FlatFile.auto(entries) do |ff|
|
137
|
-
ff.each do |entry|
|
138
|
-
seq = nil
|
139
|
-
if entry.respond_to?(:seq)
|
140
|
-
seq = entry.seq
|
141
|
-
elsif entry.respond_to?(:aaseq)
|
142
|
-
seq = entry.aaseq
|
143
|
-
elsif entry.respond_to?(:naseq)
|
144
|
-
seq = entry.naseq
|
145
|
-
end
|
146
|
-
if seq
|
147
|
-
entry_id = entry.respond_to?(:entry_id) ? entry.entry_id : ''
|
148
|
-
definition = entry.respond_to?(:definition) ? entry.definition : ''
|
149
|
-
fasta << seq.to_fasta("#{db}:#{entry_id} #{definition}", 60)
|
150
|
-
end
|
151
|
-
end
|
152
|
-
end
|
153
|
-
return fasta.join
|
154
|
-
end
|
155
|
-
|
156
|
-
def goto_html_style_page(db, id_list, format)
|
157
|
-
url = "http://www.genome.jp/dbget-bin/www_bget"
|
158
|
-
opt = '-f+' if /fasta/.match(format)
|
159
|
-
ids = id_list.join('%2B')
|
160
|
-
print "Location: #{url}?#{opt}#{db}+#{ids}\n\n"
|
161
|
-
exit
|
162
|
-
end
|
163
|
-
|
164
225
|
def check_style(style)
|
165
|
-
|
226
|
+
style = style.to_s.downcase
|
227
|
+
error2(style) unless /\A(html|raw)\z/.match(style)
|
228
|
+
style
|
166
229
|
end
|
167
230
|
|
168
231
|
def check_format(format, db)
|
169
|
-
|
232
|
+
fmt = format ? format.to_s.downcase : nil
|
233
|
+
case fmt
|
234
|
+
when 'fasta'
|
235
|
+
db = check_dbname(db)
|
236
|
+
fmt = nil unless check_fasta_ok?(db)
|
237
|
+
when 'default'
|
238
|
+
# do nothing
|
239
|
+
when nil
|
240
|
+
fmt = 'default'
|
241
|
+
else
|
242
|
+
fmt = nil
|
243
|
+
end
|
244
|
+
|
245
|
+
error3(format, db) unless fmt
|
246
|
+
fmt
|
170
247
|
end
|
171
248
|
|
172
249
|
def check_number_of_id(num)
|
@@ -174,22 +251,50 @@ class BioFetch
|
|
174
251
|
end
|
175
252
|
|
176
253
|
def check_dbname(db)
|
254
|
+
db = db.to_s.downcase
|
177
255
|
error1(db) unless list_databases.include?(db)
|
256
|
+
db
|
178
257
|
end
|
179
258
|
|
180
|
-
end
|
259
|
+
end #module BioFetchCheck
|
260
|
+
|
261
|
+
class BioFetch
|
262
|
+
|
263
|
+
include BioFetchCheck
|
264
|
+
include BioFetchError
|
265
|
+
include ApiBridge
|
266
|
+
|
267
|
+
def initialize(db, id_list, style, format)
|
268
|
+
style = check_style(style)
|
269
|
+
format = check_format(format, db)
|
270
|
+
check_number_of_id(id_list.length)
|
271
|
+
db = check_dbname(db)
|
272
|
+
|
273
|
+
entries = bget(db, id_list, format)
|
274
|
+
|
275
|
+
if style == 'html' then
|
276
|
+
print_html_page(entries)
|
277
|
+
else
|
278
|
+
print_text_page(entries)
|
279
|
+
end
|
280
|
+
|
281
|
+
end
|
282
|
+
|
283
|
+
end #class BioFetch
|
181
284
|
|
182
285
|
|
183
286
|
|
184
287
|
class BioFetchInfo
|
185
288
|
|
289
|
+
include BioFetchCheck
|
186
290
|
include BioFetchError
|
187
|
-
include
|
291
|
+
include ApiBridge
|
188
292
|
|
189
293
|
def initialize(info, db)
|
190
294
|
@db = db
|
295
|
+
|
191
296
|
begin
|
192
|
-
|
297
|
+
check_info(info) ? __send__(info) : raise
|
193
298
|
rescue
|
194
299
|
error6(info)
|
195
300
|
end
|
@@ -197,13 +302,18 @@ class BioFetchInfo
|
|
197
302
|
|
198
303
|
private
|
199
304
|
|
305
|
+
def check_info(meth_name)
|
306
|
+
/\A(dbs|formats|maxids)\z/ =~ meth_name
|
307
|
+
end
|
308
|
+
|
200
309
|
def dbs
|
201
310
|
str = list_databases.sort.join(' ')
|
202
311
|
print_text_page(str)
|
203
312
|
end
|
204
313
|
|
205
314
|
def formats
|
206
|
-
|
315
|
+
db = check_dbname(@db)
|
316
|
+
fasta = " fasta" if check_fasta_ok?(db)
|
207
317
|
str = "default#{fasta}"
|
208
318
|
print_text_page(str)
|
209
319
|
end
|
@@ -213,17 +323,14 @@ class BioFetchInfo
|
|
213
323
|
print_text_page(str)
|
214
324
|
end
|
215
325
|
|
216
|
-
|
217
|
-
# sequence databases supported by Bio::FlatFile.auto
|
218
|
-
/genes|gb|genbank|genpept|rs|refseq|emb|sp|swiss|pir/.match(@db)
|
219
|
-
end
|
220
|
-
|
221
|
-
end
|
326
|
+
end #class BioFetchInfo
|
222
327
|
|
223
328
|
|
224
329
|
|
225
330
|
class BioFetchCGI
|
226
331
|
|
332
|
+
include ApiBridge
|
333
|
+
|
227
334
|
def initialize(cgi)
|
228
335
|
@cgi = cgi
|
229
336
|
show_page
|
@@ -244,11 +351,14 @@ class BioFetchCGI
|
|
244
351
|
end
|
245
352
|
|
246
353
|
def show_query_page
|
247
|
-
html =
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
354
|
+
html = ERB.new(DATA.read)
|
355
|
+
max_id_num = MAX_ID_NUM
|
356
|
+
databases_with_synonyms = list_databases_with_synonyms
|
357
|
+
databases = list_databases
|
358
|
+
script_name = SCRIPT_NAME
|
359
|
+
base_url = BASE_URL
|
360
|
+
@cgi.out({ "type" => "text/html", "charset" => "utf-8" }) do
|
361
|
+
html.result(binding)
|
252
362
|
end
|
253
363
|
end
|
254
364
|
|
@@ -269,7 +379,7 @@ class BioFetchCGI
|
|
269
379
|
end
|
270
380
|
|
271
381
|
def id_list
|
272
|
-
@cgi['id'].split(
|
382
|
+
@cgi['id'].strip.split(/[\,\s]+/)
|
273
383
|
end
|
274
384
|
|
275
385
|
def style
|
@@ -295,9 +405,8 @@ BioFetchCGI.new(CGI.new)
|
|
295
405
|
This program was created during BioHackathon 2002, Tucson and updated
|
296
406
|
in Cape Town :)
|
297
407
|
|
298
|
-
Rewrited in
|
299
|
-
University
|
300
|
-
having internally accessible DBGET server.
|
408
|
+
Rewritten in 2013 to use the TogoWS API, as the bioruby.org server left The
|
409
|
+
University of Tokyo and the old SOAP-based KEGG API is discontinued.
|
301
410
|
|
302
411
|
=end
|
303
412
|
|
@@ -308,7 +417,7 @@ __END__
|
|
308
417
|
<HEAD>
|
309
418
|
<LINK href="http://bioruby.org/img/favicon.png" rel="icon" type="image/png">
|
310
419
|
<LINK href="http://bioruby.org/css/bioruby.css" rel="stylesheet" type="text/css">
|
311
|
-
<TITLE>BioFetch interface to
|
420
|
+
<TITLE>BioFetch interface to TogoWS</TITLE>
|
312
421
|
</HEAD>
|
313
422
|
|
314
423
|
<BODY bgcolor="#ffffff">
|
@@ -316,37 +425,26 @@ __END__
|
|
316
425
|
<H1>
|
317
426
|
<IMG src="http://bioruby.org/img/ruby.png" align="middle">
|
318
427
|
BioFetch interface to
|
319
|
-
<A href="http://
|
428
|
+
<A href="http://togows.dbcls.jp/">TogoWS</A>
|
320
429
|
</H1>
|
321
430
|
|
322
|
-
<P>This page allows you to retrieve up to
|
431
|
+
<P>This page allows you to retrieve up to <%= max_id_num %> entries at a time from various up-to-date biological databases.</P>
|
323
432
|
|
324
433
|
<HR>
|
325
434
|
|
326
|
-
<FORM METHOD="post" ENCTYPE="application/x-www-form-urlencoded" action="
|
435
|
+
<FORM METHOD="post" ENCTYPE="application/x-www-form-urlencoded" action="<%= script_name %>">
|
327
436
|
|
328
437
|
<SELECT name="db">
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
<OPTION value="
|
338
|
-
|
339
|
-
<OPTION value="prosite">PROSITE</OPTION>
|
340
|
-
<OPTION value="pmd">PMD</OPTION>
|
341
|
-
<OPTION value="litdb">LITDB</OPTION>
|
342
|
-
<OPTION value="omim">OMIM</OPTION>
|
343
|
-
<OPTION value="ligand">KEGG/LIGAND</OPTION>
|
344
|
-
<OPTION value="pathway">KEGG/PATHWAY</OPTION>
|
345
|
-
<OPTION value="brite">KEGG/BRITE</OPTION>
|
346
|
-
<OPTION value="genes">KEGG/GENES</OPTION>
|
347
|
-
<OPTION value="genome">KEGG/GENOME</OPTION>
|
348
|
-
<OPTION value="linkdb">LinkDB</OPTION>
|
349
|
-
<OPTION value="aaindex">AAindex</OPTION>
|
438
|
+
<% databases_with_synonyms.each do |dbs|
|
439
|
+
a = dbs[1..-1]
|
440
|
+
synonyms = unless a.empty? then
|
441
|
+
" (abbr: " + a.join(", ") + ")"
|
442
|
+
else
|
443
|
+
""
|
444
|
+
end
|
445
|
+
%>
|
446
|
+
<OPTION value="<%= dbs[0] %>"><%= dbs[0] %><%= synonyms %></OPTION>
|
447
|
+
<% end %>
|
350
448
|
</SELECT>
|
351
449
|
|
352
450
|
<INPUT name="id" size="40" type="text" maxlength="1000">
|
@@ -369,7 +467,7 @@ BioFetch interface to
|
|
369
467
|
|
370
468
|
<H2>Direct access</H2>
|
371
469
|
|
372
|
-
<P
|
470
|
+
<P><%= base_url %>?format=(default|fasta|...);style=(html|raw);db=(nuccore|embl|...);id=ID[,ID,ID,...]</P>
|
373
471
|
<P>(NOTE: the option separator ';' can be '&')</P>
|
374
472
|
|
375
473
|
<DL>
|
@@ -380,7 +478,7 @@ BioFetch interface to
|
|
380
478
|
<DD> html|raw
|
381
479
|
|
382
480
|
<DT> <U>db</U> (required)
|
383
|
-
<DD>
|
481
|
+
<DD> <%= databases.join('|') %>
|
384
482
|
|
385
483
|
<DT> <U>id</U> (required)
|
386
484
|
<DD> comma separated list of IDs
|
@@ -392,66 +490,65 @@ BioFetch interface to
|
|
392
490
|
|
393
491
|
<DL>
|
394
492
|
<DT> <A href="?info=dbs">What databases are available?</A>
|
395
|
-
<DD>
|
493
|
+
<DD> <%= base_url %>?info=dbs
|
396
494
|
|
397
495
|
<DT> <A href="?info=formats;db=embl">What formats does the database X have?</A>
|
398
|
-
<DD>
|
496
|
+
<DD> <%= base_url %>?info=formats;db=embl
|
399
497
|
|
400
498
|
<DT> <A href="?info=maxids">How many entries can be retrieved simultaneously?</A>
|
401
|
-
<DD>
|
499
|
+
<DD> <%= base_url %>?info=maxids
|
402
500
|
</DL>
|
403
501
|
|
404
502
|
<H2>Examples</H2>
|
405
503
|
|
406
504
|
<DL>
|
407
|
-
<DT> <A href="?format=default;style=raw;db=
|
408
|
-
<DD>
|
505
|
+
<DT> <A href="?format=default;style=raw;db=nuccore;id=AJ617376">nuccore/AJ617376</A> (default/raw)
|
506
|
+
<DD> <%= base_url %>?format=default;style=raw;db=nuccore;id=AJ617376
|
409
507
|
|
410
|
-
<DT> <A href="?format=fasta;style=raw;db=
|
411
|
-
<DD>
|
508
|
+
<DT> <A href="?format=fasta;style=raw;db=nuccore;id=AJ617376">nuccore/AJ617376</A> (fasta/raw)
|
509
|
+
<DD> <%= base_url %>?format=fasta;style=raw;db=nuccore;id=AJ617376
|
412
510
|
|
413
|
-
<DT> <A href="?format=default;style=html;db=
|
414
|
-
<DD>
|
511
|
+
<DT> <A href="?format=default;style=html;db=nuccore;id=AJ617376">nuccore/AJ617376</A> (default/html)
|
512
|
+
<DD> <%= base_url %>?format=default;style=html;db=nuccore;id=AJ617376
|
415
513
|
|
416
|
-
<DT> <A href="?format=default;style=raw;db=
|
417
|
-
<DD>
|
514
|
+
<DT> <A href="?format=default;style=raw;db=nuccore;id=AJ617376,AJ617377">nuccore/AJ617376,AJ617377</A> (default/raw, multiple)
|
515
|
+
<DD> <%= base_url %>?format=default;style=raw;db=nuccore;id=AJ617376,AJ617377
|
418
516
|
|
419
|
-
<DT> <A href="?format=default;style=raw;db=embl;id=
|
420
|
-
<DD>
|
517
|
+
<DT> <A href="?format=default;style=raw;db=embl;id=J00231">embl/J00231</A> (default/raw)
|
518
|
+
<DD> <%= base_url %>?format=default;style=raw;db=embl;id=J00231
|
421
519
|
|
422
|
-
<DT> <A href="?format=default;style=raw;db=
|
423
|
-
<DD>
|
520
|
+
<DT> <A href="?format=default;style=raw;db=uniprot;id=CYC_BOVIN">uniprot/CYC_BOVIN</A> (default/raw)
|
521
|
+
<DD> <%= base_url %>?format=default;style=raw;db=uniprot;id=CYC_BOVIN
|
424
522
|
|
425
|
-
<DT> <A href="?format=fasta;style=raw;db=
|
426
|
-
<DD>
|
523
|
+
<DT> <A href="?format=fasta;style=raw;db=uniprot;id=CYC_BOVIN">uniprot/CYC_BOVIN</A> (fasta/raw)
|
524
|
+
<DD> <%= base_url %>?format=fasta;style=raw;db=uniprot;id=CYC_BOVIN
|
427
525
|
|
428
|
-
<DT> <A href="?format=default;style=raw;db=genes;id=
|
429
|
-
<DD>
|
526
|
+
<DT> <A href="?format=default;style=raw;db=genes;id=eco%3Ab0015">genes/eco:b0015</A> (default/raw)
|
527
|
+
<DD> <%= base_url %>?format=default;style=raw;db=genes;id=eco%3Ab0015
|
528
|
+
<DD> <%= base_url %>?format=default;style=raw;db=genes;id=eco:b0015
|
430
529
|
|
431
|
-
<DT> <A href="?format=default;style=raw;db=prosite;id=PS00028">ps:PS00028</A> (default/raw)
|
432
|
-
<DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=prosite;id=PS00028
|
433
530
|
</DL>
|
434
531
|
|
435
532
|
<H2>Errors</H2>
|
436
533
|
|
437
534
|
<DL>
|
438
535
|
<DT> <A href="?format=default;style=raw;db=nonexistent;id=AJ617376">Error1</A> sample : DB not found
|
439
|
-
<DD>
|
536
|
+
<DD> <%= base_url %>?format=default;style=raw;db=nonexistent;id=AJ617376
|
440
537
|
|
441
|
-
<DT> <A href="?format=default;style=nonexistent;db=
|
442
|
-
<DD>
|
538
|
+
<DT> <A href="?format=default;style=nonexistent;db=nuccore;id=AJ617376">Error2</A> sample : unknown style
|
539
|
+
<DD> <%= base_url %>?format=default;style=nonexistent;db=nuccore;id=AJ617376
|
443
540
|
|
444
|
-
<DT> <A href="?format=nonexistent;style=raw;db=
|
445
|
-
<DD>
|
541
|
+
<DT> <A href="?format=nonexistent;style=raw;db=nuccore;id=AJ617376">Error3</A> sample : unknown format
|
542
|
+
<DD> <%= base_url %>?format=nonexistent;style=raw;db=nuccore;id=AJ617376
|
446
543
|
|
447
|
-
<DT> <A href="?format=default;style=raw;db=
|
448
|
-
<DD>
|
544
|
+
<DT> <A href="?format=default;style=raw;db=nuccore;id=nonexistent">Error4</A> sample : ID not found
|
545
|
+
<DD> <%= base_url %>?format=default;style=raw;db=nuccore;id=nonexistent
|
449
546
|
|
450
547
|
<DT> <A href="?style=raw;db=genes;id=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51">Error5</A> sample : too many IDs
|
451
|
-
<DD>
|
548
|
+
<DD> <%= base_url %>?style=raw;db=genes;id=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51
|
452
549
|
|
453
550
|
<DT> <A href="?info=nonexistent">Error6</A> sample : unknown info
|
454
|
-
<DD>
|
551
|
+
<DD> <%= base_url %>?info=nonexistent
|
455
552
|
</DL>
|
456
553
|
|
457
554
|
<H2>Other BioFetch implementations</H2>
|