bio 1.4.3.0001 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. checksums.yaml +7 -0
  2. data/.travis.yml +39 -33
  3. data/BSDL +22 -0
  4. data/COPYING +2 -2
  5. data/COPYING.ja +36 -36
  6. data/ChangeLog +2404 -1025
  7. data/KNOWN_ISSUES.rdoc +15 -55
  8. data/README.rdoc +17 -23
  9. data/RELEASE_NOTES.rdoc +246 -183
  10. data/Rakefile +3 -2
  11. data/bin/br_biofetch.rb +29 -5
  12. data/bioruby.gemspec +15 -32
  13. data/bioruby.gemspec.erb +10 -20
  14. data/doc/ChangeLog-1.4.3 +1478 -0
  15. data/doc/RELEASE_NOTES-1.4.3.rdoc +204 -0
  16. data/doc/Tutorial.rd +0 -6
  17. data/doc/Tutorial.rd.html +7 -12
  18. data/doc/Tutorial.rd.ja +960 -1064
  19. data/doc/Tutorial.rd.ja.html +977 -1067
  20. data/gemfiles/Gemfile.travis-jruby1.8 +2 -1
  21. data/gemfiles/Gemfile.travis-jruby1.9 +2 -4
  22. data/gemfiles/Gemfile.travis-rbx +13 -0
  23. data/gemfiles/Gemfile.travis-ruby1.8 +2 -1
  24. data/gemfiles/Gemfile.travis-ruby1.9 +2 -4
  25. data/gemfiles/Gemfile.travis-ruby2.2 +9 -0
  26. data/lib/bio.rb +10 -43
  27. data/lib/bio/alignment.rb +8 -14
  28. data/lib/bio/appl/blast.rb +1 -2
  29. data/lib/bio/appl/blast/format0.rb +18 -7
  30. data/lib/bio/appl/blast/remote.rb +0 -9
  31. data/lib/bio/appl/blast/report.rb +1 -1
  32. data/lib/bio/appl/clustalw/report.rb +3 -1
  33. data/lib/bio/appl/genscan/report.rb +1 -2
  34. data/lib/bio/appl/iprscan/report.rb +1 -2
  35. data/lib/bio/appl/meme/mast.rb +4 -4
  36. data/lib/bio/appl/meme/mast/report.rb +1 -1
  37. data/lib/bio/appl/paml/codeml.rb +2 -2
  38. data/lib/bio/appl/paml/codeml/report.rb +1 -0
  39. data/lib/bio/appl/paml/common.rb +1 -1
  40. data/lib/bio/appl/sosui/report.rb +1 -2
  41. data/lib/bio/command.rb +62 -2
  42. data/lib/bio/data/aa.rb +13 -31
  43. data/lib/bio/data/codontable.rb +1 -2
  44. data/lib/bio/db/biosql/biosql_to_biosequence.rb +1 -0
  45. data/lib/bio/db/biosql/sequence.rb +1 -1
  46. data/lib/bio/db/embl/common.rb +1 -1
  47. data/lib/bio/db/embl/embl.rb +5 -4
  48. data/lib/bio/db/embl/format_embl.rb +3 -3
  49. data/lib/bio/db/embl/sptr.rb +9 -1444
  50. data/lib/bio/db/embl/swissprot.rb +12 -29
  51. data/lib/bio/db/embl/trembl.rb +13 -30
  52. data/lib/bio/db/embl/uniprot.rb +12 -29
  53. data/lib/bio/db/embl/uniprotkb.rb +1455 -0
  54. data/lib/bio/db/fasta.rb +17 -0
  55. data/lib/bio/db/fasta/defline.rb +1 -3
  56. data/lib/bio/db/fastq.rb +1 -1
  57. data/lib/bio/db/genbank/ddbj.rb +9 -5
  58. data/lib/bio/db/genbank/refseq.rb +11 -3
  59. data/lib/bio/db/gff.rb +3 -4
  60. data/lib/bio/db/go.rb +5 -6
  61. data/lib/bio/db/kegg/module.rb +4 -5
  62. data/lib/bio/db/kegg/pathway.rb +4 -5
  63. data/lib/bio/db/kegg/reaction.rb +1 -1
  64. data/lib/bio/db/nexus.rb +3 -2
  65. data/lib/bio/db/pdb/pdb.rb +2 -2
  66. data/lib/bio/db/phyloxml/phyloxml_elements.rb +82 -59
  67. data/lib/bio/db/phyloxml/phyloxml_parser.rb +2 -2
  68. data/lib/bio/db/phyloxml/phyloxml_writer.rb +1 -2
  69. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +1 -2
  70. data/lib/bio/db/transfac.rb +1 -1
  71. data/lib/bio/io/das.rb +40 -41
  72. data/lib/bio/io/fastacmd.rb +0 -16
  73. data/lib/bio/io/fetch.rb +111 -55
  74. data/lib/bio/io/flatfile/buffer.rb +4 -5
  75. data/lib/bio/io/hinv.rb +2 -3
  76. data/lib/bio/io/ncbirest.rb +43 -6
  77. data/lib/bio/io/pubmed.rb +76 -81
  78. data/lib/bio/io/togows.rb +33 -10
  79. data/lib/bio/map.rb +1 -1
  80. data/lib/bio/pathway.rb +1 -1
  81. data/lib/bio/sequence/compat.rb +1 -1
  82. data/lib/bio/sequence/na.rb +63 -12
  83. data/lib/bio/shell.rb +0 -2
  84. data/lib/bio/shell/core.rb +5 -6
  85. data/lib/bio/shell/interface.rb +3 -4
  86. data/lib/bio/shell/irb.rb +1 -2
  87. data/lib/bio/shell/plugin/entry.rb +2 -3
  88. data/lib/bio/shell/plugin/seq.rb +7 -6
  89. data/lib/bio/shell/setup.rb +1 -2
  90. data/lib/bio/tree.rb +2 -2
  91. data/lib/bio/util/contingency_table.rb +0 -2
  92. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +2 -2
  93. data/lib/bio/util/sirna.rb +76 -16
  94. data/lib/bio/version.rb +8 -9
  95. data/sample/benchmark_clustalw_report.rb +47 -0
  96. data/sample/biofetch.rb +248 -151
  97. data/setup.rb +6 -7
  98. data/test/data/clustalw/example1-seqnos.aln +58 -0
  99. data/test/network/bio/appl/blast/test_remote.rb +1 -15
  100. data/test/network/bio/appl/test_blast.rb +0 -12
  101. data/test/network/bio/io/test_pubmed.rb +49 -0
  102. data/test/network/bio/io/test_togows.rb +0 -1
  103. data/test/network/bio/test_command.rb +65 -2
  104. data/test/unit/bio/appl/bl2seq/test_report.rb +0 -1
  105. data/test/unit/bio/appl/blast/test_report.rb +110 -48
  106. data/test/unit/bio/appl/clustalw/test_report.rb +67 -51
  107. data/test/unit/bio/appl/sim4/test_report.rb +46 -17
  108. data/test/unit/bio/appl/test_blast.rb +2 -2
  109. data/test/unit/bio/db/embl/test_embl.rb +0 -1
  110. data/test/unit/bio/db/embl/test_embl_rel89.rb +0 -1
  111. data/test/unit/bio/db/embl/{test_sptr.rb → test_uniprotkb.rb} +111 -115
  112. data/test/unit/bio/db/embl/{test_uniprot_new_part.rb → test_uniprotkb_new_part.rb} +11 -11
  113. data/test/unit/bio/db/genbank/test_genbank.rb +10 -4
  114. data/test/unit/bio/db/pdb/test_pdb.rb +14 -8
  115. data/test/unit/bio/db/test_fasta.rb +41 -1
  116. data/test/unit/bio/db/test_fastq.rb +14 -4
  117. data/test/unit/bio/db/test_gff.rb +2 -2
  118. data/test/unit/bio/db/test_phyloxml.rb +30 -30
  119. data/test/unit/bio/db/test_phyloxml_writer.rb +2 -2
  120. data/test/unit/bio/io/flatfile/test_autodetection.rb +1 -2
  121. data/test/unit/bio/io/flatfile/test_buffer.rb +7 -1
  122. data/test/unit/bio/io/flatfile/test_splitter.rb +1 -1
  123. data/test/unit/bio/io/test_togows.rb +3 -2
  124. data/test/unit/bio/sequence/test_dblink.rb +1 -1
  125. data/test/unit/bio/sequence/test_na.rb +3 -1
  126. data/test/unit/bio/test_alignment.rb +1 -2
  127. data/test/unit/bio/test_command.rb +5 -4
  128. data/test/unit/bio/test_db.rb +4 -2
  129. data/test/unit/bio/test_pathway.rb +25 -10
  130. data/test/unit/bio/util/test_sirna.rb +22 -22
  131. metadata +656 -1430
  132. data/doc/KEGG_API.rd +0 -1843
  133. data/doc/KEGG_API.rd.ja +0 -1834
  134. data/extconf.rb +0 -2
  135. data/lib/bio/appl/blast/ddbj.rb +0 -131
  136. data/lib/bio/db/kegg/taxonomy.rb +0 -280
  137. data/lib/bio/io/dbget.rb +0 -194
  138. data/lib/bio/io/ddbjrest.rb +0 -344
  139. data/lib/bio/io/ddbjxml.rb +0 -458
  140. data/lib/bio/io/ebisoap.rb +0 -158
  141. data/lib/bio/io/ensembl.rb +0 -229
  142. data/lib/bio/io/higet.rb +0 -73
  143. data/lib/bio/io/keggapi.rb +0 -363
  144. data/lib/bio/io/ncbisoap.rb +0 -156
  145. data/lib/bio/io/soapwsdl.rb +0 -119
  146. data/lib/bio/shell/plugin/keggapi.rb +0 -181
  147. data/lib/bio/shell/plugin/soap.rb +0 -87
  148. data/sample/dbget +0 -37
  149. data/sample/demo_ddbjxml.rb +0 -212
  150. data/sample/demo_kegg_taxonomy.rb +0 -92
  151. data/sample/demo_keggapi.rb +0 -502
  152. data/sample/psortplot_html.rb +0 -214
  153. data/test/network/bio/io/test_ddbjrest.rb +0 -47
  154. data/test/network/bio/io/test_ensembl.rb +0 -230
  155. data/test/network/bio/io/test_soapwsdl.rb +0 -53
  156. data/test/unit/bio/io/test_ddbjxml.rb +0 -81
  157. data/test/unit/bio/io/test_ensembl.rb +0 -111
  158. data/test/unit/bio/io/test_soapwsdl.rb +0 -33
@@ -10,20 +10,19 @@
10
10
  module Bio
11
11
 
12
12
  # BioRuby version (Array containing Integer)
13
- BIORUBY_VERSION = [1, 4, 3].extend(Comparable).freeze
13
+ BIORUBY_VERSION = [1, 5, 0].extend(Comparable).freeze
14
14
 
15
15
  # Extra version specifier (String or nil).
16
- # Existance of the value indicates pre-release version or modified version.
16
+ # Existence of the value indicates development version.
17
17
  #
18
18
  # nil :: Release version.
19
- # ".0000"..".4999" :: Release version with patches.
20
- # ".5000" :: Development unstable version.
21
- # ".5001"..".8999" :: Pre-alpha version.
22
- # "-alphaN" (N=0..99) :: Alpha version.
23
- # "-preN" (N=0..99) :: Pre-release test version.
24
- # "-rcN" (N=0..99) :: Release candidate version.
19
+ # "-dev" :: Development version (with YYYYMMDD digits).
20
+ # ".20150630" :: Development version (specify the date digits).
25
21
  #
26
- BIORUBY_EXTRA_VERSION = ".0001"
22
+ # By default, if the third digit (teeny) of BIORUBY_VERSION is 0,
23
+ # the version is regarded as a development version.
24
+ BIORUBY_EXTRA_VERSION =
25
+ nil #(BIORUBY_VERSION[2] == 0) ? "-dev" : nil
27
26
 
28
27
  # Version identifier, including extra version string (String)
29
28
  # Unlike BIORUBY_VERSION, it is not comparable.
@@ -0,0 +1,47 @@
1
+ #
2
+ # = sample/benchmark_clustalw_report.rb - Benchmark tests for Bio::ClustalW::Report
3
+ #
4
+ # Copyright:: Copyright (C) 2013
5
+ # Andrew Grimm <andrew.j.grimm@gmail.com>
6
+ # License:: The Ruby License
7
+
8
+ require 'pathname'
9
+ load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 1, "test",
10
+ 'bioruby_test_helper.rb')).cleanpath.to_s
11
+
12
+ require 'benchmark'
13
+ require 'bio'
14
+
15
+ class BenchmarkClustalWReport
16
+
17
+ DataDir = File.join(BioRubyTestDataPath, 'clustalw')
18
+ Filenames = [ 'example1.aln', 'example1-seqnos.aln' ]
19
+
20
+ def self.benchmark_clustalw_report
21
+ Filenames.each do |fn|
22
+ print "\n", fn, "\n"
23
+ fullpath = File.join(DataDir, fn)
24
+ self.new(fullpath).benchmark
25
+ end
26
+ end
27
+
28
+ def initialize(aln_filename)
29
+ @text = File.open(aln_filename, 'rb') { |f| f.read }
30
+ @text.freeze
31
+ end
32
+
33
+ def benchmark
34
+ GC.start
35
+ Benchmark.bmbm do |x|
36
+ x.report do
37
+ for i in 1...10_000
38
+ aln = Bio::ClustalW::Report.new(@text)
39
+ aln.alignment
40
+ end
41
+ end
42
+ end
43
+ end
44
+
45
+ end #class BenchmarkClustalWReport
46
+
47
+ BenchmarkClustalWReport.benchmark_clustalw_report
@@ -1,8 +1,10 @@
1
- #!/usr/proj/bioruby/bin/ruby
1
+ #!/usr/bin/env ruby
2
+ # coding: utf-8
2
3
  #
3
- # biofetch.rb : BioFetch server (interface to GenomeNet/DBGET via KEGG API)
4
+ # biofetch.rb : BioFetch server (interface to TogoWS)
4
5
  #
5
6
  # Copyright (C) 2002-2004 KATAYAMA Toshiaki <k@bioruby.org>
7
+ # 2013 GOTO Naohisa <ng@bioruby.org>
6
8
  #
7
9
  # This program is free software; you can redistribute it and/or modify
8
10
  # it under the terms of the GNU General Public License as published by
@@ -18,15 +20,97 @@
18
20
  # along with this program; if not, write to the Free Software
19
21
  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
22
  #
21
- # $Id: biofetch.rb,v 1.16 2005/08/07 10:02:41 k Exp $
22
23
  #
23
24
 
24
25
  require 'cgi'
25
- require 'html/template'
26
- require 'bio/io/keggapi'
26
+ require 'erb'
27
+ require 'open-uri'
28
+ require 'fileutils'
29
+ require 'tempfile'
27
30
 
28
31
  MAX_ID_NUM = 50
29
32
 
33
+ # script name
34
+ SCRIPT_NAME = File.basename(__FILE__)
35
+
36
+ # full URL for this CGI
37
+ BASE_URL = "http://bioruby.org/cgi-bin/#{SCRIPT_NAME}"
38
+
39
+ # cache directory for metadata
40
+ # Note: The cache is only for metadata (database list and format list).
41
+ # Data entries are NOT cached.
42
+ CACHE_DIR = '/tmp/biofetch_rb.cache'
43
+
44
+ # cache lifetime
45
+ CACHE_LIFETIME = 60 * 60 # 1 hour
46
+
47
+ module TogoWS
48
+ TOGOWS_URL = 'http://togows.dbcls.jp/'
49
+
50
+ def togows_database_complete_list
51
+ result = togows_get_cached('/entry/')
52
+ result.to_s.split(/\n/).collect {|x| x.split(/\t/) }
53
+ end
54
+
55
+ def togows_database_formats(db)
56
+ db = CGI.escape(db)
57
+ result = togows_get_cached("/entry/#{db}/?formats")
58
+ end
59
+
60
+ def togows_get(path)
61
+ uristr = TOGOWS_URL + path
62
+ begin
63
+ result = OpenURI.open_uri(uristr).read
64
+ rescue OpenURI::HTTPError
65
+ result = nil
66
+ end
67
+ result
68
+ end
69
+
70
+ private
71
+
72
+ def togows_get_cached(path)
73
+ filepath = path.sub(/\A\//, '').sub(/\/\z/, '')
74
+ filepath = filepath.gsub(/\//, " ")
75
+ filepath = filepath.sub(/\?/, '_')
76
+ filepath = File.join(CACHE_DIR, filepath)
77
+ result = nil
78
+ begin
79
+ if Time.now - File.mtime(filepath) > CACHE_LIFETIME
80
+ # delete expired cache file
81
+ File.delete(filepath)
82
+ end
83
+ result = File.read(filepath)
84
+ rescue IOError, SystemCallError
85
+ result = nil
86
+ end
87
+ unless result then
88
+ # valid cache is not found
89
+ result = togows_get(path)
90
+ if result then
91
+ # create cache directory if not found
92
+ FileUtils.mkdir_p(CACHE_DIR, :mode => 0700)
93
+ # simple security check for the cache dir
94
+ if File.stat(CACHE_DIR).mode & 0022 != 0 then
95
+ raise SecurityError, "CACHE_DIR #{CACHE_DIR} is writeable by others"
96
+ end
97
+
98
+ # write to temporary file
99
+ tmp = Tempfile.open('temp', CACHE_DIR)
100
+ tmp.print result
101
+ tmp.close
102
+ # create a hard link from the temporary to the cache file
103
+ begin
104
+ File.link(tmp.path, filepath)
105
+ rescue IOError, SystemCallError
106
+ end
107
+ # the temporary file will be automatically removed at exit
108
+ end
109
+ end
110
+ result
111
+ end
112
+
113
+ end #module TogoWS
30
114
 
31
115
  module BioFetchError
32
116
 
@@ -36,32 +120,50 @@ module BioFetchError
36
120
  exit
37
121
  end
38
122
 
123
+ def print_html_page(str)
124
+ print "Content-type: text/html; charset=UTF-8\n\n"
125
+ print "<pre>", CGI.escapeHTML(str), "</pre>\n"
126
+ exit
127
+ end
128
+
39
129
  def error1(db)
130
+ db = CGI.escapeHTML(db.to_s) # to avoid potential XSS with old IE
40
131
  str = "ERROR 1 Unknown database [#{db}]."
41
132
  print_text_page(str)
42
133
  end
43
134
 
44
135
  def error2(style)
136
+ style = CGI.escapeHTML(style.to_s) # to avoid potential XSS with old IE
45
137
  str = "ERROR 2 Unknown style [#{style}]."
46
138
  print_text_page(str)
47
139
  end
48
140
 
49
141
  def error3(format, db)
142
+ # to avoid potential XSS with old IE which ignores Content-Type
143
+ db = CGI.escapeHTML(db.to_s)
144
+ format = CGI.escapeHTML(format.to_s)
50
145
  str = "ERROR 3 Format [#{format}] not known for database [#{db}]."
51
146
  print_text_page(str)
52
147
  end
53
148
 
54
149
  def error4(entry_id, db)
150
+ # to avoid potential XSS with old IE which ignores Content-Type
151
+ entry_id = CGI.escapeHTML(entry_id.to_s)
152
+ db = CGI.escapeHTML(db.to_s)
55
153
  str = "ERROR 4 ID [#{entry_id}] not found in database [#{db}]."
56
154
  print_text_page(str)
57
155
  end
58
156
 
59
157
  def error5(count)
158
+ # to avoid potential XSS with old IE which ignores Content-Type
159
+ count = CGI.escapeHTML(count.to_s)
60
160
  str = "ERROR 5 Too many IDs [#{count}]. Max [#{MAX_ID_NUM}] allowed."
61
161
  print_text_page(str)
62
162
  end
63
163
 
64
164
  def error6(info)
165
+ # to avoid potential XSS with old IE which ignores Content-Type
166
+ count = CGI.escapeHTML(info.to_s)
65
167
  str = "ERROR 6 Illegal information request [#{info}]."
66
168
  print_text_page(str)
67
169
  end
@@ -70,23 +172,35 @@ end
70
172
 
71
173
 
72
174
 
73
- module KeggAPI
175
+ module ApiBridge
74
176
 
75
177
  include BioFetchError
178
+ include TogoWS
179
+
180
+ def list_databases_with_synonyms
181
+ togows_database_complete_list
182
+ end
76
183
 
77
184
  def list_databases
78
- serv = Bio::KEGG::API.new
79
- results = serv.list_databases
80
- results.collect {|x| x.entry_id}
185
+ list_databases_with_synonyms.flatten
81
186
  end
82
187
 
83
188
  def bget(db, id_list, format)
84
- serv = Bio::KEGG::API.new
189
+ case format
190
+ when 'fasta'
191
+ format = '.fasta'
192
+ else
193
+ format = ''
194
+ end
195
+ db = CGI.escape(db)
196
+
85
197
  results = ''
86
198
  id_list.each do |query_id|
87
- entry_id = "#{db}:#{query_id}"
88
- result = serv.get_entries([entry_id])
89
- if result.empty?
199
+ query_id = CGI.escape(query_id)
200
+ path = "/entry/#{db}/#{query_id}#{format}"
201
+ result = togows_get(path)
202
+
203
+ if !result or result.empty? or /\AError\: / =~ result then
90
204
  error4(query_id, db)
91
205
  else
92
206
  results << result
@@ -95,78 +209,41 @@ module KeggAPI
95
209
  return results
96
210
  end
97
211
 
98
- end
99
-
100
-
101
-
102
-
103
- class BioFetch
104
-
105
- include BioFetchError
106
- include KeggAPI
107
-
108
- def initialize(db, id_list, style, format)
109
- check_style(style)
110
- check_format(format, db)
111
- check_number_of_id(id_list.length)
112
- check_dbname(db)
212
+ def check_fasta_ok?(db)
213
+ result = togows_database_formats(db)
214
+ /^fasta$/ =~ result.to_s
215
+ end
113
216
 
114
- if /html/.match(style)
115
- goto_html_style_page(db, id_list, format)
116
- end
217
+ end #module ApiBridge
117
218
 
118
- entries = bget(db, id_list, format)
219
+ module BioFetchCheck
119
220
 
120
- if /fasta/.match(format) and entries
121
- entries = convert_to_fasta_format(entries, db)
122
- end
123
-
124
- print_text_page(entries)
125
- end
221
+ include ApiBridge
126
222
 
127
223
  private
128
224
 
129
- def convert_to_fasta_format(str, db)
130
- require 'bio'
131
- require 'stringio'
132
-
133
- fasta = Array.new
134
-
135
- entries = StringIO.new(str)
136
- Bio::FlatFile.auto(entries) do |ff|
137
- ff.each do |entry|
138
- seq = nil
139
- if entry.respond_to?(:seq)
140
- seq = entry.seq
141
- elsif entry.respond_to?(:aaseq)
142
- seq = entry.aaseq
143
- elsif entry.respond_to?(:naseq)
144
- seq = entry.naseq
145
- end
146
- if seq
147
- entry_id = entry.respond_to?(:entry_id) ? entry.entry_id : ''
148
- definition = entry.respond_to?(:definition) ? entry.definition : ''
149
- fasta << seq.to_fasta("#{db}:#{entry_id} #{definition}", 60)
150
- end
151
- end
152
- end
153
- return fasta.join
154
- end
155
-
156
- def goto_html_style_page(db, id_list, format)
157
- url = "http://www.genome.jp/dbget-bin/www_bget"
158
- opt = '-f+' if /fasta/.match(format)
159
- ids = id_list.join('%2B')
160
- print "Location: #{url}?#{opt}#{db}+#{ids}\n\n"
161
- exit
162
- end
163
-
164
225
  def check_style(style)
165
- error2(style) unless /html|raw/.match(style)
226
+ style = style.to_s.downcase
227
+ error2(style) unless /\A(html|raw)\z/.match(style)
228
+ style
166
229
  end
167
230
 
168
231
  def check_format(format, db)
169
- error3(format, db) if format && ! /fasta|default/.match(format)
232
+ fmt = format ? format.to_s.downcase : nil
233
+ case fmt
234
+ when 'fasta'
235
+ db = check_dbname(db)
236
+ fmt = nil unless check_fasta_ok?(db)
237
+ when 'default'
238
+ # do nothing
239
+ when nil
240
+ fmt = 'default'
241
+ else
242
+ fmt = nil
243
+ end
244
+
245
+ error3(format, db) unless fmt
246
+ fmt
170
247
  end
171
248
 
172
249
  def check_number_of_id(num)
@@ -174,22 +251,50 @@ class BioFetch
174
251
  end
175
252
 
176
253
  def check_dbname(db)
254
+ db = db.to_s.downcase
177
255
  error1(db) unless list_databases.include?(db)
256
+ db
178
257
  end
179
258
 
180
- end
259
+ end #module BioFetchCheck
260
+
261
+ class BioFetch
262
+
263
+ include BioFetchCheck
264
+ include BioFetchError
265
+ include ApiBridge
266
+
267
+ def initialize(db, id_list, style, format)
268
+ style = check_style(style)
269
+ format = check_format(format, db)
270
+ check_number_of_id(id_list.length)
271
+ db = check_dbname(db)
272
+
273
+ entries = bget(db, id_list, format)
274
+
275
+ if style == 'html' then
276
+ print_html_page(entries)
277
+ else
278
+ print_text_page(entries)
279
+ end
280
+
281
+ end
282
+
283
+ end #class BioFetch
181
284
 
182
285
 
183
286
 
184
287
  class BioFetchInfo
185
288
 
289
+ include BioFetchCheck
186
290
  include BioFetchError
187
- include KeggAPI
291
+ include ApiBridge
188
292
 
189
293
  def initialize(info, db)
190
294
  @db = db
295
+
191
296
  begin
192
- send(info)
297
+ check_info(info) ? __send__(info) : raise
193
298
  rescue
194
299
  error6(info)
195
300
  end
@@ -197,13 +302,18 @@ class BioFetchInfo
197
302
 
198
303
  private
199
304
 
305
+ def check_info(meth_name)
306
+ /\A(dbs|formats|maxids)\z/ =~ meth_name
307
+ end
308
+
200
309
  def dbs
201
310
  str = list_databases.sort.join(' ')
202
311
  print_text_page(str)
203
312
  end
204
313
 
205
314
  def formats
206
- fasta = " fasta" if check_fasta_ok
315
+ db = check_dbname(@db)
316
+ fasta = " fasta" if check_fasta_ok?(db)
207
317
  str = "default#{fasta}"
208
318
  print_text_page(str)
209
319
  end
@@ -213,17 +323,14 @@ class BioFetchInfo
213
323
  print_text_page(str)
214
324
  end
215
325
 
216
- def check_fasta_ok
217
- # sequence databases supported by Bio::FlatFile.auto
218
- /genes|gb|genbank|genpept|rs|refseq|emb|sp|swiss|pir/.match(@db)
219
- end
220
-
221
- end
326
+ end #class BioFetchInfo
222
327
 
223
328
 
224
329
 
225
330
  class BioFetchCGI
226
331
 
332
+ include ApiBridge
333
+
227
334
  def initialize(cgi)
228
335
  @cgi = cgi
229
336
  show_page
@@ -244,11 +351,14 @@ class BioFetchCGI
244
351
  end
245
352
 
246
353
  def show_query_page
247
- html = HTML::Template.new
248
- html.set_html(DATA.read)
249
- html.param('max_id_num' => MAX_ID_NUM)
250
- @cgi.out do
251
- html.output
354
+ html = ERB.new(DATA.read)
355
+ max_id_num = MAX_ID_NUM
356
+ databases_with_synonyms = list_databases_with_synonyms
357
+ databases = list_databases
358
+ script_name = SCRIPT_NAME
359
+ base_url = BASE_URL
360
+ @cgi.out({ "type" => "text/html", "charset" => "utf-8" }) do
361
+ html.result(binding)
252
362
  end
253
363
  end
254
364
 
@@ -269,7 +379,7 @@ class BioFetchCGI
269
379
  end
270
380
 
271
381
  def id_list
272
- @cgi['id'].split(/\W/) # not only ','
382
+ @cgi['id'].strip.split(/[\,\s]+/)
273
383
  end
274
384
 
275
385
  def style
@@ -295,9 +405,8 @@ BioFetchCGI.new(CGI.new)
295
405
  This program was created during BioHackathon 2002, Tucson and updated
296
406
  in Cape Town :)
297
407
 
298
- Rewrited in 2004 to use KEGG API as the bioruby.org server left from Kyoto
299
- University (where DBGET runs) and the old version could not run without
300
- having internally accessible DBGET server.
408
+ Rewritten in 2013 to use the TogoWS API, as the bioruby.org server left The
409
+ University of Tokyo and the old SOAP-based KEGG API was discontinued.
301
410
 
302
411
  =end
303
412
 
@@ -308,7 +417,7 @@ __END__
308
417
  <HEAD>
309
418
  <LINK href="http://bioruby.org/img/favicon.png" rel="icon" type="image/png">
310
419
  <LINK href="http://bioruby.org/css/bioruby.css" rel="stylesheet" type="text/css">
311
- <TITLE>BioFetch interface to GenomeNet/DBGET</TITLE>
420
+ <TITLE>BioFetch interface to TogoWS</TITLE>
312
421
  </HEAD>
313
422
 
314
423
  <BODY bgcolor="#ffffff">
@@ -316,37 +425,26 @@ __END__
316
425
  <H1>
317
426
  <IMG src="http://bioruby.org/img/ruby.png" align="middle">
318
427
  BioFetch interface to
319
- <A href="http://www.genome.jp/dbget/">GenomeNet/DBGET</A>
428
+ <A href="http://togows.dbcls.jp/">TogoWS</A>
320
429
  </H1>
321
430
 
322
- <P>This page allows you to retrieve up to <!var:max_id_num> entries at a time from various up-to-date biological databases.</P>
431
+ <P>This page allows you to retrieve up to <%= max_id_num %> entries at a time from various up-to-date biological databases.</P>
323
432
 
324
433
  <HR>
325
434
 
326
- <FORM METHOD="post" ENCTYPE="application/x-www-form-urlencoded" action="biofetch.rb">
435
+ <FORM METHOD="post" ENCTYPE="application/x-www-form-urlencoded" action="<%= script_name %>">
327
436
 
328
437
  <SELECT name="db">
329
- <OPTION value="genbank">GenBank</OPTION>
330
- <OPTION value="refseq">RefSeq</OPTION>
331
- <OPTION value="embl">EMBL</OPTION>
332
- <OPTION value="swissprot">Swiss-Prot</OPTION>
333
- <OPTION value="pir">PIR</OPTION>
334
- <OPTION value="prf">PRF</OPTION>
335
- <OPTION value="pdb">PDB</OPTION>
336
- <OPTION value="pdbstr">PDBSTR</OPTION>
337
- <OPTION value="epd">EPD</OPTION>
338
- <OPTION value="transfac">TRANSFAC</OPTION>
339
- <OPTION value="prosite">PROSITE</OPTION>
340
- <OPTION value="pmd">PMD</OPTION>
341
- <OPTION value="litdb">LITDB</OPTION>
342
- <OPTION value="omim">OMIM</OPTION>
343
- <OPTION value="ligand">KEGG/LIGAND</OPTION>
344
- <OPTION value="pathway">KEGG/PATHWAY</OPTION>
345
- <OPTION value="brite">KEGG/BRITE</OPTION>
346
- <OPTION value="genes">KEGG/GENES</OPTION>
347
- <OPTION value="genome">KEGG/GENOME</OPTION>
348
- <OPTION value="linkdb">LinkDB</OPTION>
349
- <OPTION value="aaindex">AAindex</OPTION>
438
+ <% databases_with_synonyms.each do |dbs|
439
+ a = dbs[1..-1]
440
+ synonyms = unless a.empty? then
441
+ " (abbr: " + a.join(", ") + ")"
442
+ else
443
+ ""
444
+ end
445
+ %>
446
+ <OPTION value="<%= dbs[0] %>"><%= dbs[0] %><%= synonyms %></OPTION>
447
+ <% end %>
350
448
  </SELECT>
351
449
 
352
450
  <INPUT name="id" size="40" type="text" maxlength="1000">
@@ -369,7 +467,7 @@ BioFetch interface to
369
467
 
370
468
  <H2>Direct access</H2>
371
469
 
372
- <P>http://bioruby.org/cgi-bin/biofetch.rb?format=(default|fasta|...);style=(html|raw);db=(genbank|embl|...);id=ID[,ID,ID,...]</P>
470
+ <P><%= base_url %>?format=(default|fasta|...);style=(html|raw);db=(nuccore|embl|...);id=ID[,ID,ID,...]</P>
373
471
  <P>(NOTE: the option separator ';' can be '&')</P>
374
472
 
375
473
  <DL>
@@ -380,7 +478,7 @@ BioFetch interface to
380
478
  <DD> html|raw
381
479
 
382
480
  <DT> <U>db</U> (required)
383
- <DD> genbank|refseq|embl|swissprot|pir|prf|pdb|pdbstr|epd|transfac|prosite|pmd|litdb|omim|ligand|pathway|brite|genes|genome|linkdb|aaindex|...
481
+ <DD> <%= databases.join('|') %>
384
482
 
385
483
  <DT> <U>id</U> (required)
386
484
  <DD> comma separated list of IDs
@@ -392,66 +490,65 @@ BioFetch interface to
392
490
 
393
491
  <DL>
394
492
  <DT> <A href="?info=dbs">What databases are available?</A>
395
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?info=dbs
493
+ <DD> <%= base_url %>?info=dbs
396
494
 
397
495
  <DT> <A href="?info=formats;db=embl">What formats does the database X have?</A>
398
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?info=formats;db=embl
496
+ <DD> <%= base_url %>?info=formats;db=embl
399
497
 
400
498
  <DT> <A href="?info=maxids">How many entries can be retrieved simultaneously?</A>
401
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?info=maxids
499
+ <DD> <%= base_url %>?info=maxids
402
500
  </DL>
403
501
 
404
502
  <H2>Examples</H2>
405
503
 
406
504
  <DL>
407
- <DT> <A href="?format=default;style=raw;db=genbank;id=AJ617376">gb:AJ617376</A> (default/raw)
408
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=AJ617376
505
+ <DT> <A href="?format=default;style=raw;db=nuccore;id=AJ617376">nuccore/AJ617376</A> (default/raw)
506
+ <DD> <%= base_url %>?format=default;style=raw;db=nuccore;id=AJ617376
409
507
 
410
- <DT> <A href="?format=fasta;style=raw;db=genbank;id=AJ617376">gb:AJ617376</A> (fasta/raw)
411
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=fasta;style=raw;db=genbank;id=AJ617376
508
+ <DT> <A href="?format=fasta;style=raw;db=nuccore;id=AJ617376">nuccore/AJ617376</A> (fasta/raw)
509
+ <DD> <%= base_url %>?format=fasta;style=raw;db=nuccore;id=AJ617376
412
510
 
413
- <DT> <A href="?format=default;style=html;db=genbank;id=AJ617376">gb:AJ617376</A> (default/html)
414
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=html;db=genbank;id=AJ617376
511
+ <DT> <A href="?format=default;style=html;db=nuccore;id=AJ617376">nuccore/AJ617376</A> (default/html)
512
+ <DD> <%= base_url %>?format=default;style=html;db=nuccore;id=AJ617376
415
513
 
416
- <DT> <A href="?format=default;style=raw;db=genbank;id=AJ617376,AJ617377">gb:AJ617376,AJ617377</A> (default/raw, multiple)
417
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=AJ617376,AJ617377
514
+ <DT> <A href="?format=default;style=raw;db=nuccore;id=AJ617376,AJ617377">nuccore/AJ617376,AJ617377</A> (default/raw, multiple)
515
+ <DD> <%= base_url %>?format=default;style=raw;db=nuccore;id=AJ617376,AJ617377
418
516
 
419
- <DT> <A href="?format=default;style=raw;db=embl;id=BUM">embl:BUM</A> (default/raw)
420
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=embl;id=BUM
517
+ <DT> <A href="?format=default;style=raw;db=embl;id=J00231">embl/J00231</A> (default/raw)
518
+ <DD> <%= base_url %>?format=default;style=raw;db=embl;id=J00231
421
519
 
422
- <DT> <A href="?format=default;style=raw;db=swissprot;id=CYC_BOVIN">sp:CYC_BOVIN</A> (default/raw)
423
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=swissprot;id=CYC_BOVIN
520
+ <DT> <A href="?format=default;style=raw;db=uniprot;id=CYC_BOVIN">uniprot/CYC_BOVIN</A> (default/raw)
521
+ <DD> <%= base_url %>?format=default;style=raw;db=uniprot;id=CYC_BOVIN
424
522
 
425
- <DT> <A href="?format=fasta;style=raw;db=swissprot;id=CYC_BOVIN">sp:CYC_BOVIN</A> (fasta/raw)
426
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=fasta;style=raw;db=swissprot;id=CYC_BOVIN
523
+ <DT> <A href="?format=fasta;style=raw;db=uniprot;id=CYC_BOVIN">uniprot/CYC_BOVIN</A> (fasta/raw)
524
+ <DD> <%= base_url %>?format=fasta;style=raw;db=uniprot;id=CYC_BOVIN
427
525
 
428
- <DT> <A href="?format=default;style=raw;db=genes;id=b0015">genes:b0015</A> (default/raw)
429
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genes;id=b0015
526
+ <DT> <A href="?format=default;style=raw;db=genes;id=eco%3Ab0015">genes/eco:b0015</A> (default/raw)
527
+ <DD> <%= base_url %>?format=default;style=raw;db=genes;id=eco%3Ab0015
528
+ <DD> <%= base_url %>?format=default;style=raw;db=genes;id=eco:b0015
430
529
 
431
- <DT> <A href="?format=default;style=raw;db=prosite;id=PS00028">ps:PS00028</A> (default/raw)
432
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=prosite;id=PS00028
433
530
  </DL>
434
531
 
435
532
  <H2>Errors</H2>
436
533
 
437
534
  <DL>
438
535
  <DT> <A href="?format=default;style=raw;db=nonexistent;id=AJ617376">Error1</A> sample : DB not found
439
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=nonexistent;id=AJ617376"
536
+ <DD> <%= base_url %>?format=default;style=raw;db=nonexistent;id=AJ617376
440
537
 
441
- <DT> <A href="?format=default;style=nonexistent;db=genbank;id=AJ617376">Error2</A> sample : unknown style
442
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=nonexistent;db=genbank;id=AJ617376"
538
+ <DT> <A href="?format=default;style=nonexistent;db=nuccore;id=AJ617376">Error2</A> sample : unknown style
539
+ <DD> <%= base_url %>?format=default;style=nonexistent;db=nuccore;id=AJ617376
443
540
 
444
- <DT> <A href="?format=nonexistent;style=raw;db=genbank;id=AJ617376">Error3</A> sample : unknown format
445
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=nonexistent;style=raw;db=genbank;id=AJ617376"
541
+ <DT> <A href="?format=nonexistent;style=raw;db=nuccore;id=AJ617376">Error3</A> sample : unknown format
542
+ <DD> <%= base_url %>?format=nonexistent;style=raw;db=nuccore;id=AJ617376
446
543
 
447
- <DT> <A href="?format=default;style=raw;db=genbank;id=nonexistent">Error4</A> sample : ID not found
448
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=nonexistent"
544
+ <DT> <A href="?format=default;style=raw;db=nuccore;id=nonexistent">Error4</A> sample : ID not found
545
+ <DD> <%= base_url %>?format=default;style=raw;db=nuccore;id=nonexistent
449
546
 
450
547
  <DT> <A href="?style=raw;db=genes;id=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51">Error5</A> sample : too many IDs
451
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?style=raw;db=genes;id=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51
548
+ <DD> <%= base_url %>?style=raw;db=genes;id=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51
452
549
 
453
550
  <DT> <A href="?info=nonexistent">Error6</A> sample : unknown info
454
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?info=nonexistent"
551
+ <DD> <%= base_url %>?info=nonexistent
455
552
  </DL>
456
553
 
457
554
  <H2>Other BioFetch implementations</H2>