bio 1.4.3.0001 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (158) hide show
  1. checksums.yaml +7 -0
  2. data/.travis.yml +39 -33
  3. data/BSDL +22 -0
  4. data/COPYING +2 -2
  5. data/COPYING.ja +36 -36
  6. data/ChangeLog +2404 -1025
  7. data/KNOWN_ISSUES.rdoc +15 -55
  8. data/README.rdoc +17 -23
  9. data/RELEASE_NOTES.rdoc +246 -183
  10. data/Rakefile +3 -2
  11. data/bin/br_biofetch.rb +29 -5
  12. data/bioruby.gemspec +15 -32
  13. data/bioruby.gemspec.erb +10 -20
  14. data/doc/ChangeLog-1.4.3 +1478 -0
  15. data/doc/RELEASE_NOTES-1.4.3.rdoc +204 -0
  16. data/doc/Tutorial.rd +0 -6
  17. data/doc/Tutorial.rd.html +7 -12
  18. data/doc/Tutorial.rd.ja +960 -1064
  19. data/doc/Tutorial.rd.ja.html +977 -1067
  20. data/gemfiles/Gemfile.travis-jruby1.8 +2 -1
  21. data/gemfiles/Gemfile.travis-jruby1.9 +2 -4
  22. data/gemfiles/Gemfile.travis-rbx +13 -0
  23. data/gemfiles/Gemfile.travis-ruby1.8 +2 -1
  24. data/gemfiles/Gemfile.travis-ruby1.9 +2 -4
  25. data/gemfiles/Gemfile.travis-ruby2.2 +9 -0
  26. data/lib/bio.rb +10 -43
  27. data/lib/bio/alignment.rb +8 -14
  28. data/lib/bio/appl/blast.rb +1 -2
  29. data/lib/bio/appl/blast/format0.rb +18 -7
  30. data/lib/bio/appl/blast/remote.rb +0 -9
  31. data/lib/bio/appl/blast/report.rb +1 -1
  32. data/lib/bio/appl/clustalw/report.rb +3 -1
  33. data/lib/bio/appl/genscan/report.rb +1 -2
  34. data/lib/bio/appl/iprscan/report.rb +1 -2
  35. data/lib/bio/appl/meme/mast.rb +4 -4
  36. data/lib/bio/appl/meme/mast/report.rb +1 -1
  37. data/lib/bio/appl/paml/codeml.rb +2 -2
  38. data/lib/bio/appl/paml/codeml/report.rb +1 -0
  39. data/lib/bio/appl/paml/common.rb +1 -1
  40. data/lib/bio/appl/sosui/report.rb +1 -2
  41. data/lib/bio/command.rb +62 -2
  42. data/lib/bio/data/aa.rb +13 -31
  43. data/lib/bio/data/codontable.rb +1 -2
  44. data/lib/bio/db/biosql/biosql_to_biosequence.rb +1 -0
  45. data/lib/bio/db/biosql/sequence.rb +1 -1
  46. data/lib/bio/db/embl/common.rb +1 -1
  47. data/lib/bio/db/embl/embl.rb +5 -4
  48. data/lib/bio/db/embl/format_embl.rb +3 -3
  49. data/lib/bio/db/embl/sptr.rb +9 -1444
  50. data/lib/bio/db/embl/swissprot.rb +12 -29
  51. data/lib/bio/db/embl/trembl.rb +13 -30
  52. data/lib/bio/db/embl/uniprot.rb +12 -29
  53. data/lib/bio/db/embl/uniprotkb.rb +1455 -0
  54. data/lib/bio/db/fasta.rb +17 -0
  55. data/lib/bio/db/fasta/defline.rb +1 -3
  56. data/lib/bio/db/fastq.rb +1 -1
  57. data/lib/bio/db/genbank/ddbj.rb +9 -5
  58. data/lib/bio/db/genbank/refseq.rb +11 -3
  59. data/lib/bio/db/gff.rb +3 -4
  60. data/lib/bio/db/go.rb +5 -6
  61. data/lib/bio/db/kegg/module.rb +4 -5
  62. data/lib/bio/db/kegg/pathway.rb +4 -5
  63. data/lib/bio/db/kegg/reaction.rb +1 -1
  64. data/lib/bio/db/nexus.rb +3 -2
  65. data/lib/bio/db/pdb/pdb.rb +2 -2
  66. data/lib/bio/db/phyloxml/phyloxml_elements.rb +82 -59
  67. data/lib/bio/db/phyloxml/phyloxml_parser.rb +2 -2
  68. data/lib/bio/db/phyloxml/phyloxml_writer.rb +1 -2
  69. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +1 -2
  70. data/lib/bio/db/transfac.rb +1 -1
  71. data/lib/bio/io/das.rb +40 -41
  72. data/lib/bio/io/fastacmd.rb +0 -16
  73. data/lib/bio/io/fetch.rb +111 -55
  74. data/lib/bio/io/flatfile/buffer.rb +4 -5
  75. data/lib/bio/io/hinv.rb +2 -3
  76. data/lib/bio/io/ncbirest.rb +43 -6
  77. data/lib/bio/io/pubmed.rb +76 -81
  78. data/lib/bio/io/togows.rb +33 -10
  79. data/lib/bio/map.rb +1 -1
  80. data/lib/bio/pathway.rb +1 -1
  81. data/lib/bio/sequence/compat.rb +1 -1
  82. data/lib/bio/sequence/na.rb +63 -12
  83. data/lib/bio/shell.rb +0 -2
  84. data/lib/bio/shell/core.rb +5 -6
  85. data/lib/bio/shell/interface.rb +3 -4
  86. data/lib/bio/shell/irb.rb +1 -2
  87. data/lib/bio/shell/plugin/entry.rb +2 -3
  88. data/lib/bio/shell/plugin/seq.rb +7 -6
  89. data/lib/bio/shell/setup.rb +1 -2
  90. data/lib/bio/tree.rb +2 -2
  91. data/lib/bio/util/contingency_table.rb +0 -2
  92. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +2 -2
  93. data/lib/bio/util/sirna.rb +76 -16
  94. data/lib/bio/version.rb +8 -9
  95. data/sample/benchmark_clustalw_report.rb +47 -0
  96. data/sample/biofetch.rb +248 -151
  97. data/setup.rb +6 -7
  98. data/test/data/clustalw/example1-seqnos.aln +58 -0
  99. data/test/network/bio/appl/blast/test_remote.rb +1 -15
  100. data/test/network/bio/appl/test_blast.rb +0 -12
  101. data/test/network/bio/io/test_pubmed.rb +49 -0
  102. data/test/network/bio/io/test_togows.rb +0 -1
  103. data/test/network/bio/test_command.rb +65 -2
  104. data/test/unit/bio/appl/bl2seq/test_report.rb +0 -1
  105. data/test/unit/bio/appl/blast/test_report.rb +110 -48
  106. data/test/unit/bio/appl/clustalw/test_report.rb +67 -51
  107. data/test/unit/bio/appl/sim4/test_report.rb +46 -17
  108. data/test/unit/bio/appl/test_blast.rb +2 -2
  109. data/test/unit/bio/db/embl/test_embl.rb +0 -1
  110. data/test/unit/bio/db/embl/test_embl_rel89.rb +0 -1
  111. data/test/unit/bio/db/embl/{test_sptr.rb → test_uniprotkb.rb} +111 -115
  112. data/test/unit/bio/db/embl/{test_uniprot_new_part.rb → test_uniprotkb_new_part.rb} +11 -11
  113. data/test/unit/bio/db/genbank/test_genbank.rb +10 -4
  114. data/test/unit/bio/db/pdb/test_pdb.rb +14 -8
  115. data/test/unit/bio/db/test_fasta.rb +41 -1
  116. data/test/unit/bio/db/test_fastq.rb +14 -4
  117. data/test/unit/bio/db/test_gff.rb +2 -2
  118. data/test/unit/bio/db/test_phyloxml.rb +30 -30
  119. data/test/unit/bio/db/test_phyloxml_writer.rb +2 -2
  120. data/test/unit/bio/io/flatfile/test_autodetection.rb +1 -2
  121. data/test/unit/bio/io/flatfile/test_buffer.rb +7 -1
  122. data/test/unit/bio/io/flatfile/test_splitter.rb +1 -1
  123. data/test/unit/bio/io/test_togows.rb +3 -2
  124. data/test/unit/bio/sequence/test_dblink.rb +1 -1
  125. data/test/unit/bio/sequence/test_na.rb +3 -1
  126. data/test/unit/bio/test_alignment.rb +1 -2
  127. data/test/unit/bio/test_command.rb +5 -4
  128. data/test/unit/bio/test_db.rb +4 -2
  129. data/test/unit/bio/test_pathway.rb +25 -10
  130. data/test/unit/bio/util/test_sirna.rb +22 -22
  131. metadata +656 -1430
  132. data/doc/KEGG_API.rd +0 -1843
  133. data/doc/KEGG_API.rd.ja +0 -1834
  134. data/extconf.rb +0 -2
  135. data/lib/bio/appl/blast/ddbj.rb +0 -131
  136. data/lib/bio/db/kegg/taxonomy.rb +0 -280
  137. data/lib/bio/io/dbget.rb +0 -194
  138. data/lib/bio/io/ddbjrest.rb +0 -344
  139. data/lib/bio/io/ddbjxml.rb +0 -458
  140. data/lib/bio/io/ebisoap.rb +0 -158
  141. data/lib/bio/io/ensembl.rb +0 -229
  142. data/lib/bio/io/higet.rb +0 -73
  143. data/lib/bio/io/keggapi.rb +0 -363
  144. data/lib/bio/io/ncbisoap.rb +0 -156
  145. data/lib/bio/io/soapwsdl.rb +0 -119
  146. data/lib/bio/shell/plugin/keggapi.rb +0 -181
  147. data/lib/bio/shell/plugin/soap.rb +0 -87
  148. data/sample/dbget +0 -37
  149. data/sample/demo_ddbjxml.rb +0 -212
  150. data/sample/demo_kegg_taxonomy.rb +0 -92
  151. data/sample/demo_keggapi.rb +0 -502
  152. data/sample/psortplot_html.rb +0 -214
  153. data/test/network/bio/io/test_ddbjrest.rb +0 -47
  154. data/test/network/bio/io/test_ensembl.rb +0 -230
  155. data/test/network/bio/io/test_soapwsdl.rb +0 -53
  156. data/test/unit/bio/io/test_ddbjxml.rb +0 -81
  157. data/test/unit/bio/io/test_ensembl.rb +0 -111
  158. data/test/unit/bio/io/test_soapwsdl.rb +0 -33
@@ -10,20 +10,19 @@
10
10
  module Bio
11
11
 
12
12
  # BioRuby version (Array containing Integer)
13
- BIORUBY_VERSION = [1, 4, 3].extend(Comparable).freeze
13
+ BIORUBY_VERSION = [1, 5, 0].extend(Comparable).freeze
14
14
 
15
15
  # Extra version specifier (String or nil).
16
- # Existance of the value indicates pre-release version or modified version.
16
+ # Existence of the value indicates development version.
17
17
  #
18
18
  # nil :: Release version.
19
- # ".0000"..".4999" :: Release version with patches.
20
- # ".5000" :: Development unstable version.
21
- # ".5001"..".8999" :: Pre-alpha version.
22
- # "-alphaN" (N=0..99) :: Alpha version.
23
- # "-preN" (N=0..99) :: Pre-release test version.
24
- # "-rcN" (N=0..99) :: Release candidate version.
19
+ # "-dev" :: Development version (with YYYYMMDD digits).
20
+ # ".20150630" :: Development version (specify the date digits).
25
21
  #
26
- BIORUBY_EXTRA_VERSION = ".0001"
22
+ # By default, if the third digit (teeny) of BIORUBY_VERSION is 0,
23
+ # the version is regarded as a development version.
24
+ BIORUBY_EXTRA_VERSION =
25
+ nil #(BIORUBY_VERSION[2] == 0) ? "-dev" : nil
27
26
 
28
27
  # Version identifier, including extra version string (String)
29
28
  # Unlike BIORUBY_VERSION, it is not comparable.
@@ -0,0 +1,47 @@
1
+ #
2
+ # = sample/benchmark_clustalw_report.rb - Benchmark tests for Bio::ClustalW::Report
3
+ #
4
+ # Copyright:: Copyright (C) 2013
5
+ # Andrew Grimm <andrew.j.grimm@gmail.com>
6
+ # License:: The Ruby License
7
+
8
+ require 'pathname'
9
+ load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 1, "test",
10
+ 'bioruby_test_helper.rb')).cleanpath.to_s
11
+
12
+ require 'benchmark'
13
+ require 'bio'
14
+
15
+ class BenchmarkClustalWReport
16
+
17
+ DataDir = File.join(BioRubyTestDataPath, 'clustalw')
18
+ Filenames = [ 'example1.aln', 'example1-seqnos.aln' ]
19
+
20
+ def self.benchmark_clustalw_report
21
+ Filenames.each do |fn|
22
+ print "\n", fn, "\n"
23
+ fullpath = File.join(DataDir, fn)
24
+ self.new(fullpath).benchmark
25
+ end
26
+ end
27
+
28
+ def initialize(aln_filename)
29
+ @text = File.open(aln_filename, 'rb') { |f| f.read }
30
+ @text.freeze
31
+ end
32
+
33
+ def benchmark
34
+ GC.start
35
+ Benchmark.bmbm do |x|
36
+ x.report do
37
+ for i in 1...10_000
38
+ aln = Bio::ClustalW::Report.new(@text)
39
+ aln.alignment
40
+ end
41
+ end
42
+ end
43
+ end
44
+
45
+ end #class BenchmarkClustalWReport
46
+
47
+ BenchmarkClustalWReport.benchmark_clustalw_report
@@ -1,8 +1,10 @@
1
- #!/usr/proj/bioruby/bin/ruby
1
+ #!/usr/bin/env ruby
2
+ # coding: utf-8
2
3
  #
3
- # biofetch.rb : BioFetch server (interface to GenomeNet/DBGET via KEGG API)
4
+ # biofetch.rb : BioFetch server (interface to TogoWS)
4
5
  #
5
6
  # Copyright (C) 2002-2004 KATAYAMA Toshiaki <k@bioruby.org>
7
+ # 2013 GOTO Naohisa <ng@bioruby.org>
6
8
  #
7
9
  # This program is free software; you can redistribute it and/or modify
8
10
  # it under the terms of the GNU General Public License as published by
@@ -18,15 +20,97 @@
18
20
  # along with this program; if not, write to the Free Software
19
21
  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
22
  #
21
- # $Id: biofetch.rb,v 1.16 2005/08/07 10:02:41 k Exp $
22
23
  #
23
24
 
24
25
  require 'cgi'
25
- require 'html/template'
26
- require 'bio/io/keggapi'
26
+ require 'erb'
27
+ require 'open-uri'
28
+ require 'fileutils'
29
+ require 'tempfile'
27
30
 
28
31
  MAX_ID_NUM = 50
29
32
 
33
+ # script name
34
+ SCRIPT_NAME = File.basename(__FILE__)
35
+
36
+ # full URL for this CGI
37
+ BASE_URL = "http://bioruby.org/cgi-bin/#{SCRIPT_NAME}"
38
+
39
+ # cache directory for metadata
40
+ # Note: The cache is only for metadata (database list and format list).
41
+ # Data entries are NOT cached.
42
+ CACHE_DIR = '/tmp/biofetch_rb.cache'
43
+
44
+ # cache lifetime
45
+ CACHE_LIFETIME = 60 * 60 # 1 hour
46
+
47
+ module TogoWS
48
+ TOGOWS_URL = 'http://togows.dbcls.jp/'
49
+
50
+ def togows_database_complete_list
51
+ result = togows_get_cached('/entry/')
52
+ result.to_s.split(/\n/).collect {|x| x.split(/\t/) }
53
+ end
54
+
55
+ def togows_database_formats(db)
56
+ db = CGI.escape(db)
57
+ result = togows_get_cached("/entry/#{db}/?formats")
58
+ end
59
+
60
+ def togows_get(path)
61
+ uristr = TOGOWS_URL + path
62
+ begin
63
+ result = OpenURI.open_uri(uristr).read
64
+ rescue OpenURI::HTTPError
65
+ result = nil
66
+ end
67
+ result
68
+ end
69
+
70
+ private
71
+
72
+ def togows_get_cached(path)
73
+ filepath = path.sub(/\A\//, '').sub(/\/\z/, '')
74
+ filepath = filepath.gsub(/\//, " ")
75
+ filepath = filepath.sub(/\?/, '_')
76
+ filepath = File.join(CACHE_DIR, filepath)
77
+ result = nil
78
+ begin
79
+ if Time.now - File.mtime(filepath) > CACHE_LIFETIME
80
+ # delete expired cache file
81
+ File.delete(filepath)
82
+ end
83
+ result = File.read(filepath)
84
+ rescue IOError, SystemCallError
85
+ result = nil
86
+ end
87
+ unless result then
88
+ # valid cache is not found
89
+ result = togows_get(path)
90
+ if result then
91
+ # create cache directory if not found
92
+ FileUtils.mkdir_p(CACHE_DIR, :mode => 0700)
93
+ # simple security check for the cache dir
94
+ if File.stat(CACHE_DIR).mode & 0022 != 0 then
95
+ raise SecurityError, "CACHE_DIR #{CACHE_DIR} is writeable by others"
96
+ end
97
+
98
+ # write to temporary file
99
+ tmp = Tempfile.open('temp', CACHE_DIR)
100
+ tmp.print result
101
+ tmp.close
102
+ # create a hard link from the temporary to the cache file
103
+ begin
104
+ File.link(tmp.path, filepath)
105
+ rescue IOError, SystemCallError
106
+ end
107
+ # the temporay file will be automatically removed at exit
108
+ end
109
+ end
110
+ result
111
+ end
112
+
113
+ end #module TogoWS
30
114
 
31
115
  module BioFetchError
32
116
 
@@ -36,32 +120,50 @@ module BioFetchError
36
120
  exit
37
121
  end
38
122
 
123
+ def print_html_page(str)
124
+ print "Content-type: text/html; charset=UTF-8\n\n"
125
+ print "<pre>", CGI.escapeHTML(str), "</pre>\n"
126
+ exit
127
+ end
128
+
39
129
  def error1(db)
130
+ db = CGI.escapeHTML(db.to_s) # to avoid potential XSS with old IE
40
131
  str = "ERROR 1 Unknown database [#{db}]."
41
132
  print_text_page(str)
42
133
  end
43
134
 
44
135
  def error2(style)
136
+ style = CGI.escapeHTML(style.to_s) # to avoid potential XSS with old IE
45
137
  str = "ERROR 2 Unknown style [#{style}]."
46
138
  print_text_page(str)
47
139
  end
48
140
 
49
141
  def error3(format, db)
142
+ # to avoid potential XSS with old IE which ignores Content-Type
143
+ db = CGI.escapeHTML(db.to_s)
144
+ format = CGI.escapeHTML(format.to_s)
50
145
  str = "ERROR 3 Format [#{format}] not known for database [#{db}]."
51
146
  print_text_page(str)
52
147
  end
53
148
 
54
149
  def error4(entry_id, db)
150
+ # to avoid potential XSS with old IE which ignores Content-Type
151
+ entry_id = CGI.escapeHTML(entry_id.to_s)
152
+ db = CGI.escapeHTML(db.to_s)
55
153
  str = "ERROR 4 ID [#{entry_id}] not found in database [#{db}]."
56
154
  print_text_page(str)
57
155
  end
58
156
 
59
157
  def error5(count)
158
+ # to avoid potential XSS with old IE which ignores Content-Type
159
+ count = CGI.escapeHTML(count.to_s)
60
160
  str = "ERROR 5 Too many IDs [#{count}]. Max [#{MAX_ID_NUM}] allowed."
61
161
  print_text_page(str)
62
162
  end
63
163
 
64
164
  def error6(info)
165
+ # to avoid potential XSS with old IE which ignores Content-Type
166
+ info = CGI.escapeHTML(info.to_s)
65
167
  str = "ERROR 6 Illegal information request [#{info}]."
66
168
  print_text_page(str)
67
169
  end
@@ -70,23 +172,35 @@ end
70
172
 
71
173
 
72
174
 
73
- module KeggAPI
175
+ module ApiBridge
74
176
 
75
177
  include BioFetchError
178
+ include TogoWS
179
+
180
+ def list_databases_with_synonyms
181
+ togows_database_complete_list
182
+ end
76
183
 
77
184
  def list_databases
78
- serv = Bio::KEGG::API.new
79
- results = serv.list_databases
80
- results.collect {|x| x.entry_id}
185
+ list_databases_with_synonyms.flatten
81
186
  end
82
187
 
83
188
  def bget(db, id_list, format)
84
- serv = Bio::KEGG::API.new
189
+ case format
190
+ when 'fasta'
191
+ format = '.fasta'
192
+ else
193
+ format = ''
194
+ end
195
+ db = CGI.escape(db)
196
+
85
197
  results = ''
86
198
  id_list.each do |query_id|
87
- entry_id = "#{db}:#{query_id}"
88
- result = serv.get_entries([entry_id])
89
- if result.empty?
199
+ query_id = CGI.escape(query_id)
200
+ path = "/entry/#{db}/#{query_id}#{format}"
201
+ result = togows_get(path)
202
+
203
+ if !result or result.empty? or /\AError\: / =~ result then
90
204
  error4(query_id, db)
91
205
  else
92
206
  results << result
@@ -95,78 +209,41 @@ module KeggAPI
95
209
  return results
96
210
  end
97
211
 
98
- end
99
-
100
-
101
-
102
-
103
- class BioFetch
104
-
105
- include BioFetchError
106
- include KeggAPI
107
-
108
- def initialize(db, id_list, style, format)
109
- check_style(style)
110
- check_format(format, db)
111
- check_number_of_id(id_list.length)
112
- check_dbname(db)
212
+ def check_fasta_ok?(db)
213
+ result = togows_database_formats(db)
214
+ /^fasta$/ =~ result.to_s
215
+ end
113
216
 
114
- if /html/.match(style)
115
- goto_html_style_page(db, id_list, format)
116
- end
217
+ end #module ApiBridge
117
218
 
118
- entries = bget(db, id_list, format)
219
+ module BioFetchCheck
119
220
 
120
- if /fasta/.match(format) and entries
121
- entries = convert_to_fasta_format(entries, db)
122
- end
123
-
124
- print_text_page(entries)
125
- end
221
+ include ApiBridge
126
222
 
127
223
  private
128
224
 
129
- def convert_to_fasta_format(str, db)
130
- require 'bio'
131
- require 'stringio'
132
-
133
- fasta = Array.new
134
-
135
- entries = StringIO.new(str)
136
- Bio::FlatFile.auto(entries) do |ff|
137
- ff.each do |entry|
138
- seq = nil
139
- if entry.respond_to?(:seq)
140
- seq = entry.seq
141
- elsif entry.respond_to?(:aaseq)
142
- seq = entry.aaseq
143
- elsif entry.respond_to?(:naseq)
144
- seq = entry.naseq
145
- end
146
- if seq
147
- entry_id = entry.respond_to?(:entry_id) ? entry.entry_id : ''
148
- definition = entry.respond_to?(:definition) ? entry.definition : ''
149
- fasta << seq.to_fasta("#{db}:#{entry_id} #{definition}", 60)
150
- end
151
- end
152
- end
153
- return fasta.join
154
- end
155
-
156
- def goto_html_style_page(db, id_list, format)
157
- url = "http://www.genome.jp/dbget-bin/www_bget"
158
- opt = '-f+' if /fasta/.match(format)
159
- ids = id_list.join('%2B')
160
- print "Location: #{url}?#{opt}#{db}+#{ids}\n\n"
161
- exit
162
- end
163
-
164
225
  def check_style(style)
165
- error2(style) unless /html|raw/.match(style)
226
+ style = style.to_s.downcase
227
+ error2(style) unless /\A(html|raw)\z/.match(style)
228
+ style
166
229
  end
167
230
 
168
231
  def check_format(format, db)
169
- error3(format, db) if format && ! /fasta|default/.match(format)
232
+ fmt = format ? format.to_s.downcase : nil
233
+ case fmt
234
+ when 'fasta'
235
+ db = check_dbname(db)
236
+ fmt = nil unless check_fasta_ok?(db)
237
+ when 'default'
238
+ # do nothing
239
+ when nil
240
+ fmt = 'default'
241
+ else
242
+ fmt = nil
243
+ end
244
+
245
+ error3(format, db) unless fmt
246
+ fmt
170
247
  end
171
248
 
172
249
  def check_number_of_id(num)
@@ -174,22 +251,50 @@ class BioFetch
174
251
  end
175
252
 
176
253
  def check_dbname(db)
254
+ db = db.to_s.downcase
177
255
  error1(db) unless list_databases.include?(db)
256
+ db
178
257
  end
179
258
 
180
- end
259
+ end #module BioFetchCheck
260
+
261
+ class BioFetch
262
+
263
+ include BioFetchCheck
264
+ include BioFetchError
265
+ include ApiBridge
266
+
267
+ def initialize(db, id_list, style, format)
268
+ style = check_style(style)
269
+ format = check_format(format, db)
270
+ check_number_of_id(id_list.length)
271
+ db = check_dbname(db)
272
+
273
+ entries = bget(db, id_list, format)
274
+
275
+ if style == 'html' then
276
+ print_html_page(entries)
277
+ else
278
+ print_text_page(entries)
279
+ end
280
+
281
+ end
282
+
283
+ end #class BioFetch
181
284
 
182
285
 
183
286
 
184
287
  class BioFetchInfo
185
288
 
289
+ include BioFetchCheck
186
290
  include BioFetchError
187
- include KeggAPI
291
+ include ApiBridge
188
292
 
189
293
  def initialize(info, db)
190
294
  @db = db
295
+
191
296
  begin
192
- send(info)
297
+ check_info(info) ? __send__(info) : raise
193
298
  rescue
194
299
  error6(info)
195
300
  end
@@ -197,13 +302,18 @@ class BioFetchInfo
197
302
 
198
303
  private
199
304
 
305
+ def check_info(meth_name)
306
+ /\A(dbs|formats|maxids)\z/ =~ meth_name
307
+ end
308
+
200
309
  def dbs
201
310
  str = list_databases.sort.join(' ')
202
311
  print_text_page(str)
203
312
  end
204
313
 
205
314
  def formats
206
- fasta = " fasta" if check_fasta_ok
315
+ db = check_dbname(@db)
316
+ fasta = " fasta" if check_fasta_ok?(db)
207
317
  str = "default#{fasta}"
208
318
  print_text_page(str)
209
319
  end
@@ -213,17 +323,14 @@ class BioFetchInfo
213
323
  print_text_page(str)
214
324
  end
215
325
 
216
- def check_fasta_ok
217
- # sequence databases supported by Bio::FlatFile.auto
218
- /genes|gb|genbank|genpept|rs|refseq|emb|sp|swiss|pir/.match(@db)
219
- end
220
-
221
- end
326
+ end #class BioFetchInfo
222
327
 
223
328
 
224
329
 
225
330
  class BioFetchCGI
226
331
 
332
+ include ApiBridge
333
+
227
334
  def initialize(cgi)
228
335
  @cgi = cgi
229
336
  show_page
@@ -244,11 +351,14 @@ class BioFetchCGI
244
351
  end
245
352
 
246
353
  def show_query_page
247
- html = HTML::Template.new
248
- html.set_html(DATA.read)
249
- html.param('max_id_num' => MAX_ID_NUM)
250
- @cgi.out do
251
- html.output
354
+ html = ERB.new(DATA.read)
355
+ max_id_num = MAX_ID_NUM
356
+ databases_with_synonyms = list_databases_with_synonyms
357
+ databases = list_databases
358
+ script_name = SCRIPT_NAME
359
+ base_url = BASE_URL
360
+ @cgi.out({ "type" => "text/html", "charset" => "utf-8" }) do
361
+ html.result(binding)
252
362
  end
253
363
  end
254
364
 
@@ -269,7 +379,7 @@ class BioFetchCGI
269
379
  end
270
380
 
271
381
  def id_list
272
- @cgi['id'].split(/\W/) # not only ','
382
+ @cgi['id'].strip.split(/[\,\s]+/)
273
383
  end
274
384
 
275
385
  def style
@@ -295,9 +405,8 @@ BioFetchCGI.new(CGI.new)
295
405
  This program was created during BioHackathon 2002, Tucson and updated
296
406
  in Cape Town :)
297
407
 
298
- Rewrited in 2004 to use KEGG API as the bioruby.org server left from Kyoto
299
- University (where DBGET runs) and the old version could not run without
300
- having internally accessible DBGET server.
408
+ Rewritten in 2013 to use the TogoWS API, because the bioruby.org server left The
409
+ University of Tokyo and the old SOAP-based KEGG API was discontinued.
301
410
 
302
411
  =end
303
412
 
@@ -308,7 +417,7 @@ __END__
308
417
  <HEAD>
309
418
  <LINK href="http://bioruby.org/img/favicon.png" rel="icon" type="image/png">
310
419
  <LINK href="http://bioruby.org/css/bioruby.css" rel="stylesheet" type="text/css">
311
- <TITLE>BioFetch interface to GenomeNet/DBGET</TITLE>
420
+ <TITLE>BioFetch interface to TogoWS</TITLE>
312
421
  </HEAD>
313
422
 
314
423
  <BODY bgcolor="#ffffff">
@@ -316,37 +425,26 @@ __END__
316
425
  <H1>
317
426
  <IMG src="http://bioruby.org/img/ruby.png" align="middle">
318
427
  BioFetch interface to
319
- <A href="http://www.genome.jp/dbget/">GenomeNet/DBGET</A>
428
+ <A href="http://togows.dbcls.jp/">TogoWS</A>
320
429
  </H1>
321
430
 
322
- <P>This page allows you to retrieve up to <!var:max_id_num> entries at a time from various up-to-date biological databases.</P>
431
+ <P>This page allows you to retrieve up to <%= max_id_num %> entries at a time from various up-to-date biological databases.</P>
323
432
 
324
433
  <HR>
325
434
 
326
- <FORM METHOD="post" ENCTYPE="application/x-www-form-urlencoded" action="biofetch.rb">
435
+ <FORM METHOD="post" ENCTYPE="application/x-www-form-urlencoded" action="<%= script_name %>">
327
436
 
328
437
  <SELECT name="db">
329
- <OPTION value="genbank">GenBank</OPTION>
330
- <OPTION value="refseq">RefSeq</OPTION>
331
- <OPTION value="embl">EMBL</OPTION>
332
- <OPTION value="swissprot">Swiss-Prot</OPTION>
333
- <OPTION value="pir">PIR</OPTION>
334
- <OPTION value="prf">PRF</OPTION>
335
- <OPTION value="pdb">PDB</OPTION>
336
- <OPTION value="pdbstr">PDBSTR</OPTION>
337
- <OPTION value="epd">EPD</OPTION>
338
- <OPTION value="transfac">TRANSFAC</OPTION>
339
- <OPTION value="prosite">PROSITE</OPTION>
340
- <OPTION value="pmd">PMD</OPTION>
341
- <OPTION value="litdb">LITDB</OPTION>
342
- <OPTION value="omim">OMIM</OPTION>
343
- <OPTION value="ligand">KEGG/LIGAND</OPTION>
344
- <OPTION value="pathway">KEGG/PATHWAY</OPTION>
345
- <OPTION value="brite">KEGG/BRITE</OPTION>
346
- <OPTION value="genes">KEGG/GENES</OPTION>
347
- <OPTION value="genome">KEGG/GENOME</OPTION>
348
- <OPTION value="linkdb">LinkDB</OPTION>
349
- <OPTION value="aaindex">AAindex</OPTION>
438
+ <% databases_with_synonyms.each do |dbs|
439
+ a = dbs[1..-1]
440
+ synonyms = unless a.empty? then
441
+ " (abbr: " + a.join(", ") + ")"
442
+ else
443
+ ""
444
+ end
445
+ %>
446
+ <OPTION value="<%= dbs[0] %>"><%= dbs[0] %><%= synonyms %></OPTION>
447
+ <% end %>
350
448
  </SELECT>
351
449
 
352
450
  <INPUT name="id" size="40" type="text" maxlength="1000">
@@ -369,7 +467,7 @@ BioFetch interface to
369
467
 
370
468
  <H2>Direct access</H2>
371
469
 
372
- <P>http://bioruby.org/cgi-bin/biofetch.rb?format=(default|fasta|...);style=(html|raw);db=(genbank|embl|...);id=ID[,ID,ID,...]</P>
470
+ <P><%= base_url %>?format=(default|fasta|...);style=(html|raw);db=(nuccore|embl|...);id=ID[,ID,ID,...]</P>
373
471
  <P>(NOTE: the option separator ';' can be '&')</P>
374
472
 
375
473
  <DL>
@@ -380,7 +478,7 @@ BioFetch interface to
380
478
  <DD> html|raw
381
479
 
382
480
  <DT> <U>db</U> (required)
383
- <DD> genbank|refseq|embl|swissprot|pir|prf|pdb|pdbstr|epd|transfac|prosite|pmd|litdb|omim|ligand|pathway|brite|genes|genome|linkdb|aaindex|...
481
+ <DD> <%= databases.join('|') %>
384
482
 
385
483
  <DT> <U>id</U> (required)
386
484
  <DD> comma separated list of IDs
@@ -392,66 +490,65 @@ BioFetch interface to
392
490
 
393
491
  <DL>
394
492
  <DT> <A href="?info=dbs">What databases are available?</A>
395
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?info=dbs
493
+ <DD> <%= base_url %>?info=dbs
396
494
 
397
495
  <DT> <A href="?info=formats;db=embl">What formats does the database X have?</A>
398
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?info=formats;db=embl
496
+ <DD> <%= base_url %>?info=formats;db=embl
399
497
 
400
498
  <DT> <A href="?info=maxids">How many entries can be retrieved simultaneously?</A>
401
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?info=maxids
499
+ <DD> <%= base_url %>?info=maxids
402
500
  </DL>
403
501
 
404
502
  <H2>Examples</H2>
405
503
 
406
504
  <DL>
407
- <DT> <A href="?format=default;style=raw;db=genbank;id=AJ617376">gb:AJ617376</A> (default/raw)
408
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=AJ617376
505
+ <DT> <A href="?format=default;style=raw;db=nuccore;id=AJ617376">nuccore/AJ617376</A> (default/raw)
506
+ <DD> <%= base_url %>?format=default;style=raw;db=nuccore;id=AJ617376
409
507
 
410
- <DT> <A href="?format=fasta;style=raw;db=genbank;id=AJ617376">gb:AJ617376</A> (fasta/raw)
411
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=fasta;style=raw;db=genbank;id=AJ617376
508
+ <DT> <A href="?format=fasta;style=raw;db=nuccore;id=AJ617376">nuccore/AJ617376</A> (fasta/raw)
509
+ <DD> <%= base_url %>?format=fasta;style=raw;db=nuccore;id=AJ617376
412
510
 
413
- <DT> <A href="?format=default;style=html;db=genbank;id=AJ617376">gb:AJ617376</A> (default/html)
414
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=html;db=genbank;id=AJ617376
511
+ <DT> <A href="?format=default;style=html;db=nuccore;id=AJ617376">nuccore/AJ617376</A> (default/html)
512
+ <DD> <%= base_url %>?format=default;style=html;db=nuccore;id=AJ617376
415
513
 
416
- <DT> <A href="?format=default;style=raw;db=genbank;id=AJ617376,AJ617377">gb:AJ617376,AJ617377</A> (default/raw, multiple)
417
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=AJ617376,AJ617377
514
+ <DT> <A href="?format=default;style=raw;db=nuccore;id=AJ617376,AJ617377">nuccore/AJ617376,AJ617377</A> (default/raw, multiple)
515
+ <DD> <%= base_url %>?format=default;style=raw;db=nuccore;id=AJ617376,AJ617377
418
516
 
419
- <DT> <A href="?format=default;style=raw;db=embl;id=BUM">embl:BUM</A> (default/raw)
420
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=embl;id=BUM
517
+ <DT> <A href="?format=default;style=raw;db=embl;id=J00231">embl/J00231</A> (default/raw)
518
+ <DD> <%= base_url %>?format=default;style=raw;db=embl;id=J00231
421
519
 
422
- <DT> <A href="?format=default;style=raw;db=swissprot;id=CYC_BOVIN">sp:CYC_BOVIN</A> (default/raw)
423
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=swissprot;id=CYC_BOVIN
520
+ <DT> <A href="?format=default;style=raw;db=uniprot;id=CYC_BOVIN">uniprot/CYC_BOVIN</A> (default/raw)
521
+ <DD> <%= base_url %>?format=default;style=raw;db=uniprot;id=CYC_BOVIN
424
522
 
425
- <DT> <A href="?format=fasta;style=raw;db=swissprot;id=CYC_BOVIN">sp:CYC_BOVIN</A> (fasta/raw)
426
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=fasta;style=raw;db=swissprot;id=CYC_BOVIN
523
+ <DT> <A href="?format=fasta;style=raw;db=uniprot;id=CYC_BOVIN">uniprot/CYC_BOVIN</A> (fasta/raw)
524
+ <DD> <%= base_url %>?format=fasta;style=raw;db=uniprot;id=CYC_BOVIN
427
525
 
428
- <DT> <A href="?format=default;style=raw;db=genes;id=b0015">genes:b0015</A> (default/raw)
429
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genes;id=b0015
526
+ <DT> <A href="?format=default;style=raw;db=genes;id=eco%3Ab0015">genes/eco:b0015</A> (default/raw)
527
+ <DD> <%= base_url %>?format=default;style=raw;db=genes;id=eco%3Ab0015
528
+ <DD> <%= base_url %>?format=default;style=raw;db=genes;id=eco:b0015
430
529
 
431
- <DT> <A href="?format=default;style=raw;db=prosite;id=PS00028">ps:PS00028</A> (default/raw)
432
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=prosite;id=PS00028
433
530
  </DL>
434
531
 
435
532
  <H2>Errors</H2>
436
533
 
437
534
  <DL>
438
535
  <DT> <A href="?format=default;style=raw;db=nonexistent;id=AJ617376">Error1</A> sample : DB not found
439
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=nonexistent;id=AJ617376"
536
+ <DD> <%= base_url %>?format=default;style=raw;db=nonexistent;id=AJ617376
440
537
 
441
- <DT> <A href="?format=default;style=nonexistent;db=genbank;id=AJ617376">Error2</A> sample : unknown style
442
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=nonexistent;db=genbank;id=AJ617376"
538
+ <DT> <A href="?format=default;style=nonexistent;db=nuccore;id=AJ617376">Error2</A> sample : unknown style
539
+ <DD> <%= base_url %>?format=default;style=nonexistent;db=nuccore;id=AJ617376
443
540
 
444
- <DT> <A href="?format=nonexistent;style=raw;db=genbank;id=AJ617376">Error3</A> sample : unknown format
445
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=nonexistent;style=raw;db=genbank;id=AJ617376"
541
+ <DT> <A href="?format=nonexistent;style=raw;db=nuccore;id=AJ617376">Error3</A> sample : unknown format
542
+ <DD> <%= base_url %>?format=nonexistent;style=raw;db=nuccore;id=AJ617376
446
543
 
447
- <DT> <A href="?format=default;style=raw;db=genbank;id=nonexistent">Error4</A> sample : ID not found
448
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=nonexistent"
544
+ <DT> <A href="?format=default;style=raw;db=nuccore;id=nonexistent">Error4</A> sample : ID not found
545
+ <DD> <%= base_url %>?format=default;style=raw;db=nuccore;id=nonexistent
449
546
 
450
547
  <DT> <A href="?style=raw;db=genes;id=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51">Error5</A> sample : too many IDs
451
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?style=raw;db=genes;id=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51
548
+ <DD> <%= base_url %>?style=raw;db=genes;id=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51
452
549
 
453
550
  <DT> <A href="?info=nonexistent">Error6</A> sample : unknown info
454
- <DD> http://bioruby.org/cgi-bin/biofetch.rb?info=nonexistent"
551
+ <DD> <%= base_url %>?info=nonexistent
455
552
  </DL>
456
553
 
457
554
  <H2>Other BioFetch implementations</H2>