bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,306 @@
1
+ #
2
+ # = bio/util/sirna.rb - Class for designing small inhibitory RNAs
3
+ #
4
+ # Copyright:: Copyright (C) 2004, 2005
5
+ # Itoshi NIKAIDO <dritoshi@gmail.com>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: sirna.rb,v 1.6 2005/11/14 15:44:30 nakao Exp $
9
+ #
10
+ # == Bio::SiRNA - Designing siRNA.
11
+ #
12
+ # This class implements the selection rules described by Kumiko Ui-Tei
13
+ # et al. (2004) and Reynolds et al. (2004).
14
+ #
15
+ # == Example
16
+ #
17
+ # seq = Bio::Sequence::NA.new(ARGF.read)
18
+ #
19
+ # sirna = Bio::SiRNA.new(seq)
20
+ # pairs = sirna.design
21
+ #
22
+ # pairs.each do |pair|
23
+ # puts pair.report
24
+ # shrna = Bio::SiRNA::ShRNA.new(pair)
25
+ # shrna.design
26
+ # puts shrna.report
27
+ #
28
+ # puts shrna.top_strand.dna
29
+ # puts shrna.bottom_strand.dna
30
+ # end
31
+ #
32
+ # == References
33
+ #
34
+ # * Kumiko Ui-Tei et al. Guidelines for the selection of highly effective
35
+ # siRNA sequences for mammalian and chick RNA interference.
36
+ # Nucl. Acids. Res. 2004 32: 936-948.
37
+ #
38
+ # * Angela Reynolds et al. Rational siRNA design for RNA interference.
39
+ # Nature Biotech. 2004 22: 326-330.
40
+ #
41
+ #--
42
+ #
43
+ # This library is free software; you can redistribute it and/or
44
+ # modify it under the terms of the GNU Lesser General Public
45
+ # License as published by the Free Software Foundation; either
46
+ # version 2 of the License, or (at your option) any later version.
47
+ #
48
+ # This library is distributed in the hope that it will be useful,
49
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
50
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
51
+ # Lesser General Public License for more details.
52
+ #
53
+ # You should have received a copy of the GNU Lesser General Public
54
+ # License along with this library; if not, write to the Free Software
55
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
56
+ #
57
+ #++
58
+ #
59
+
60
+ require 'bio/sequence'
61
+
62
+ module Bio
63
+
64
+ # = Bio::SiRNA
65
+ # Designing siRNA.
66
+ #
67
+ # This class implements the selection rules described by Kumiko Ui-Tei
68
+ # et al. (2004) and Reynolds et al. (2004).
69
+ class SiRNA
70
+
71
+ # A parameter of size of antisense.
72
+ attr_accessor :antisense_size
73
+
74
+ # A parameter of maximal %GC.
75
+ attr_accessor :max_gc_percent
76
+
77
+ # A parameter of minimum %GC.
78
+ attr_accessor :min_gc_percent
79
+
80
+ # Input is a Bio::Sequence::NA object (the target sequence).
81
+ # Output is a list of Bio::SiRNA::Pair object.
82
# Creates a designer for the target sequence +seq+.
#
# * +seq+ - the target sequence. It must respond to +rna+; the class
#   comment states that a Bio::Sequence::NA is expected.
# * +antisense_size+ - length of the antisense strand (default 21).
# * +max_gc_percent+ - candidates whose antisense %GC is above this value
#   are skipped by #design (default 60.0).
# * +min_gc_percent+ - candidates whose antisense %GC is below this value
#   are skipped by #design (default 40.0).
#
# The sequence is converted with the non-destructive +rna+ so that the
# caller's object is left untouched.
# NOTE(review): the previous code used +rna!+. If that method is built on
# String#tr! it returns nil for a sequence without any 't', which would
# leave @seq nil -- confirm against Bio::Sequence::NA.
def initialize(seq, antisense_size = 21, max_gc_percent = 60.0, min_gc_percent = 40.0)
  @seq            = seq.rna
  @pairs          = Array.new
  @antisense_size = antisense_size
  @max_gc_percent = max_gc_percent
  @min_gc_percent = min_gc_percent
end
89
+
90
+ # Ui-Tei's rule.
91
# Returns true when +target+ satisfies the Ui-Tei rule as implemented
# here, false otherwise. The checks are, in order:
#
# 1. the third base of +target+ is G or C;
# 2. the third base from the end is A or U;
# 3. +target+ contains no run of nine G/C bases;
# 4. the stretch returned by
#    <tt>target.subseq(size - size/3 - 1, size - 2)</tt> contains at
#    least five A/U bases.
#
# +target+ must respond to +size+ and +subseq+ (Bio::Sequence::NA is what
# #design passes in). All matching is case-insensitive.
def uitei?(target)
  return false unless /^.{2}[GC]/i =~ target
  return false unless /[AU].{2}$/i =~ target
  return false if /[GC]{9}/i =~ target

  # Use the length of the target itself: @target_size is only assigned by
  # #design, so relying on it made a direct call to this predicate fail.
  target_size = target.size
  one_third   = target_size * 1 / 3
  start_pos   = target_size - one_third - 1
  remain_seq  = target.subseq(start_pos, target_size - 2)

  # This counts A/U bases (the old local was misleadingly named gc_number).
  au_number = remain_seq.scan(/[AU]/i).size
  return false if au_number < 5

  return true
end
104
+
105
+ # Reynolds' rule.
106
# Returns true when +target+ satisfies the Reynolds rule as implemented
# here: it must not contain a run of nine G/C bases, and it must be a
# 23-base string with A at position 5, U at position 12, A/U/C at
# position 15 and A/U at position 21. Matching is case-insensitive.
def reynolds?(target)
  if /[GC]{9}/i =~ target
    false
  elsif /^.{4}A.{6}U.{2}[AUC].{5}[AU].{2}$/i =~ target
    true
  else
    false
  end
end
111
+
112
+ # same as design('uitei').
113
# Shortcut for <tt>design('uitei')</tt>; returns whatever #design returns.
def uitei
  rule_name = 'uitei'
  return design(rule_name)
end
116
+
117
+ # same as design('reynolds').
118
# Shortcut for <tt>design('reynolds')</tt>; returns whatever #design returns.
def reynolds
  rule_name = 'reynolds'
  return design(rule_name)
end
121
+
122
+ # rule can be one of 'uitei' (default) and 'reynolds'.
123
# Scans the target sequence and returns an Array of Bio::SiRNA::Pair
# objects for every window that passes the %GC filter and the chosen rule.
#
# * +rule+ - 'uitei' (default) or 'reynolds'.
#
# Raises NotImplementedError for any other rule. The check is done before
# scanning, so the error no longer depends on whether some window happens
# to pass the %GC filter first.
#
# Each call starts from an empty result list; previously the pairs of
# earlier calls (possibly made with a different rule) were kept and
# returned again.
def design(rule = 'uitei')
  unless ['uitei', 'reynolds'].include?(rule)
    raise NotImplementedError, "unknown siRNA design rule: #{rule.inspect}"
  end

  # The scanned window is two bases longer than the antisense strand.
  @target_size = @antisense_size + 2
  @pairs       = Array.new

  target_start = 0
  @seq.window_search(@target_size) do |target|
    antisense = target.subseq(1, @target_size - 2).complement.rna
    sense     = target.subseq(3, @target_size)

    # Positions are advanced before filtering so that skipped windows
    # are still counted.
    target_start += 1
    # NOTE(review): with a 1-based start this is one past the last base
    # of the window -- confirm whether an exclusive end is intended.
    target_stop = target_start + @target_size

    antisense_gc_percent = antisense.gc_percent
    next if antisense_gc_percent > @max_gc_percent
    next if antisense_gc_percent < @min_gc_percent

    case rule
    when 'uitei'
      next unless uitei?(target)
    when 'reynolds'
      next unless reynolds?(target)
    end

    pair = Bio::SiRNA::Pair.new(target, sense, antisense, target_start, target_stop, rule, antisense_gc_percent)
    @pairs.push(pair)
  end
  return @pairs
end
152
+
153
+ # = Bio::SiRNA::Pair
154
class Pair

  # The target window this pair was derived from.
  attr_accessor :target

  # The sense strand.
  attr_accessor :sense

  # The antisense strand.
  attr_accessor :antisense

  # Start position of the target, as supplied to the constructor.
  attr_accessor :start

  # Stop position of the target, as supplied to the constructor.
  attr_accessor :stop

  # Name of the rule that selected this pair.
  attr_accessor :rule

  # %GC value supplied to the constructor.
  attr_accessor :gc_percent

  # Stores the seven values unchanged; no validation is performed.
  def initialize(target, sense, antisense, start, stop, rule, gc_percent)
    @target, @sense, @antisense = target, sense, antisense
    @start, @stop               = start, stop
    @rule, @gc_percent          = rule, gc_percent
  end

  # Returns a multi-line, human readable description of the pair.
  # Sequences are upper-cased and the antisense strand is printed reversed.
  def report
    rows = [
      "### siRNA",
      'Start: ' + @start.to_s,
      'Stop: ' + @stop.to_s,
      'Rule: ' + @rule.to_s,
      'GC %: ' + @gc_percent.to_s,
      'Target: ' + @target.upcase,
      'Sense: ' + ' ' + @sense.upcase,
      'Antisense: ' + @antisense.reverse.upcase
    ]
    rows.map { |row| row + "\n" }.join
  end

  # computer parsable report
  #def to_s
  #  [ @antisense, @start, @stop ].join("\t")
  #end

end # class Pair
198
+
199
+
200
+ # = Bio::SiRNA::ShRNA
201
+ # Designing shRNA.
202
class ShRNA

  # Top strand produced by #block_it (nil until a design has been run).
  # The demo script at the end of this file calls +dna+ on it, so a
  # Bio::Sequence::NA is expected.
  attr_accessor :top_strand

  # Bottom strand produced by #block_it (nil until a design has been run).
  attr_accessor :bottom_strand

  # +pair+ is the Bio::SiRNA::Pair whose sense strand is used by #block_it.
  def initialize(pair)
    @pair = pair
  end

  # Runs the design named by +method+. Only 'BLOCK-iT' is implemented;
  # anything else raises NotImplementedError. Note that #block_it is
  # called without arguments, so its default loop ('piGENE') is used.
  def design(method = 'BLOCK-iT')
    raise NotImplementedError unless 'BLOCK-iT' === method
    block_it
  end


  # Builds #top_strand and #bottom_strand from the sense strand of the
  # pair, the 'CACC' / 'AAAA' overhangs and a loop sequence.
  #
  # * +method+ - selects the loop: 'piGENE' (default, 'GTGTGCTGTCC') or
  #   'BLOCK-iT' ('CGAA'). Any other value raises NotImplementedError.
  #
  # When the sense strand does not start with G, a 'G' is inserted after
  # the top overhang and a 'C' is appended to the bottom strand.
  def block_it(method = 'piGENE')
    top_overhang    = Bio::Sequence::NA.new('CACC') # top_strand_shrna_overhang
    bottom_overhang = Bio::Sequence::NA.new('AAAA') # bottom_strand_shrna_overhang
    sense_strand    = @pair.sense
    sense_comp      = @pair.sense.complement

    loop_fwd =
      if 'BLOCK-iT' === method
        # From BLOCK-iT's manual
        Bio::Sequence::NA.new('CGAA')
      elsif 'piGENE' === method
        # From piGENE document
        Bio::Sequence::NA.new('GTGTGCTGTCC')
      else
        raise NotImplementedError
      end
    loop_rev = loop_fwd.complement

    bottom_core = bottom_overhang + sense_strand + loop_rev + sense_comp

    if /^G/i =~ sense_strand
      @top_strand    = top_overhang + sense_strand + loop_fwd + sense_comp
      @bottom_strand = bottom_core
    else
      @top_strand    = top_overhang + 'G' + sense_strand + loop_fwd + sense_comp
      @bottom_strand = bottom_core + 'C'
    end
  end

  # Returns a multi-line, human readable description of both strands.
  # The bottom strand is printed reversed (3' to 5'). A design must have
  # been run first, otherwise the strands are nil.
  def report
    [
      "### shRNA\n",
      "Top strand shRNA (#{@top_strand.length} nt):\n",
      " 5'-#{@top_strand.upcase}-3'\n",
      "Bottom strand shRNA (#{@bottom_strand.length} nt):\n",
      " 3'-#{@bottom_strand.reverse.upcase}-5'\n"
    ].join
  end

end # class ShRNA
266
+
267
+ end # class SiRNA
268
+
269
+ end # module Bio
270
+
271
+
272
if __FILE__ == $0

  # Demo: read a nucleotide sequence from ARGF, design siRNA pairs for it
  # and print, for every pair, the siRNA report, the shRNA report and both
  # shRNA strands as DNA.
  target_seq = Bio::Sequence::NA.new(ARGF.read)
  designer   = Bio::SiRNA.new(target_seq)

  # or .design('uitei') or .uitei or .reynolds
  designer.design.each do |candidate|
    puts candidate.report

    hairpin = Bio::SiRNA::ShRNA.new(candidate)
    hairpin.design # or .design('BLOCK-iT') or .block_it
    puts hairpin.report

    puts "# as DNA"
    puts hairpin.top_strand.dna
    puts hairpin.bottom_strand.dna
  end

end
292
+
293
+ =begin
294
+
295
+ = ChangeLog
296
+
297
+ 2005/03/21 Itoshi NIKAIDO <itoshi.nikaido@nifty.com>
298
+ Bio::SiRNA#ShRNA_designer method was changed to the design method.
299
+
300
+ 2004/06/25
301
+ Bio::ShRNA class was added.
302
+
303
+ 2004/06/17 Itoshi NIKAIDO <itoshi.nikaido@nifty.com>
304
+ We can use shRNA loop sequence from piGene document.
305
+
306
+ =end
data/lib/bioruby.rb ADDED
@@ -0,0 +1,34 @@
1
+ #
2
+ # = bioruby.rb - Loading all BioRuby modules and setup for shell programming
3
+ #
4
+ # Copyright:: Copyright (C) 2005
5
+ # Toshiaki Katayama <k@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: bioruby.rb,v 1.1 2005/12/07 05:12:06 k Exp $
9
+ #
10
+ #--
11
+ #
12
+ # This library is free software; you can redistribute it and/or
13
+ # modify it under the terms of the GNU Lesser General Public
14
+ # License as published by the Free Software Foundation; either
15
+ # version 2 of the License, or (at your option) any later version.
16
+ #
17
+ # This library is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
+ # Lesser General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public
23
+ # License along with this library; if not, write to the Free Software
24
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
+ #
26
+ #++
27
+ #
28
+
29
+ require 'bio/shell'
30
+
31
# A top-level include mixes Bio::Shell into Object, so its instance
# methods can be called without a receiver from the shell session.
include(Bio::Shell)

# Hand over to the shell's own initialisation (defined in bio/shell).
Bio::Shell.setup()
34
+
@@ -0,0 +1,475 @@
1
+ #!/usr/proj/bioruby/bin/ruby
2
+ #
3
+ # biofetch.rb : BioFetch server (interface to GenomeNet/DBGET via KEGG API)
4
+ #
5
+ # Copyright (C) 2002-2004 KATAYAMA Toshiaki <k@bioruby.org>
6
+ #
7
+ # This program is free software; you can redistribute it and/or modify
8
+ # it under the terms of the GNU General Public License as published by
9
+ # the Free Software Foundation; either version 2 of the License, or
10
+ # (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU General Public License
18
+ # along with this program; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ # $Id: biofetch.rb,v 1.16 2005/08/07 10:02:41 k Exp $
22
+ #
23
+
24
+ require 'cgi'
25
+ require 'html/template'
26
+ require 'bio/io/keggapi'
27
+
28
+ MAX_ID_NUM = 50
29
+
30
+
31
# Helpers that answer a request with one of the numbered BioFetch error
# messages. Every method here ends the process through #print_text_page.
module BioFetchError

  # Writes +str+ to standard output as a complete text/plain CGI response
  # (header, blank line, body) and then calls +exit+.
  def print_text_page(str)
    $stdout.print("Content-type: text/plain; charset=UTF-8\n\n")
    $stdout.puts(str)
    exit
  end

  # ERROR 1: the requested database +db+ is not known.
  def error1(db)
    print_text_page("ERROR 1 Unknown database [#{db}].")
  end

  # ERROR 2: the requested +style+ is not known.
  def error2(style)
    print_text_page("ERROR 2 Unknown style [#{style}].")
  end

  # ERROR 3: +format+ is not available for database +db+.
  def error3(format, db)
    print_text_page("ERROR 3 Format [#{format}] not known for database [#{db}].")
  end

  # ERROR 4: +entry_id+ could not be found in database +db+.
  def error4(entry_id, db)
    print_text_page("ERROR 4 ID [#{entry_id}] not found in database [#{db}].")
  end

  # ERROR 5: +count+ IDs were requested, more than MAX_ID_NUM.
  def error5(count)
    print_text_page("ERROR 5 Too many IDs [#{count}]. Max [#{MAX_ID_NUM}] allowed.")
  end

  # ERROR 6: the information request +info+ is not supported.
  def error6(info)
    print_text_page("ERROR 6 Illegal information request [#{info}].")
  end

end
70
+
71
+
72
+
73
# Thin wrappers around Bio::KEGG::API used by the BioFetch classes.
module KeggAPI

  include BioFetchError

  # Returns an Array holding the +entry_id+ of every item reported by
  # Bio::KEGG::API#list_databases.
  def list_databases
    Bio::KEGG::API.new.list_databases.map { |database| database.entry_id }
  end

  # Fetches every ID of +id_list+ from database +db+, one request per ID
  # ("db:id"), and returns the results appended to one String.
  # An empty result for an ID is reported through #error4, which ends the
  # process. +format+ is accepted but not used here.
  def bget(db, id_list, format)
    kegg = Bio::KEGG::API.new
    id_list.inject('') do |fetched, query_id|
      entry = kegg.get_entries(["#{db}:#{query_id}"])
      if entry.empty?
        error4(query_id, db)
      else
        fetched << entry
      end
      fetched
    end
  end

end
99
+
100
+
101
+
102
+
103
# Answers one BioFetch entry request. All work happens in the constructor,
# which always ends by printing a response (or a redirect) and exiting.
class BioFetch

  include BioFetchError
  include KeggAPI

  # Styles and formats accepted by this server. They are compared for
  # equality; the former unanchored regular expressions also accepted
  # values such as "rawish" or "notfasta".
  VALID_STYLES  = ['html', 'raw'].freeze
  VALID_FORMATS = ['fasta', 'default'].freeze

  # * +db+      - database name; must be listed by #list_databases (ERROR 1).
  # * +id_list+ - Array of entry IDs; at most MAX_ID_NUM (ERROR 5).
  # * +style+   - 'html' or 'raw' (ERROR 2 otherwise).
  # * +format+  - nil, 'default' or 'fasta' (ERROR 3 otherwise).
  #
  # For the 'html' style the client is redirected to the DBGET web page;
  # otherwise the entries are fetched, converted to FASTA when requested,
  # and printed as text/plain.
  def initialize(db, id_list, style, format)
    check_style(style)
    check_format(format, db)
    check_number_of_id(id_list.length)
    check_dbname(db)

    goto_html_style_page(db, id_list, format) if style == 'html'

    entries = bget(db, id_list, format)

    if fasta?(format) and entries
      entries = convert_to_fasta_format(entries, db)
    end

    print_text_page(entries)
  end

  private

  # True when the FASTA format was requested.
  def fasta?(format)
    format == 'fasta'
  end

  # Parses the flat-file text +str+ with Bio::FlatFile.auto and returns all
  # entries that expose a sequence (+seq+, +aaseq+ or +naseq+, tried in
  # that order) as one FASTA String with 60 columns per line. Entries
  # without a sequence are silently skipped.
  def convert_to_fasta_format(str, db)
    require 'bio'
    require 'stringio'

    fasta = Array.new

    entries = StringIO.new(str)
    Bio::FlatFile.auto(entries) do |ff|
      ff.each do |entry|
        seq = nil
        if entry.respond_to?(:seq)
          seq = entry.seq
        elsif entry.respond_to?(:aaseq)
          seq = entry.aaseq
        elsif entry.respond_to?(:naseq)
          seq = entry.naseq
        end
        if seq
          entry_id   = entry.respond_to?(:entry_id) ? entry.entry_id : ''
          definition = entry.respond_to?(:definition) ? entry.definition : ''
          fasta << seq.to_fasta("#{db}:#{entry_id} #{definition}", 60)
        end
      end
    end
    return fasta.join
  end

  # Prints a Location header pointing at the DBGET www_bget page for the
  # requested entries and exits. IDs are joined with an encoded '+'.
  def goto_html_style_page(db, id_list, format)
    url = "http://www.genome.jp/dbget-bin/www_bget"
    opt = fasta?(format) ? '-f+' : ''
    ids = id_list.join('%2B')
    print "Location: #{url}?#{opt}#{db}+#{ids}\n\n"
    exit
  end

  # ERROR 2 unless +style+ is exactly one of VALID_STYLES.
  def check_style(style)
    error2(style) unless VALID_STYLES.include?(style)
  end

  # ERROR 3 when a +format+ is given that is not one of VALID_FORMATS.
  def check_format(format, db)
    error3(format, db) if format && ! VALID_FORMATS.include?(format)
  end

  # ERROR 5 when more than MAX_ID_NUM IDs are requested.
  def check_number_of_id(num)
    error5(num) if num > MAX_ID_NUM
  end

  # ERROR 1 unless +db+ is one of the databases reported by the server.
  def check_dbname(db)
    error1(db) unless list_databases.include?(db)
  end

end
181
+
182
+
183
+
184
# Answers a BioFetch "info" request (?info=dbs|formats|maxids). All work
# happens in the constructor, which prints a text page and exits.
class BioFetchInfo

  include BioFetchError
  include KeggAPI

  # The only information requests that may be dispatched. +info+ comes
  # straight from the CGI parameters, so it must never be handed to +send+
  # unchecked: that would let a client invoke any zero-argument method
  # reachable from this object (sleep, abort, fork, ...).
  INFO_REQUESTS = ['dbs', 'formats', 'maxids'].freeze

  # * +info+ - name of the information request.
  # * +db+   - database name; used by the 'formats' request.
  #
  # Unknown requests, and requests whose handler raises a StandardError,
  # are answered with ERROR 6.
  def initialize(info, db)
    @db = db
    if INFO_REQUESTS.include?(info)
      begin
        send(info)
      rescue
        error6(info)
      end
    else
      error6(info)
    end
  end

  private

  # Prints the sorted, space separated list of database names.
  def dbs
    str = list_databases.sort.join(' ')
    print_text_page(str)
  end

  # Prints the formats available for @db: always "default", plus "fasta"
  # when #check_fasta_ok matches.
  def formats
    fasta = " fasta" if check_fasta_ok
    str = "default#{fasta}"
    print_text_page(str)
  end

  # Prints the maximum number of IDs allowed per request.
  def maxids
    str = MAX_ID_NUM.to_s
    print_text_page(str)
  end

  # Truthy when the name in @db contains one of the listed fragments.
  def check_fasta_ok
    # sequence databases supported by Bio::FlatFile.auto
    /genes|gb|genbank|genpept|rs|refseq|emb|sp|swiss|pir/.match(@db)
  end

end
222
+
223
+
224
+
225
# Front controller: reads the CGI parameters and shows the query form, an
# information page or the requested entries.
class BioFetchCGI

  # +cgi+ must answer <tt>cgi['name']</tt> with a String for the parameters
  # info, db, id, style and format. The page is produced immediately.
  def initialize(cgi)
    @cgi = cgi
    show_page
  end

  private

  # Dispatch order: an info request wins; without IDs the query form is
  # shown; otherwise the entries are fetched.
  def show_page
    return show_info_page(info, db) unless info.empty?
    return show_query_page if id_list.empty?
    show_result_page(db, id_list, style, format)
  end

  # Renders the HTML form stored after __END__ in this script.
  def show_query_page
    html = HTML::Template.new
    html.set_html(DATA.read)
    html.param('max_id_num' => MAX_ID_NUM)
    @cgi.out do
      html.output
    end
  end

  def show_result_page(db, id_list, style, format)
    BioFetch.new(db, id_list, style, format)
  end

  def show_info_page(info, db)
    BioFetchInfo.new(info, db)
  end

  # The 'info' parameter, lower-cased.
  def info
    @cgi['info'].downcase
  end

  # The 'db' parameter, lower-cased.
  def db
    @cgi['db'].downcase
  end

  # The 'id' parameter split into single IDs. Any run of non-word
  # characters separates IDs (not only ','), and empty pieces are dropped,
  # so input such as "A, B" or ",A" no longer produces an empty ID that
  # would be reported as "not found".
  def id_list
    @cgi['id'].split(/\W+/).reject { |entry_id| entry_id.empty? }
  end

  # The 'style' parameter, lower-cased; "html" when absent.
  def style
    s = @cgi['style'].downcase
    return s.empty? ? "html" : s
  end

  # The 'format' parameter, lower-cased; "default" when absent.
  def format
    f = @cgi['format'].downcase
    return f.empty? ? "default" : f
  end

end
286
+
287
+
288
+
289
# Entry point: build the CGI request object and let BioFetchCGI answer it.
cgi_request = CGI.new
BioFetchCGI.new(cgi_request)
290
+
291
+
292
+
293
+ =begin
294
+
295
+ This program was created during BioHackathon 2002, Tucson and updated
296
+ in Cape Town :)
297
+
298
+ Rewritten in 2004 to use KEGG API as the bioruby.org server left from Kyoto
299
+ University (where DBGET runs) and the old version could not run without
300
+ having internally accessible DBGET server.
301
+
302
+ =end
303
+
304
+
305
+ __END__
306
+
307
+ <HTML>
308
+ <HEAD>
309
+ <LINK href="http://bioruby.org/img/favicon.png" rel="icon" type="image/png">
310
+ <LINK href="http://bioruby.org/css/bioruby.css" rel="stylesheet" type="text/css">
311
+ <TITLE>BioFetch interface to GenomeNet/DBGET</TITLE>
312
+ </HEAD>
313
+
314
+ <BODY bgcolor="#ffffff">
315
+
316
+ <H1>
317
+ <IMG src="http://bioruby.org/img/ruby.png" align="middle">
318
+ BioFetch interface to
319
+ <A href="http://www.genome.jp/dbget/">GenomeNet/DBGET</A>
320
+ </H1>
321
+
322
+ <P>This page allows you to retrieve up to <!var:max_id_num> entries at a time from various up-to-date biological databases.</P>
323
+
324
+ <HR>
325
+
326
+ <FORM METHOD="post" ENCTYPE="application/x-www-form-urlencoded" action="biofetch.rb">
327
+
328
+ <SELECT name="db">
329
+ <OPTION value="genbank">GenBank</OPTION>
330
+ <OPTION value="refseq">RefSeq</OPTION>
331
+ <OPTION value="embl">EMBL</OPTION>
332
+ <OPTION value="swissprot">Swiss-Prot</OPTION>
333
+ <OPTION value="pir">PIR</OPTION>
334
+ <OPTION value="prf">PRF</OPTION>
335
+ <OPTION value="pdb">PDB</OPTION>
336
+ <OPTION value="pdbstr">PDBSTR</OPTION>
337
+ <OPTION value="epd">EPD</OPTION>
338
+ <OPTION value="transfac">TRANSFAC</OPTION>
339
+ <OPTION value="prosite">PROSITE</OPTION>
340
+ <OPTION value="pmd">PMD</OPTION>
341
+ <OPTION value="litdb">LITDB</OPTION>
342
+ <OPTION value="omim">OMIM</OPTION>
343
+ <OPTION value="ligand">KEGG/LIGAND</OPTION>
344
+ <OPTION value="pathway">KEGG/PATHWAY</OPTION>
345
+ <OPTION value="brite">KEGG/BRITE</OPTION>
346
+ <OPTION value="genes">KEGG/GENES</OPTION>
347
+ <OPTION value="genome">KEGG/GENOME</OPTION>
348
+ <OPTION value="linkdb">LinkDB</OPTION>
349
+ <OPTION value="aaindex">AAindex</OPTION>
350
+ </SELECT>
351
+
352
+ <INPUT name="id" size="40" type="text" maxlength="1000">
353
+
354
+ <SELECT name="format">
355
+ <OPTION value="default">Default</OPTION>
356
+ <OPTION value="fasta">Fasta</OPTION>
357
+ </SELECT>
358
+
359
+ <SELECT name="style">
360
+ <OPTION value="raw">Raw</OPTION>
361
+ <OPTION value="html">HTML</OPTION>
362
+ </SELECT>
363
+
364
+ <INPUT type="submit">
365
+
366
+ </FORM>
367
+
368
+ <HR>
369
+
370
+ <H2>Direct access</H2>
371
+
372
+ <P>http://bioruby.org/cgi-bin/biofetch.rb?format=(default|fasta|...);style=(html|raw);db=(genbank|embl|...);id=ID[,ID,ID,...]</P>
373
+ <P>(NOTE: the option separator ';' can be '&')</P>
374
+
375
+ <DL>
376
+ <DT> <U>format</U> (optional)
377
+ <DD> default|fasta|...
378
+
379
+ <DT> <U>style</U> (optional, defaults to html)
380
+ <DD> html|raw
381
+
382
+ <DT> <U>db</U> (required)
383
+ <DD> genbank|refseq|embl|swissprot|pir|prf|pdb|pdbstr|epd|transfac|prosite|pmd|litdb|omim|ligand|pathway|brite|genes|genome|linkdb|aaindex|...
384
+
385
+ <DT> <U>id</U> (required)
386
+ <DD> comma separated list of IDs
387
+ </DL>
388
+
389
+ <P>See the <A href="http://obda.open-bio.org/">BioFetch specification</A> for more details.</P>
390
+
391
+ <H2>Server information</H2>
392
+
393
+ <DL>
394
+ <DT> <A href="?info=dbs">What databases are available?</A>
395
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?info=dbs
396
+
397
+ <DT> <A href="?info=formats;db=embl">What formats does the database X have?</A>
398
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?info=formats;db=embl
399
+
400
+ <DT> <A href="?info=maxids">How many entries can be retrieved simultaneously?</A>
401
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?info=maxids
402
+ </DL>
403
+
404
+ <H2>Examples</H2>
405
+
406
+ <DL>
407
+ <DT> <A href="?format=default;style=raw;db=genbank;id=AJ617376">gb:AJ617376</A> (default/raw)
408
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=AJ617376
409
+
410
+ <DT> <A href="?format=fasta;style=raw;db=genbank;id=AJ617376">gb:AJ617376</A> (fasta/raw)
411
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=fasta;style=raw;db=genbank;id=AJ617376
412
+
413
+ <DT> <A href="?format=default;style=html;db=genbank;id=AJ617376">gb:AJ617376</A> (default/html)
414
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=html;db=genbank;id=AJ617376
415
+
416
+ <DT> <A href="?format=default;style=raw;db=genbank;id=AJ617376,AJ617377">gb:AJ617376,AJ617377</A> (default/raw, multiple)
417
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=AJ617376,AJ617377
418
+
419
+ <DT> <A href="?format=default;style=raw;db=embl;id=BUM">embl:BUM</A> (default/raw)
420
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=embl;id=BUM
421
+
422
+ <DT> <A href="?format=default;style=raw;db=swissprot;id=CYC_BOVIN">sp:CYC_BOVIN</A> (default/raw)
423
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=swissprot;id=CYC_BOVIN
424
+
425
+ <DT> <A href="?format=fasta;style=raw;db=swissprot;id=CYC_BOVIN">sp:CYC_BOVIN</A> (fasta/raw)
426
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=fasta;style=raw;db=swissprot;id=CYC_BOVIN
427
+
428
+ <DT> <A href="?format=default;style=raw;db=genes;id=b0015">genes:b0015</A> (default/raw)
429
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genes;id=b0015
430
+
431
+ <DT> <A href="?format=default;style=raw;db=prosite;id=PS00028">ps:PS00028</A> (default/raw)
432
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=prosite;id=PS00028
433
+ </DL>
434
+
435
+ <H2>Errors</H2>
436
+
437
+ <DL>
438
+ <DT> <A href="?format=default;style=raw;db=nonexistent;id=AJ617376">Error1</A> sample : DB not found
439
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=nonexistent;id=AJ617376
440
+
441
+ <DT> <A href="?format=default;style=nonexistent;db=genbank;id=AJ617376">Error2</A> sample : unknown style
442
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=nonexistent;db=genbank;id=AJ617376
443
+
444
+ <DT> <A href="?format=nonexistent;style=raw;db=genbank;id=AJ617376">Error3</A> sample : unknown format
445
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=nonexistent;style=raw;db=genbank;id=AJ617376
446
+
447
+ <DT> <A href="?format=default;style=raw;db=genbank;id=nonexistent">Error4</A> sample : ID not found
448
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=nonexistent
449
+
450
+ <DT> <A href="?style=raw;db=genes;id=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51">Error5</A> sample : too many IDs
451
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?style=raw;db=genes;id=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51
452
+
453
+ <DT> <A href="?info=nonexistent">Error6</A> sample : unknown info
454
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?info=nonexistent
455
+ </DL>
456
+
457
+ <H2>Other BioFetch implementations</H2>
458
+
459
+ <UL>
460
+ <LI> <A href="http://www.ebi.ac.uk/cgi-bin/dbfetch">dbfetch at EBI</A>
461
+ </UL>
462
+
463
+ <HR>
464
+
465
+ <DIV align=right>
466
+ <I>
467
+ staff@Bio<span class="ruby">Ruby</span>.org
468
+ </I>
469
+ <BR>
470
+ <BR>
471
+ <A href="http://bioruby.org/"><IMG border=0 src="/img/banner.gif"></A>
472
+ </DIV>
473
+
474
+ </BODY>
475
+ </HTML>