bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,306 @@
1
+ #
2
+ # = bio/util/sirna.rb - Class for designing small inhibitory RNAs
3
+ #
4
+ # Copyright:: Copyright (C) 2004, 2005
5
+ # Itoshi NIKAIDO <dritoshi@gmail.com>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: sirna.rb,v 1.6 2005/11/14 15:44:30 nakao Exp $
9
+ #
10
+ # == Bio::SiRNA - Designing siRNA.
11
+ #
12
+ # This class implements the selection rules described by Kumiko Ui-Tei
13
+ # et al. (2004) and Reynolds et al. (2004).
14
+ #
15
+ # == Example
16
+ #
17
+ # seq = Bio::Sequence::NA.new(ARGF.read)
18
+ #
19
+ # sirna = Bio::SiRNA.new(seq)
20
+ # pairs = sirna.design
21
+ #
22
+ # pairs.each do |pair|
23
+ # puts pair.report
24
+ # shrna = Bio::SiRNA::ShRNA.new(pair)
25
+ # shrna.design
26
+ # puts shrna.report
27
+ #
28
+ # puts shrna.top_strand.dna
29
+ # puts shrna.bottom_strand.dna
30
+ # end
31
+ #
32
+ # == References
33
+ #
34
+ # * Kumiko Ui-Tei et al. Guidelines for the selection of highly effective
35
+ # siRNA sequences for mammalian and chick RNA interference.
36
+ # Nucl. Acids. Res. 2004 32: 936-948.
37
+ #
38
+ # * Angela Reynolds et al. Rational siRNA design for RNA interference.
39
+ # Nature Biotech. 2004 22: 326-330.
40
+ #
41
+ #--
42
+ #
43
+ # This library is free software; you can redistribute it and/or
44
+ # modify it under the terms of the GNU Lesser General Public
45
+ # License as published by the Free Software Foundation; either
46
+ # version 2 of the License, or (at your option) any later version.
47
+ #
48
+ # This library is distributed in the hope that it will be useful,
49
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
50
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
51
+ # Lesser General Public License for more details.
52
+ #
53
+ # You should have received a copy of the GNU Lesser General Public
54
+ # License along with this library; if not, write to the Free Software
55
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
56
+ #
57
+ #++
58
+ #
59
+
60
+ require 'bio/sequence'
61
+
62
+ module Bio
63
+
64
+ # = Bio::SiRNA
65
+ # Designing siRNA.
66
+ #
67
+ # This class implements the selection rules described by Kumiko Ui-Tei
68
+ # et al. (2004) and Reynolds et al. (2004).
69
+ class SiRNA
70
+
71
+ # A parameter of size of antisense.
72
+ attr_accessor :antisense_size
73
+
74
+ # A parameter of maximal %GC.
75
+ attr_accessor :max_gc_percent
76
+
77
+ # A parameter of minimum %GC.
78
+ attr_accessor :min_gc_percent
79
+
80
# Creates a designer for one target sequence.
#
# seq:: the target sequence (a Bio::Sequence::NA object); an RNA copy of
#       it, obtained with #rna, is kept for the search.
# antisense_size:: value stored in the antisense_size parameter (default 21).
# max_gc_percent:: value stored in the max_gc_percent parameter (default 60.0).
# min_gc_percent:: value stored in the min_gc_percent parameter (default 40.0).
#
# The Bio::SiRNA::Pair candidates themselves are produced by #design.
def initialize(seq, antisense_size = 21, max_gc_percent = 60.0, min_gc_percent = 40.0)
  # Use the non-destructive #rna rather than #rna! so that the caller's
  # sequence object is not rewritten in place as a side effect.
  # NOTE(review): if rna! is a thin wrapper around String#tr! it would also
  # return nil for input that is already RNA -- confirm against bio/sequence.
  @seq = seq.rna
  @pairs = Array.new
  @antisense_size = antisense_size
  @max_gc_percent = max_gc_percent
  @min_gc_percent = min_gc_percent
end
89
+
90
# Ui-Tei's rule.
#
# target:: candidate window; must respond to size and subseq and be
#          matchable against a Regexp.
#
# Returns true when the window passes every check below, false otherwise.
def uitei?(target)
  # The third base must be G or C.
  return false unless /^.{2}[GC]/i =~ target
  # The third base from the end must be A or U.
  return false unless /[AU].{2}$/i =~ target
  # No run of nine or more consecutive G/C bases.
  return false if /[GC]{9}/i =~ target

  # Measure the window itself instead of reading @target_size: that
  # instance variable is only assigned by #design, so calling this
  # predicate directly used to fail on nil. For the windows handed over by
  # #design both values are expected to be the same.
  target_size = target.size
  one_third = target_size * 1 / 3
  start_pos = target_size - one_third - 1
  remain_seq = target.subseq(start_pos, target_size - 2)

  # Number of A/U bases in that sub-sequence; at least five are required.
  au_number = remain_seq.scan(/[AU]/i).size
  return false if au_number < 5

  return true
end
104
+
105
# Reynolds' rule.
#
# target:: candidate window, matched against two patterns.
#
# Returns false when the window contains nine or more consecutive G/C
# bases; otherwise returns true only if it matches the positional motif
# (A at position 5, U at 12, A/U/C at 15 and A/U at 21 of a 23-base line).
def reynolds?(target)
  gc_stretch = /[GC]{9}/i
  motif = /^.{4}A.{6}U.{2}[AUC].{5}[AU].{2}$/i

  if gc_stretch =~ target
    false
  elsif motif =~ target
    true
  else
    false
  end
end
111
+
112
# Shortcut for design('uitei'); returns whatever #design returns.
def uitei
  return design('uitei')
end
116
+
117
# Shortcut for design('reynolds'); returns whatever #design returns.
def reynolds
  return design('reynolds')
end
121
+
122
# Slides a window of antisense_size + 2 bases over the target sequence
# and collects every window that passes the %GC filter and the chosen
# selection rule.
#
# rule:: 'uitei' (default) or 'reynolds'.
#
# Returns an Array of Bio::SiRNA::Pair objects found by this call.
# Raises NotImplementedError for any other rule name.
def design(rule = 'uitei')
  # Reject an unknown rule before scanning. Previously the error was only
  # raised once some window had passed the %GC filter, so a bad rule name
  # could silently yield an empty result.
  raise NotImplementedError unless ['uitei', 'reynolds'].include?(rule)

  @target_size = @antisense_size + 2

  # Start from an empty list so that calling design more than once on the
  # same object does not return the pairs of earlier calls again.
  @pairs = Array.new

  target_start = 0
  @seq.window_search(@target_size) do |target|
    antisense = target.subseq(1, @target_size - 2).complement.rna
    sense = target.subseq(3, @target_size)

    # The counter is advanced before any filter, so the first window
    # yielded gets start position 1.
    target_start += 1
    # NOTE(review): start + size lies one past the last base of the window
    # if positions are meant to be 1-based inclusive -- confirm the
    # intended convention before changing it.
    target_stop = target_start + @target_size

    antisense_gc_percent = antisense.gc_percent
    next if antisense_gc_percent > @max_gc_percent
    next if antisense_gc_percent < @min_gc_percent

    accepted = (rule == 'uitei') ? uitei?(target) : reynolds?(target)
    next unless accepted

    pair = Bio::SiRNA::Pair.new(target, sense, antisense, target_start, target_stop, rule, antisense_gc_percent)
    @pairs.push(pair)
  end
  return @pairs
end
152
+
153
# = Bio::SiRNA::Pair
# Plain value object holding one siRNA candidate: the seven values given
# to the constructor are stored unchanged and exposed as accessors.
class Pair

  # the target window the candidate was taken from
  attr_accessor :target

  # the sense strand
  attr_accessor :sense

  # the antisense strand
  attr_accessor :antisense

  # start position reported for the window
  attr_accessor :start

  # stop position reported for the window
  attr_accessor :stop

  # name of the selection rule that accepted the candidate
  attr_accessor :rule

  # %GC value stored with the candidate
  attr_accessor :gc_percent

  # Stores the arguments as given; no validation or conversion is done.
  def initialize(target, sense, antisense, start, stop, rule, gc_percent)
    @target, @sense, @antisense = target, sense, antisense
    @start, @stop = start, stop
    @rule = rule
    @gc_percent = gc_percent
  end

  # human readable report
  #
  # Returns a multi-line String. Sequences are upper-cased and the
  # antisense strand is printed reversed.
  def report
    [
      "### siRNA\n",
      'Start: ' + @start.to_s + "\n",
      'Stop: ' + @stop.to_s + "\n",
      'Rule: ' + @rule.to_s + "\n",
      'GC %: ' + @gc_percent.to_s + "\n",
      'Target: ' + @target.upcase + "\n",
      'Sense: ' + ' ' + @sense.upcase + "\n",
      'Antisense: ' + @antisense.reverse.upcase + "\n"
    ].join
  end

  # computer parsable report
  #def to_s
  #  [ @antisense, @start, @stop ].join("\t")
  #end

end # class Pair
198
+
199
+
200
# = Bio::SiRNA::ShRNA
# Designing shRNA.
#
# Builds a top and a bottom strand from the sense strand of a
# Bio::SiRNA::Pair.
class ShRNA

  # top strand; assigned by #block_it (nil until then)
  attr_accessor :top_strand

  # bottom strand; assigned by #block_it (nil until then)
  attr_accessor :bottom_strand

  # Input is a Bio::SiRNA::Pair object (the target sequence).
  def initialize(pair)
    @pair = pair
  end

  # only the 'BLOCK-iT' rule is implemented for now.
  #
  # Any other value raises NotImplementedError. The work is delegated to
  # #block_it called without an argument, i.e. with its default loop.
  def design(method = 'BLOCK-iT')
    raise NotImplementedError unless 'BLOCK-iT' == method
    block_it
  end


  # Builds both strands and stores them in top_strand / bottom_strand.
  #
  # method:: which loop sequence to insert: 'piGENE' (default) or
  #          'BLOCK-iT'. Any other value raises NotImplementedError.
  #
  # Note that #design('BLOCK-iT') calls this method with no argument, so
  # it produces the piGENE loop, not the 'BLOCK-iT' one.
  def block_it(method = 'piGENE')
    top_overhang = Bio::Sequence::NA.new('CACC') # top_strand_shrna_overhang
    bottom_overhang = Bio::Sequence::NA.new('AAAA') # bottom_strand_shrna_overhang
    sense = @pair.sense
    sense_complement = @pair.sense.complement

    loop_fwd =
      case method
      when 'BLOCK-iT'
        # From BLOCK-iT's manual
        Bio::Sequence::NA.new('CGAA')
      when 'piGENE'
        # From piGENE document
        Bio::Sequence::NA.new('GTGTGCTGTCC')
      else
        raise NotImplementedError
      end
    loop_rev = loop_fwd.complement

    # When the sense strand does not already begin with G, a 'G' is
    # inserted after the top overhang and a 'C' appended to the bottom
    # strand.
    needs_leading_g = (/^G/i =~ sense).nil?
    if needs_leading_g
      @top_strand = top_overhang + 'G' + sense + loop_fwd + sense_complement
      @bottom_strand = bottom_overhang + sense + loop_rev + sense_complement + 'C'
    else
      @top_strand = top_overhang + sense + loop_fwd + sense_complement
      @bottom_strand = bottom_overhang + sense + loop_rev + sense_complement
    end
  end

  # human readable report
  #
  # Returns a multi-line String; requires that the strands have been
  # built first (both instance variables are dereferenced here).
  def report
    [
      "### shRNA\n",
      "Top strand shRNA (#{@top_strand.length} nt):\n",
      " 5'-#{@top_strand.upcase}-3'\n",
      "Bottom strand shRNA (#{@bottom_strand.length} nt):\n",
      " 3'-#{@bottom_strand.reverse.upcase}-5'\n"
    ].join
  end

end # class ShRNA
266
+
267
+ end # class SiRNA
268
+
269
+ end # module Bio
270
+
271
+
272
if __FILE__ == $0

  # Demo run when this file is executed directly: reads a sequence from
  # ARGF, prints every siRNA candidate and the shRNA strands built from it.
  target_seq = Bio::Sequence::NA.new(ARGF.read)
  designer = Bio::SiRNA.new(target_seq)

  # or .design('uitei') or .uitei or .reynolds
  designer.design.each do |pair|
    puts pair.report

    hairpin = Bio::SiRNA::ShRNA.new(pair)
    hairpin.design # or .design('BLOCK-iT') or .block_it
    puts hairpin.report

    puts "# as DNA"
    puts hairpin.top_strand.dna
    puts hairpin.bottom_strand.dna
  end

end
292
+
293
+ =begin
294
+
295
+ = ChangeLog
296
+
297
+ 2005/03/21 Itoshi NIKAIDO <itoshi.nikaido@nifty.com>
298
+ Bio::SiRNA#ShRNA_designer method was changed to the design method.
299
+
300
+ 2004/06/25
301
+ Bio::ShRNA class was added.
302
+
303
+ 2004/06/17 Itoshi NIKAIDO <itoshi.nikaido@nifty.com>
304
+ We can use shRNA loop sequence from piGene document.
305
+
306
+ =end
data/lib/bioruby.rb ADDED
@@ -0,0 +1,34 @@
1
+ #
2
+ # = bioruby.rb - Loading all BioRuby modules and setup for shell programming
3
+ #
4
+ # Copyright:: Copyright (C) 2005
5
+ # Toshiaki Katayama <k@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: bioruby.rb,v 1.1 2005/12/07 05:12:06 k Exp $
9
+ #
10
+ #--
11
+ #
12
+ # This library is free software; you can redistribute it and/or
13
+ # modify it under the terms of the GNU Lesser General Public
14
+ # License as published by the Free Software Foundation; either
15
+ # version 2 of the License, or (at your option) any later version.
16
+ #
17
+ # This library is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
+ # Lesser General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public
23
+ # License along with this library; if not, write to the Free Software
24
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
+ #
26
+ #++
27
+ #
28
+
29
+ require 'bio/shell'
30
+
31
+ include Bio::Shell
32
+
33
+ Bio::Shell.setup
34
+
@@ -0,0 +1,475 @@
1
+ #!/usr/proj/bioruby/bin/ruby
2
+ #
3
+ # biofetch.rb : BioFetch server (interface to GenomeNet/DBGET via KEGG API)
4
+ #
5
+ # Copyright (C) 2002-2004 KATAYAMA Toshiaki <k@bioruby.org>
6
+ #
7
+ # This program is free software; you can redistribute it and/or modify
8
+ # it under the terms of the GNU General Public License as published by
9
+ # the Free Software Foundation; either version 2 of the License, or
10
+ # (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU General Public License
18
+ # along with this program; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ # $Id: biofetch.rb,v 1.16 2005/08/07 10:02:41 k Exp $
22
+ #
23
+
24
+ require 'cgi'
25
+ require 'html/template'
26
+ require 'bio/io/keggapi'
27
+
28
+ MAX_ID_NUM = 50
29
+
30
+
31
# Mixin with the plain-text responses of the BioFetch server. Every
# errorN method formats one numbered error message and hands it to
# print_text_page, which prints it and terminates the script.
module BioFetchError

  # Prints +str+ to standard output as a text/plain CGI response (header,
  # blank line, body) and then exits.
  def print_text_page(str)
    $stdout.print("Content-type: text/plain; charset=UTF-8\n\n")
    $stdout.puts(str)
    exit
  end

  # ERROR 1: the requested database name is unknown.
  def error1(db)
    print_text_page("ERROR 1 Unknown database [#{db}].")
  end

  # ERROR 2: the requested style is unknown.
  def error2(style)
    print_text_page("ERROR 2 Unknown style [#{style}].")
  end

  # ERROR 3: the requested format is not known for the database.
  def error3(format, db)
    print_text_page("ERROR 3 Format [#{format}] not known for database [#{db}].")
  end

  # ERROR 4: the entry ID was not found in the database.
  def error4(entry_id, db)
    print_text_page("ERROR 4 ID [#{entry_id}] not found in database [#{db}].")
  end

  # ERROR 5: more IDs were requested than MAX_ID_NUM allows.
  def error5(count)
    print_text_page("ERROR 5 Too many IDs [#{count}]. Max [#{MAX_ID_NUM}] allowed.")
  end

  # ERROR 6: the information request is not a legal one.
  def error6(info)
    print_text_page("ERROR 6 Illegal information request [#{info}].")
  end

end
70
+
71
+
72
+
73
# Mixin wrapping the two Bio::KEGG::API calls this server needs. A new
# API object is created for each call.
module KeggAPI

  include BioFetchError

  # Returns the entry_id of every database reported by
  # Bio::KEGG::API#list_databases.
  def list_databases
    api = Bio::KEGG::API.new
    api.list_databases.collect { |database| database.entry_id }
  end

  # Fetches the entries "db:id" one ID at a time and returns their
  # concatenation. An ID whose result is empty is reported through error4.
  # The format argument is accepted but not used here.
  def bget(db, id_list, format)
    api = Bio::KEGG::API.new
    collected = ''
    id_list.each do |query_id|
      entry = api.get_entries(["#{db}:#{query_id}"])
      if entry.empty?
        error4(query_id, db)
        next
      end
      collected << entry
    end
    return collected
  end

end
99
+
100
+
101
+
102
+
103
# Handles one entry request. All work happens in the constructor: the
# parameters are checked, then either a redirect (html style) or a plain
# text page with the entries is printed.
class BioFetch

  include BioFetchError
  include KeggAPI

  # db:: database name
  # id_list:: Array of entry IDs
  # style:: checked against html|raw
  # format:: checked against fasta|default (may be nil)
  #
  # Each failed check prints its error page through the matching errorN
  # method.
  def initialize(db, id_list, style, format)
    check_style(style)
    check_format(format, db)
    check_number_of_id(id_list.length)
    check_dbname(db)

    # html style: redirect to the DBGET page instead of fetching here.
    goto_html_style_page(db, id_list, format) if /html/.match(style)

    text = bget(db, id_list, format)
    if /fasta/.match(format) and text
      text = convert_to_fasta_format(text, db)
    end

    print_text_page(text)
  end

  private

  # Parses +str+ with Bio::FlatFile.auto and returns the FASTA text of
  # every entry that yields a sequence. The sequence is taken from the
  # first of seq, aaseq, naseq the entry responds to; entries without a
  # sequence are skipped.
  def convert_to_fasta_format(str, db)
    require 'bio'
    require 'stringio'

    records = Array.new

    Bio::FlatFile.auto(StringIO.new(str)) do |flatfile|
      flatfile.each do |entry|
        reader = [:seq, :aaseq, :naseq].find { |name| entry.respond_to?(name) }
        next unless reader
        seq = entry.send(reader)
        next unless seq

        entry_id = entry.respond_to?(:entry_id) ? entry.entry_id : ''
        definition = entry.respond_to?(:definition) ? entry.definition : ''
        records << seq.to_fasta("#{db}:#{entry_id} #{definition}", 60)
      end
    end
    return records.join
  end

  # Prints a Location header pointing at the DBGET www_bget page for the
  # requested entries (with '-f+' prepended when the format is fasta) and
  # exits.
  def goto_html_style_page(db, id_list, format)
    url = "http://www.genome.jp/dbget-bin/www_bget"
    opt = /fasta/.match(format) ? '-f+' : nil
    ids = id_list.join('%2B')
    print "Location: #{url}?#{opt}#{db}+#{ids}\n\n"
    exit
  end

  # ERROR 2 unless the style contains html or raw.
  def check_style(style)
    error2(style) if /html|raw/.match(style).nil?
  end

  # ERROR 3 when a format is given that contains neither fasta nor default.
  def check_format(format, db)
    return unless format
    error3(format, db) unless /fasta|default/.match(format)
  end

  # ERROR 5 when more than MAX_ID_NUM IDs are requested.
  def check_number_of_id(num)
    error5(num) if MAX_ID_NUM < num
  end

  # ERROR 1 unless the name is one returned by list_databases.
  def check_dbname(db)
    known = list_databases
    error1(db) unless known.include?(db)
  end

end
181
+
182
+
183
+
184
# Answers the "info" requests of the BioFetch server (dbs, formats,
# maxids). All work happens in the constructor, which prints a plain text
# page through print_text_page.
class BioFetchInfo

  include BioFetchError
  include KeggAPI

  # The only values of the info parameter that may be dispatched; each
  # one names a private method below.
  INFO_REQUESTS = ['dbs', 'formats', 'maxids'].freeze

  # info:: name of the information request
  # db:: database name, used by the formats request
  #
  # Anything other than a supported request is answered with ERROR 6.
  def initialize(info, db)
    @db = db
    # info comes straight from the request. Passing it unchecked to send
    # would let a client invoke any zero-argument method reachable from
    # this object (including private Kernel methods), so it is restricted
    # to the supported requests first.
    if INFO_REQUESTS.include?(info)
      begin
        send(info)
      rescue
        # As before, a failure inside the handler is reported as ERROR 6.
        error6(info)
      end
    else
      error6(info)
    end
  end

  private

  # Prints the sorted, space separated names from list_databases.
  def dbs
    str = list_databases.sort.join(' ')
    print_text_page(str)
  end

  # Prints "default", followed by " fasta" when check_fasta_ok matches
  # the database name.
  def formats
    fasta = " fasta" if check_fasta_ok
    str = "default#{fasta}"
    print_text_page(str)
  end

  # Prints MAX_ID_NUM.
  def maxids
    str = MAX_ID_NUM.to_s
    print_text_page(str)
  end

  # Truthy when the database name contains one of the listed fragments.
  def check_fasta_ok
    # sequence databases supported by Bio::FlatFile.auto
    /genes|gb|genbank|genpept|rs|refseq|emb|sp|swiss|pir/.match(@db)
  end

end
222
+
223
+
224
+
225
# Front controller of the CGI script: reads the request parameters from
# the given CGI object and shows the query form, an info page or a result
# page.
class BioFetchCGI

  # cgi:: object answering [] with the request parameter values (a CGI
  #       instance in this script). The page is produced immediately.
  def initialize(cgi)
    @cgi = cgi
    show_page
  end

  private

  # Info requests take precedence; without info and without any ID the
  # query form is shown, otherwise the entries are fetched.
  def show_page
    requested_info = info
    unless requested_info.empty?
      show_info_page(requested_info, db)
      return
    end

    ids = id_list
    if ids.empty?
      show_query_page
    else
      show_result_page(db, ids, style, format)
    end
  end

  # Renders the HTML template stored after __END__ (read through DATA)
  # with max_id_num filled in.
  def show_query_page
    html = HTML::Template.new
    html.set_html(DATA.read)
    html.param('max_id_num' => MAX_ID_NUM)
    @cgi.out do
      html.output
    end
  end

  def show_result_page(db, id_list, style, format)
    BioFetch.new(db, id_list, style, format)
  end

  def show_info_page(info, db)
    BioFetchInfo.new(info, db)
  end

  # Lower-cased 'info' parameter.
  def info
    @cgi['info'].downcase
  end

  # Lower-cased 'db' parameter.
  def db
    @cgi['db'].downcase
  end

  # The 'id' parameter split on every non-word character (not only ',').
  # Splitting "A, B" or " A" leaves empty fragments between or before the
  # separators; they are dropped so that they are not looked up as IDs.
  def id_list
    @cgi['id'].split(/\W/).reject { |id| id.empty? }
  end

  # Lower-cased 'style' parameter, "html" when it is empty.
  def style
    s = @cgi['style'].downcase
    return s.empty? ? "html" : s
  end

  # Lower-cased 'format' parameter, "default" when it is empty.
  def format
    f = @cgi['format'].downcase
    return f.empty? ? "default" : f
  end

end
286
+
287
+
288
+
289
+ BioFetchCGI.new(CGI.new)
290
+
291
+
292
+
293
+ =begin
294
+
295
+ This program was created during BioHackathon 2002, Tucson and updated
296
+ in Cape Town :)
297
+
298
+ Rewritten in 2004 to use KEGG API as the bioruby.org server left from Kyoto
299
+ University (where DBGET runs) and the old version could not run without
300
+ having internally accessible DBGET server.
301
+
302
+ =end
303
+
304
+
305
+ __END__
306
+
307
+ <HTML>
308
+ <HEAD>
309
+ <LINK href="http://bioruby.org/img/favicon.png" rel="icon" type="image/png">
310
+ <LINK href="http://bioruby.org/css/bioruby.css" rel="stylesheet" type="text/css">
311
+ <TITLE>BioFetch interface to GenomeNet/DBGET</TITLE>
312
+ </HEAD>
313
+
314
+ <BODY bgcolor="#ffffff">
315
+
316
+ <H1>
317
+ <IMG src="http://bioruby.org/img/ruby.png" align="middle">
318
+ BioFetch interface to
319
+ <A href="http://www.genome.jp/dbget/">GenomeNet/DBGET</A>
320
+ </H1>
321
+
322
+ <P>This page allows you to retrieve up to <!var:max_id_num> entries at a time from various up-to-date biological databases.</P>
323
+
324
+ <HR>
325
+
326
+ <FORM METHOD="post" ENCTYPE="application/x-www-form-urlencoded" action="biofetch.rb">
327
+
328
+ <SELECT name="db">
329
+ <OPTION value="genbank">GenBank</OPTION>
330
+ <OPTION value="refseq">RefSeq</OPTION>
331
+ <OPTION value="embl">EMBL</OPTION>
332
+ <OPTION value="swissprot">Swiss-Prot</OPTION>
333
+ <OPTION value="pir">PIR</OPTION>
334
+ <OPTION value="prf">PRF</OPTION>
335
+ <OPTION value="pdb">PDB</OPTION>
336
+ <OPTION value="pdbstr">PDBSTR</OPTION>
337
+ <OPTION value="epd">EPD</OPTION>
338
+ <OPTION value="transfac">TRANSFAC</OPTION>
339
+ <OPTION value="prosite">PROSITE</OPTION>
340
+ <OPTION value="pmd">PMD</OPTION>
341
+ <OPTION value="litdb">LITDB</OPTION>
342
+ <OPTION value="omim">OMIM</OPTION>
343
+ <OPTION value="ligand">KEGG/LIGAND</OPTION>
344
+ <OPTION value="pathway">KEGG/PATHWAY</OPTION>
345
+ <OPTION value="brite">KEGG/BRITE</OPTION>
346
+ <OPTION value="genes">KEGG/GENES</OPTION>
347
+ <OPTION value="genome">KEGG/GENOME</OPTION>
348
+ <OPTION value="linkdb">LinkDB</OPTION>
349
+ <OPTION value="aaindex">AAindex</OPTION>
350
+ </SELECT>
351
+
352
+ <INPUT name="id" size="40" type="text" maxlength="1000">
353
+
354
+ <SELECT name="format">
355
+ <OPTION value="default">Default</OPTION>
356
+ <OPTION value="fasta">Fasta</OPTION>
357
+ </SELECT>
358
+
359
+ <SELECT name="style">
360
+ <OPTION value="raw">Raw</OPTION>
361
+ <OPTION value="html">HTML</OPTION>
362
+ </SELECT>
363
+
364
+ <INPUT type="submit">
365
+
366
+ </FORM>
367
+
368
+ <HR>
369
+
370
+ <H2>Direct access</H2>
371
+
372
+ <P>http://bioruby.org/cgi-bin/biofetch.rb?format=(default|fasta|...);style=(html|raw);db=(genbank|embl|...);id=ID[,ID,ID,...]</P>
373
+ <P>(NOTE: the option separator ';' can be '&')</P>
374
+
375
+ <DL>
376
+ <DT> <U>format</U> (optional)
377
+ <DD> default|fasta|...
378
+
379
+ <DT> <U>style</U> (required)
380
+ <DD> html|raw
381
+
382
+ <DT> <U>db</U> (required)
383
+ <DD> genbank|refseq|embl|swissprot|pir|prf|pdb|pdbstr|epd|transfac|prosite|pmd|litdb|omim|ligand|pathway|brite|genes|genome|linkdb|aaindex|...
384
+
385
+ <DT> <U>id</U> (required)
386
+ <DD> comma separated list of IDs
387
+ </DL>
388
+
389
+ <P>See the <A href="http://obda.open-bio.org/">BioFetch specification</A> for more details.</P>
390
+
391
+ <H2>Server informations</H2>
392
+
393
+ <DL>
394
+ <DT> <A href="?info=dbs">What databases are available?</A>
395
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?info=dbs
396
+
397
+ <DT> <A href="?info=formats;db=embl">What formats does the database X have?</A>
398
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?info=formats;db=embl
399
+
400
+ <DT> <A href="?info=maxids">How many entries can be retrieved simultaneously?</A>
401
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?info=maxids
402
+ </DL>
403
+
404
+ <H2>Examples</H2>
405
+
406
+ <DL>
407
+ <DT> <A href="?format=default;style=raw;db=genbank;id=AJ617376">gb:AJ617376</A> (default/raw)
408
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=AJ617376
409
+
410
+ <DT> <A href="?format=fasta;style=raw;db=genbank;id=AJ617376">gb:AJ617376</A> (fasta/raw)
411
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=fasta;style=raw;db=genbank;id=AJ617376
412
+
413
+ <DT> <A href="?format=default;style=html;db=genbank;id=AJ617376">gb:AJ617376</A> (default/html)
414
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=html;db=genbank;id=AJ617376
415
+
416
+ <DT> <A href="?format=default;style=raw;db=genbank;id=AJ617376,AJ617377">gb:AJ617376,AJ617377</A> (default/raw, multiple)
417
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=AJ617376,AJ617377
418
+
419
+ <DT> <A href="?format=default;style=raw;db=embl;id=BUM">embl:BUM</A> (default/raw)
420
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=embl;id=BUM
421
+
422
+ <DT> <A href="?format=default;style=raw;db=swissprot;id=CYC_BOVIN">sp:CYC_BOVIN</A> (default/raw)
423
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=swissprot;id=CYC_BOVIN
424
+
425
+ <DT> <A href="?format=fasta;style=raw;db=swissprot;id=CYC_BOVIN">sp:CYC_BOVIN</A> (fasta/raw)
426
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=fasta;style=raw;db=swissprot;id=CYC_BOVIN
427
+
428
+ <DT> <A href="?format=default;style=raw;db=genes;id=b0015">genes:b0015</A> (default/raw)
429
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genes;id=b0015
430
+
431
+ <DT> <A href="?format=default;style=raw;db=prosite;id=PS00028">ps:PS00028</A> (default/raw)
432
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=prosite;id=PS00028
433
+ </DL>
434
+
435
+ <H2>Errors</H2>
436
+
437
+ <DL>
438
+ <DT> <A href="?format=default;style=raw;db=nonexistent;id=AJ617376">Error1</A> sample : DB not found
439
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=nonexistent;id=AJ617376
440
+
441
+ <DT> <A href="?format=default;style=nonexistent;db=genbank;id=AJ617376">Error2</A> sample : unknown style
442
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=nonexistent;db=genbank;id=AJ617376
443
+
444
+ <DT> <A href="?format=nonexistent;style=raw;db=genbank;id=AJ617376">Error3</A> sample : unknown format
445
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=nonexistent;style=raw;db=genbank;id=AJ617376
446
+
447
+ <DT> <A href="?format=default;style=raw;db=genbank;id=nonexistent">Error4</A> sample : ID not found
448
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=nonexistent
449
+
450
+ <DT> <A href="?style=raw;db=genes;id=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51">Error5</A> sample : too many IDs
451
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?style=raw;db=genes;id=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51
452
+
453
+ <DT> <A href="?info=nonexistent">Error6</A> sample : unknown info
454
+ <DD> http://bioruby.org/cgi-bin/biofetch.rb?info=nonexistent
455
+ </DL>
456
+
457
+ <H2>Other BioFetch implementations</H2>
458
+
459
+ <UL>
460
+ <LI> <A href="http://www.ebi.ac.uk/cgi-bin/dbfetch">dbfetch at EBI</A>
461
+ </UL>
462
+
463
+ <HR>
464
+
465
+ <DIV align=right>
466
+ <I>
467
+ staff@Bio<span class="ruby">Ruby</span>.org
468
+ </I>
469
+ <BR>
470
+ <BR>
471
+ <A href="http://bioruby.org/"><IMG border=0 src="/img/banner.gif"></A>
472
+ </DIV>
473
+
474
+ </BODY>
475
+ </HTML>