bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,228 @@
1
+ #
2
+ # = bio/shell/plugin/codon.rb - plugin for the codon table
3
+ #
4
+ # Copyright:: Copyright (C) 2005
5
+ # Toshiaki Katayama <k@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: codon.rb,v 1.12 2005/12/19 02:34:24 k Exp $
9
+ #
10
+ #--
11
+ #
12
+ # This library is free software; you can redistribute it and/or
13
+ # modify it under the terms of the GNU Lesser General Public
14
+ # License as published by the Free Software Foundation; either
15
+ # version 2 of the License, or (at your option) any later version.
16
+ #
17
+ # This library is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
+ # Lesser General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public
23
+ # License along with this library; if not, write to the Free Software
24
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
+ #
26
+ #++
27
+ #
28
+
29
+ module Bio::Shell
30
+
31
+ class ColoredCodonTable
32
+
33
# One-letter amino acid codes grouped by the property used to pick a
# color (generate_colored_text) and to print the legend
# (generate_mono_text).  Each code is listed once; the original list
# repeated "S" under :polar, which duplicated it in the printed legend.
@@properties = {
  :basic    => %w( H K R ),
  :polar    => %w( S T Y Q N ),
  :acidic   => %w( D E ),
  :nonpolar => %w( F L I M V P A C W G ),
  :stop     => %w( * ),
}
40
+
41
# Builds the table cells for codon table +number+.
#
# number:: key passed to Bio::CodonTable[] to select the table
# cuhash:: optional object indexed by codon; when given, its values are
#          shown in the cells instead of the amino acid names
#
# The colored or monochrome renderer is chosen from
# Bio::Shell.config[:color].
def initialize(number, cuhash = nil)
  @number = number
  @cuhash = cuhash
  @aacode = Bio::AminoAcid.names
  @table  = Bio::CodonTable[number]
  setup_colors
  Bio::Shell.config[:color] ? generate_colored_text : generate_mono_text
end
53
+ attr_reader :table
54
+
55
# Populates @colors, mapping each display role to the escape sequence
# looked up in Bio::Shell.esc_seq under the paired color name.
def setup_colors
  palette = Bio::Shell.esc_seq
  role_to_color = [
    [:text,     :none],
    [:aa,       :green],
    [:start,    :red],
    [:stop,     :red],
    [:basic,    :cyan],
    [:polar,    :blue],
    [:acidic,   :magenta],
    [:nonpolar, :yellow],
  ]

  @colors = {}
  role_to_color.each do |role, color|
    @colors[role] = palette[color]
  end
  @colors
end
69
+
70
# Fills one instance variable per codon (@ttt, @ttc, ...) with the plain
# text of its table cell and builds the uncolored legend strings
# @hydrophilic and @hydrophobic.
#
# Without @cuhash a cell is " <name> <aa> " ("STOP" and no letter for
# '*'); with @cuhash it is the amino acid letter followed by the
# @cuhash value for that codon right-justified to 6 columns.
def generate_mono_text
  @table.each do |codon, aa|
    if aa == '*'
      code = "STOP"
      aa = '' unless @cuhash
    else
      code = @aacode[aa]
    end

    cell =
      if @cuhash
        "#{aa}#{@cuhash[codon].to_s.rjust(6)}"
      else
        " #{code} #{aa} "
      end

    # Assign directly instead of eval'ing generated source: the cell text
    # (which includes caller-supplied @cuhash values) is then never parsed
    # as Ruby, so a quote or backslash in it cannot break or alter the
    # assignment.
    instance_variable_set("@#{codon}", cell)
  end

  @hydrophilic = [
    @@properties[:basic].join(" "), "(basic),",
    @@properties[:polar].join(" "), "(polar),",
    @@properties[:acidic].join(" "), "(acidic)",
  ].join(" ")
  @hydrophobic = @@properties[:nonpolar].join(" ") + " (nonpolar)"
end
93
+
94
# Fills one instance variable per codon (@ttt, @ttc, ...) with the
# colored text of its table cell and builds the colored legend strings
# @hydrophilic and @hydrophobic.
#
# Color choice, as implemented below:
# * stop codons use @colors[:stop];
# * the amino acid name uses the color of its @@properties group;
# * the one-letter code uses @colors[:start] for start codons and
#   @colors[:aa] otherwise when no @cuhash is given; with @cuhash it uses
#   @colors[:aa] for start codons and the property color otherwise.
def generate_colored_text
  @table.each do |codon, aa|
    # Key of the first property list containing this amino acid
    # (nil when no list contains it).
    property, = @@properties.detect {|key, list| list.include?(aa)}

    if aa == '*'
      color_code = "#{@colors[:stop]}STOP"
      color_aa = @cuhash ? "#{@colors[:stop]}#{aa}" : ''
    else
      color_code = "#{@colors[property]}#{@aacode[aa]}"
      if @table.start_codon?(codon)
        aa_color = @cuhash ? @colors[:aa] : @colors[:start]
      else
        aa_color = @cuhash ? @colors[property] : @colors[:aa]
      end
      color_aa = "#{aa_color}#{aa}"
    end

    cell =
      if @cuhash
        percent = @cuhash[codon].to_s.rjust(6)
        "#{color_aa}#{@colors[:text]}#{percent}"
      else
        " #{color_code} #{color_aa}#{@colors[:text]} "
      end

    # Assign directly instead of eval'ing generated source: the cell text
    # (escape sequences plus caller-supplied @cuhash values) is then never
    # parsed as Ruby.
    instance_variable_set("@#{codon}", cell)
  end

  @hydrophilic = [
    "#{@colors[:basic]}basic#{@colors[:text]},",
    "#{@colors[:polar]}polar#{@colors[:text]},",
    "#{@colors[:acidic]}acidic#{@colors[:text]}"
  ].join(" ")
  @hydrophobic = "#{@colors[:nonpolar]}nonpolar"
end
137
+
138
+ def output
139
+ header = <<-END
140
+ #
141
+ # = Codon table #{@number} : #{@table.definition}
142
+ #
143
+ # hydrophilic: #{@hydrophilic}
144
+ # hydrophobic: #{@hydrophobic}
145
+ END
146
+ table = <<-END
147
+ #
148
+ # *---------------------------------------------*
149
+ # | | 2nd | |
150
+ # | 1st |-------------------------------| 3rd |
151
+ # | | U | C | A | G | |
152
+ # |-------+-------+-------+-------+-------+-----|
153
+ # | U U |#{@ttt}|#{@tct}|#{@tat}|#{@tgt}| u |
154
+ # | U U |#{@ttc}|#{@tcc}|#{@tac}|#{@tgc}| c |
155
+ # | U U |#{@tta}|#{@tca}|#{@taa}|#{@tga}| a |
156
+ # | UUU |#{@ttg}|#{@tcg}|#{@tag}|#{@tgg}| g |
157
+ # |-------+-------+-------+-------+-------+-----|
158
+ # | CCCC |#{@ctt}|#{@cct}|#{@cat}|#{@cgt}| u |
159
+ # | C |#{@ctc}|#{@ccc}|#{@cac}|#{@cgc}| c |
160
+ # | C |#{@cta}|#{@cca}|#{@caa}|#{@cga}| a |
161
+ # | CCCC |#{@ctg}|#{@ccg}|#{@cag}|#{@cgg}| g |
162
+ # |-------+-------+-------+-------+-------+-----|
163
+ # | A |#{@att}|#{@act}|#{@aat}|#{@agt}| u |
164
+ # | A A |#{@atc}|#{@acc}|#{@aac}|#{@agc}| c |
165
+ # | AAAAA |#{@ata}|#{@aca}|#{@aaa}|#{@aga}| a |
166
+ # | A A |#{@atg}|#{@acg}|#{@aag}|#{@agg}| g |
167
+ # |-------+-------+-------+-------+-------+-----|
168
+ # | GGGG |#{@gtt}|#{@gct}|#{@gat}|#{@ggt}| u |
169
+ # | G |#{@gtc}|#{@gcc}|#{@gac}|#{@ggc}| c |
170
+ # | G GGG |#{@gta}|#{@gca}|#{@gaa}|#{@gga}| a |
171
+ # | GG G |#{@gtg}|#{@gcg}|#{@gag}|#{@ggg}| g |
172
+ # *---------------------------------------------*
173
+ #
174
+ END
175
+ if @cuhash
176
+ text = table
177
+ else
178
+ text = header + table
179
+ end
180
+ if Bio::Shell.config[:color]
181
+ text.gsub(/^\s+#/, @colors[:text])
182
+ else
183
+ text.gsub(/^\s+#/, '')
184
+ end
185
+ end
186
+
187
+ end
188
+
189
+ private
190
+
191
# Builds a ColoredCodonTable for table +num+.
#
# With +codon_usage+ the ColoredCodonTable itself is returned and nothing
# is printed; without it the rendered table is printed and the underlying
# table object (ColoredCodonTable#table) is returned.
def codontable(num = 1, codon_usage = nil)
  colored = ColoredCodonTable.new(num, codon_usage)
  return colored if codon_usage

  puts colored.output
  colored.table
end
200
+
201
# Prints every entry of Bio::CodonTable::DEFINITIONS as
# "<key><TAB><definition>", sorted, and returns DEFINITIONS itself.
def codontables
  definitions = Bio::CodonTable::DEFINITIONS
  definitions.sort.each { |number, text| puts "#{number}\t#{text}" }
  definitions
end
208
+
209
# Prints one line per single-character key of Bio::AminoAcid.names:
# the key, its value, and the value looked up again under that value,
# separated by tabs.  Returns the names table.
def aminoacids
  names = Bio::AminoAcid.names
  names.sort.each do |key, value|
    next unless key.length == 1
    puts "#{key}\t#{value}\t#{names[value]}"
  end
  names
end
218
+
219
# Prints, for each base symbol in a fixed order, the symbol and the
# Bio::NucleicAcid.names entries for its lower- and upper-case form,
# separated by tabs.  Returns the names table.
def nucleicacids
  names = Bio::NucleicAcid.names
  symbols = %w(a t g c u r y w s k m b v h d n)
  symbols.each { |sym| puts "#{sym}\t#{names[sym]}\t#{names[sym.upcase]}" }
  names
end
226
+
227
+ end
228
+
@@ -0,0 +1,85 @@
1
+ #
2
+ # = bio/shell/plugin/entry.rb - extract entry and sequence
3
+ #
4
+ # Copyright:: Copyright (C) 2005
5
+ # Toshiaki Katayama <k@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: entry.rb,v 1.4 2005/12/07 05:12:07 k Exp $
9
+ #
10
+ #--
11
+ #
12
+ # This library is free software; you can redistribute it and/or
13
+ # modify it under the terms of the GNU Lesser General Public
14
+ # License as published by the Free Software Foundation; either
15
+ # version 2 of the License, or (at your option) any later version.
16
+ #
17
+ # This library is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
+ # Lesser General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public
23
+ # License along with this library; if not, write to the Free Software
24
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
+ #
26
+ #++
27
+ #
28
+
29
+ module Bio::Shell
30
+
31
+ private
32
+
33
# Obtain a Bio::Sequence::NA (DNA) or a Bio::Sequence::AA (Amino Acid)
# sequence from
#   * Bio::Sequence -- returned as is
#   * String        -- "atgcatgc" or "MQKKP"
#   * IO            -- io = IO.popen("gzip -dc db.gz") (first entry only)
#   * "filename"    -- "gbvrl.gbk" (first entry only)
#   * "db:entry"    -- "embl:BUM" (entry is retrieved by the ent method)
#
# Returns "" when no sequence could be obtained.
def seq(arg)
  seq = ""
  # The parsed entry is held in +entry+ rather than a local named +ent+,
  # so the call to the ent method below is not shadowed by a local of
  # the same name.
  entry = nil
  tmp = nil

  if arg.kind_of?(Bio::Sequence)
    seq = arg
  elsif arg.respond_to?(:gets) or File.exist?(arg)  # File.exists? is deprecated
    entry = flatauto(arg)
  elsif arg[/:/]
    str = ent(arg)
    entry = flatparse(str)
  else
    tmp = arg
  end

  if entry.respond_to?(:seq)
    tmp = entry.seq
  elsif entry.respond_to?(:naseq)
    seq = entry.naseq
  elsif entry.respond_to?(:aaseq)
    seq = entry.aaseq
  end

  # Plain strings are passed through Bio::Sequence.auto.
  if tmp and tmp.is_a?(String) and not tmp.empty?
    seq = Bio::Sequence.auto(tmp)
  end
  return seq
end
65
+
66
# Obtain a database entry from
#   * IO         -- IO object (first entry only)
#   * "filename" -- local file (first entry only)
#   * "db:entry" -- local bioflat, OBDA, KEGG API
#
# Sources are tried in that order; anything not matched by the first
# three is passed unchanged to bget.
def ent(arg)
  db, entry_id = arg.to_s.strip.split(/:/)
  if arg.respond_to?(:gets) or File.exist?(arg)  # File.exists? is deprecated
    entry = flatfile(arg)
  elsif Bio::Shell.find_flat_dir(db)
    entry = flatsearch(db, entry_id)
  elsif obdadbs.include?(db)
    entry = obdaentry(db, entry_id)
  else
    entry = bget(arg)
  end
  return entry
end
84
+
85
+ end
@@ -0,0 +1,119 @@
1
+ #
2
+ # = bio/shell/plugin/flatfile.rb - plugin for flatfile database
3
+ #
4
+ # Copyright:: Copyright (C) 2005
5
+ # Toshiaki Katayama <k@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: flatfile.rb,v 1.11 2005/11/30 01:57:18 k Exp $
9
+ #
10
+ #--
11
+ #
12
+ # This library is free software; you can redistribute it and/or
13
+ # modify it under the terms of the GNU Lesser General Public
14
+ # License as published by the Free Software Foundation; either
15
+ # version 2 of the License, or (at your option) any later version.
16
+ #
17
+ # This library is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
+ # Lesser General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public
23
+ # License along with this library; if not, write to the Free Software
24
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
+ #
26
+ #++
27
+ #
28
+
29
+ module Bio::Shell
30
+
31
+ private
32
+
33
# Reads raw entry text through Bio::FlatFile.auto.
#
# With a block, yields the raw text (entry_raw) of every entry in turn.
# Without a block, returns the raw text of the first entry only.
def flatfile(filename)
  unless block_given?
    raw = ''
    Bio::FlatFile.auto(filename) do |ff|
      ff.next_entry
      raw = ff.entry_raw
    end
    return raw
  end

  Bio::FlatFile.auto(filename) do |ff|
    ff.each { |_parsed| yield ff.entry_raw }
  end
end
49
+
50
# Reads entries through Bio::FlatFile.auto.
#
# With a block, yields every entry produced by the flat file's each.
# Without a block, returns the result of the first next_entry call.
def flatauto(filename)
  unless block_given?
    first = ''
    Bio::FlatFile.auto(filename) { |ff| first = ff.next_entry }
    return first
  end

  Bio::FlatFile.auto(filename) do |ff|
    ff.each { |parsed| yield parsed }
  end
end
65
+
66
# Instantiates the class returned by Bio::FlatFile.autodetect for
# +entry+ with +entry+ as its argument; returns nil when autodetect
# returns nothing.
def flatparse(entry)
  klass = Bio::FlatFile.autodetect(entry)
  return nil unless klass
  klass.new(entry)
end
71
+
72
# Writes every entry of each file in +flatfiles+ to +fastafile+ in FASTA
# form, using "<entry_id> <definition>" as the header and a width of 50,
# and reports progress on standard output.
def flatfasta(fastafile, *flatfiles)
  puts "Saving fasta file (#{fastafile}) ... "
  File.open(fastafile, "w") do |out|
    flatfiles.each do |path|
      puts " converting -- #{path}"
      Bio::FlatFile.auto(path) do |ff|
        ff.each do |record|
          title = "#{record.entry_id} #{record.definition}"
          out.puts record.seq.to_fasta(title, 50)
        end
      end
    end
  end
  puts "done"
end
87
+
88
# Creates a BioFlat index for +flatfiles+ in the directory returned by
# Bio::Shell.create_flat_dir(dbname).  The bdb, format and options
# arguments of makeindex are all passed as nil.  A failure while
# indexing is reported with warn instead of being raised.
def flatindex(dbname, *flatfiles)
  dir = Bio::Shell.create_flat_dir(dbname)
  begin
    print "Creating BioFlat index (#{dir}) ... "
    Bio::FlatFileIndex.makeindex(nil, dir, nil, nil, *flatfiles)
    puts "done"
  rescue StandardError => err
    warn "Error: Failed to create index (#{dir}) : #{err}"
  end
end
99
+
100
# Looks +keyword+ up in the BioFlat index found for +dbname+ and returns
# the text of all matching entries concatenated ("" when nothing matched).
# Warns and returns nil when no index directory is found for +dbname+.
def flatsearch(dbname, keyword)
  dir = Bio::Shell.find_flat_dir(dbname)
  if not dir
    warn "Error: Failed to open database (#{dbname})"
    return
  end

  found = ''
  Bio::FlatFileIndex.open(dir) do |index|
    hits = index.include?(keyword)
    if hits
      hits.each { |id| found << index.search_primary(id).to_s }
    else
      warn "Error: No hits found in #{dbname} (#{keyword})"
    end
  end
  found
end
118
+
119
+ end
@@ -0,0 +1,187 @@
1
+ #
2
+ # = bio/shell/plugin/keggapi.rb - plugin for KEGG API
3
+ #
4
+ # Copyright:: Copyright (C) 2005
5
+ # Toshiaki Katayama <k@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: keggapi.rb,v 1.8 2005/12/18 15:47:33 k Exp $
9
+ #
10
+ #--
11
+ #
12
+ # This library is free software; you can redistribute it and/or
13
+ # modify it under the terms of the GNU Lesser General Public
14
+ # License as published by the Free Software Foundation; either
15
+ # version 2 of the License, or (at your option) any later version.
16
+ #
17
+ # This library is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
+ # Lesser General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public
23
+ # License along with this library; if not, write to the Free Software
24
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
+ #
26
+ #++
27
+ #
28
+
29
+ module Bio::Shell
30
+
31
+ module Private
32
# Converts each element of +list+ into the string
# "<entry_id>:<TAB><definition>" and returns the strings as an Array,
# in iteration order.
def keggapi_definition2tab(list)
  rows = []
  list.each { |item| rows.push("#{item.entry_id}:\t#{item.definition}") }
  rows
end
39
+ end
40
+
41
+ private
42
+
43
# Returns the Bio::KEGG::API client, creating and caching it in @keggapi
# on first use.
def keggapi
  @keggapi = Bio::KEGG::API.new unless @keggapi
  @keggapi
end
46
+
47
+ # DBGET
48
+
49
# Calls binfo on the KEGG API client for +db+, prints the result and
# returns it.
def binfo(db = "all")
  info = keggapi.binfo(db)
  puts info
  info
end
54
+
55
# Returns the result of bfind on the KEGG API client for +str+.
def bfind(str)
  keggapi.bfind(str)
end
59
+
60
# Calls bget on the KEGG API client for +str+.  With a block the result
# is yielded; without one it is returned.
def bget(str)
  fetched = keggapi.bget(str)
  return fetched unless block_given?
  yield fetched
end
68
+
69
# Returns the result of btit on the KEGG API client for +str+.
def btit(str)
  keggapi.btit(str)
end
73
+
74
# Returns the result of bconv on the KEGG API client for +str+.
def bconv(str)
  keggapi.bconv(str)
end
78
+
79
+ # DATABASES
80
+
81
# Prints the result of list_databases as "<entry_id>:<TAB><definition>"
# lines and returns the entry_id of each element.
def keggdbs
  databases = keggapi.list_databases
  puts Bio::Shell.keggapi_definition2tab(databases).join("\n")
  databases.map { |db| db.entry_id }
end
87
+
88
# Prints the result of list_organisms as sorted
# "<entry_id>:<TAB><definition>" lines and returns the entry_id of each
# element (in the order the API returned them).
def keggorgs
  organisms = keggapi.list_organisms
  lines = Bio::Shell.keggapi_definition2tab(organisms).sort
  puts lines.join("\n")
  organisms.map { |org| org.entry_id }
end
94
+
95
# Prints the result of list_pathways(org) as
# "<entry_id>:<TAB><definition>" lines and returns the entry_id of each
# element.
def keggpathways(org = "map")
  pathways = keggapi.list_pathways(org)
  puts Bio::Shell.keggapi_definition2tab(pathways).join("\n")
  pathways.map { |pathway| pathway.entry_id }
end
101
+
102
# Lists /pub/kegg/genomes/<org> on ftp.genome.jp (anonymous login),
# picks the first listed name ending in "genome" and returns that file's
# contents, read through open on an ftp:// URL.  Returns "" when no such
# name is listed.
#
# NOTE(review): open on a URL presumably relies on open-uri having been
# loaded elsewhere; this method only requires net/ftp -- confirm.
def kegggenomeseq(org)
  contents = ""
  require 'net/ftp'
  Net::FTP.open("ftp.genome.jp", "anonymous") do |ftp|
    names = ftp.nlst("/pub/kegg/genomes/#{org}")
    genome_path = names.grep(/.*genome$/).first
    if genome_path
      open("ftp://ftp.genome.jp/#{genome_path}") do |io|
        contents = io.read
      end
    end
  end
  contents
end
117
+
118
+ end
119
+
120
+ =begin
121
+
122
+ == BioRuby extensions
123
+
124
+ --- get_all_best_best_neighbors_by_gene(genes_id)
125
+ --- get_all_best_neighbors_by_gene(genes_id)
126
+ --- get_all_reverse_best_neighbors_by_gene(genes_id)
127
+ --- get_all_paralogs_by_gene(genes_id)
128
+ --- get_all_genes_by_motifs(motif_id_list)
129
+ --- get_all_oc_members_by_gene(genes_id)
130
+ --- get_all_pc_members_by_gene(genes_id)
131
+ --- get_all_genes_by_organism(org)
132
+ --- get_all_linkdb_by_entry(entry_id, db)
133
+ --- save_image(url, filename = nil)
134
+ --- get_entries(ary = [])
135
+ --- get_aaseqs(ary = [])
136
+ --- get_naseqs(ary = [])
137
+ --- get_definitions(ary = [])
138
+
139
+ == Original KEGG API methods
140
+
141
+ --- get_linkdb_by_entry(entry_id, db, start, max_results)
142
+ --- get_best_best_neighbors_by_gene(genes_id, start, max_results)
143
+ --- get_best_neighbors_by_gene(genes_id, start, max_results)
144
+ --- get_reverse_best_neighbors_by_gene(genes_id, start, max_results)
145
+ --- get_paralogs_by_gene(genes_id, start, max_results)
146
+ --- get_motifs_by_gene(genes_id, db)
147
+ --- get_genes_by_motifs(motif_id_list, start, max_results)
148
+ --- get_ko_by_gene(genes_id)
149
+ --- get_ko_by_ko_class(ko_class_id)
150
+ --- get_genes_by_ko_class(ko_class_id, org, start, max_results)
151
+ --- get_genes_by_ko(ko_id, org)
152
+ --- get_oc_members_by_gene(genes_id, start, max_results)
153
+ --- get_pc_members_by_gene(genes_id, start, max_results)
154
+ --- mark_pathway_by_objects(pathway_id, object_id_list)
155
+ --- color_pathway_by_objects(pathway_id, object_id_list, fg_color_list, bg_color_list)
156
+ --- get_html_of_marked_pathway_by_objects(pathway_id, object_id_list)
157
+ --- get_html_of_colored_pathway_by_objects(pathway_id, object_id_list, fg_color_list, bg_color_list)
158
+ --- get_genes_by_pathway(pathway_id)
159
+ --- get_enzymes_by_pathway(pathway_id)
160
+ --- get_compounds_by_pathway(pathway_id)
161
+ --- get_glycans_by_pathway(pathway_id)
162
+ --- get_reactions_by_pathway(pathway_id)
163
+ --- get_kos_by_pathway(pathway_id)
164
+ --- get_pathways_by_genes(genes_id_list)
165
+ --- get_pathways_by_enzymes(enzyme_id_list)
166
+ --- get_pathways_by_compounds(compound_id_list)
167
+ --- get_pathways_by_glycans(glycan_id_list)
168
+ --- get_pathways_by_reactions(reaction_id_list)
169
+ --- get_pathways_by_kos(ko_id_list, org)
170
+ --- get_linked_pathways(pathway_id)
171
+ --- get_genes_by_enzyme(enzyme_id, org)
172
+ --- get_enzymes_by_gene(genes_id)
173
+ --- get_enzymes_by_compound(compound_id)
174
+ --- get_enzymes_by_glycan(glycan_id)
175
+ --- get_enzymes_by_reaction(reaction_id)
176
+ --- get_compounds_by_enzyme(enzyme_id)
177
+ --- get_compounds_by_reaction(reaction_id)
178
+ --- get_glycans_by_enzyme(enzyme_id)
179
+ --- get_glycans_by_reaction(reaction_id)
180
+ --- get_reactions_by_enzyme(enzyme_id)
181
+ --- get_reactions_by_compound(compound_id)
182
+ --- get_reactions_by_glycan(glycan_id)
183
+ --- get_genes_by_organism(org, start, max_results)
184
+ --- get_number_of_genes_by_organism(org)
185
+ --- convert_mol_to_kcf(mol_text)
186
+
187
+ =end