bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,213 @@
1
+ #
2
+ # bio/db/kegg/glycan.rb - KEGG GLYCAN database class
3
+ #
4
+ # Copyright (C) 2004 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: glycan.rb,v 1.2 2005/09/08 01:22:11 k Exp $
21
+ #
22
+
23
+ require 'bio/db'
24
+
25
module Bio

  class KEGG

    # Accessors for the fields of a single KEGG GLYCAN flat-file entry.
    #
    # The raw-field helpers called here (fetch, field_fetch, lines_fetch, get)
    # and the @data cache are not defined in this file; they are expected to
    # be provided by the KEGGDB superclass loaded through 'bio/db'.
    # Each accessor stores its parsed value in @data under the field tag, so
    # the field is parsed at most once.
    class GLYCAN < KEGGDB

      DELIMITER = RS = "\n///\n"
      TAGSIZE   = 12

      # entry:: text of one GLYCAN entry; handed to KEGGDB together with
      #         TAGSIZE.
      def initialize(entry)
        super(entry, TAGSIZE)
      end

      # ENTRY -- first whitespace-separated token of the ENTRY field.
      def entry_id
        @data['ENTRY'] ||= fetch('ENTRY').split(/\s+/).first
      end

      # NAME
      def name
        field_fetch('NAME')
      end

      # COMPOSITION -- Hash built from every "(key)count" pair in the field;
      # counts are Integers and unknown keys default to 0.
      def composition
        unless @data['COMPOSITION']
          counts = Hash.new(0)
          fetch('COMPOSITION').scan(/\((\S+)\)(\d+)/).each do |key, val|
            counts[key] = val.to_i
          end
          @data['COMPOSITION'] = counts
        end
        @data['COMPOSITION']
      end

      # MASS -- Hash built from every "value (key)" pair in the field;
      # values are Floats.
      def mass
        unless @data['MASS']
          masses = Hash.new
          fetch('MASS').scan(/(\S+)\s+\((\S+)\)/).each do |val, key|
            masses[key] = val.to_f
          end
          @data['MASS'] = masses
        end
        @data['MASS']
      end

      # CLASS
      def keggclass
        field_fetch('CLASS')
      end

      # BINDING -- one String per item; a new item starts on every line that
      # begins with a non-space character.
      def bindings
        @data['BINDING'] ||= merge_continuation_lines('BINDING', /^\S/)
      end

      # COMPOUND -- whitespace-separated tokens of the field.
      def compounds
        @data['COMPOUND'] ||= fetch('COMPOUND').split(/\s+/)
      end

      # REACTION -- whitespace-separated tokens of the field.
      def reactions
        @data['REACTION'] ||= fetch('REACTION').split(/\s+/)
      end

      # PATHWAY
      def pathways
        lines_fetch('PATHWAY')
      end

      # ENZYME -- Array of tokens.  When the field contains a parenthesis
      # (old format) each element is a "token (annotation)" pair instead.
      def enzymes
        unless @data['ENZYME']
          field = fetch('ENZYME')
          if /\(/.match(field)    # old version
            @data['ENZYME'] = field.scan(/\S+ \(\S+\)/)
          else
            @data['ENZYME'] = field.scan(/\S+/)
          end
        end
        @data['ENZYME']
      end

      # ORTHOLOG -- one String per item; a new item starts on every line that
      # begins with a non-space character.
      def orthologs
        @data['ORTHOLOG'] ||= merge_continuation_lines('ORTHOLOG', /^\S/)
      end

      # REFERENCE -- one String per reference; a new reference starts on
      # every line of the form "<number> [PMID...".
      def references
        @data['REFERENCE'] ||=
          merge_continuation_lines('REFERENCE', /^\d+\s+\[PMID/)
      end

      # DBLINKS -- one String per item; a new item starts on every line that
      # begins with a non-space character.
      def dblinks
        @data['DBLINKS'] ||= merge_continuation_lines('DBLINKS', /^\S/)
      end

      # NODE, EDGE -- the raw NODE and EDGE fields concatenated.
      def kcf
        return "#{get('NODE')}#{get('EDGE')}"
      end

      private

      # Groups the lines of field +tag+ into items.  A line matching
      # +head_pattern+ opens a new item; any other line is stripped and
      # appended, after a single space, to the item opened most recently.
      #
      # A non-matching line that appears before any head line used to raise
      # NoMethodError (nil has no <<) because there was no item to append to.
      # Such a line now opens an item of its own, so no text is lost.
      def merge_continuation_lines(tag, head_pattern)
        items = Array.new
        lines_fetch(tag).each do |line|
          if head_pattern.match(line)
            items << line
          elsif items.empty?
            items << line.strip
          else
            items.last << " #{line.strip}"
          end
        end
        items
      end

    end

  end

end
192
+
193
+
194
if __FILE__ == $0
  # Self-test: parse one GLYCAN entry read from the files named on the
  # command line (or from standard input) and dump every accessor in turn.
  glycan = Bio::KEGG::GLYCAN.new(ARGF.read)    # gl:G00024

  [
    :entry_id, :name, :composition, :mass, :keggclass, :bindings,
    :compounds, :reactions, :pathways, :enzymes, :orthologs,
    :references, :dblinks, :kcf
  ].each do |accessor|
    p glycan.__send__(accessor)
  end
end
212
+
213
+
@@ -0,0 +1,418 @@
1
+ #
2
+ # bio/db/kegg/keggtab.rb - KEGG keggtab class
3
+ #
4
+ # Copyright (C) 2001 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # Copyright (C) 2003 KATAYAMA Toshiaki <k@bioruby.org>
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ # $Id: keggtab.rb,v 1.7 2005/09/26 13:00:07 k Exp $
22
+ #
23
+
24
module Bio
  class KEGG

    # Reader for a KEGG "keggtab" file.
    #
    # Parsing fills three tables:
    # * @db_names -- database name         => Keggtab::DB
    # * @database -- database abbreviation => Keggtab::DB
    # * @taxonomy -- taxonomy node label   => Array of child labels
    class Keggtab

      # file_path:: path of the keggtab file to parse.
      # bioroot::   replacement for the "$BIOROOT" prefix in database paths;
      #             the BIOROOT environment variable, when set, wins.
      def initialize(file_path, bioroot = nil)
        @bioroot  = ENV['BIOROOT'] || bioroot
        @db_names = Hash.new
        @database = Hash.new
        @taxonomy = Hash.new
        # File.read closes the file itself; File.open(...).read left the
        # handle open until garbage collection.
        parse_keggtab(File.read(file_path))
      end
      attr_reader :bioroot, :db_names


      # Bio::KEGG::Keggtab::DB

      # Plain container for one database definition line.
      class DB
        def initialize(db_name, db_type, db_path, db_abbrev)
          @name    = db_name
          @type    = db_type
          @path    = db_path
          @abbrev  = db_abbrev
          @aliases = Array.new
        end
        attr_reader :name, :type, :path, :abbrev, :aliases
        alias korg abbrev
        alias keggorg abbrev
      end


      # DB section

      # Returns the DB for +db_abbrev+, or the whole abbreviation => DB hash
      # when called without an argument.
      def database(db_abbrev = nil)
        db_abbrev ? @database[db_abbrev] : @database
      end

      # Alias names of the database with this abbreviation (nil if unknown).
      def aliases(db_abbrev)
        db = @database[db_abbrev]
        db.aliases if db
      end

      # Name of the database with this abbreviation (nil if unknown).
      def name(db_abbrev)
        db = @database[db_abbrev]
        db.name if db
      end

      # "<directory>/<name>" for the database with this abbreviation, with
      # "$BIOROOT" expanded when a bioroot is known (nil if unknown).
      def path(db_abbrev)
        db = @database[db_abbrev]
        "#{expand_bioroot(db.path)}/#{db.name}" if db
      end


      # Alias names of the database called +db_name+ (nil if unknown).
      def alias_list(db_name)
        db = @db_names[db_name]
        db.aliases if db
      end

      # "<directory>/<db_name>" for the database called +db_name+.
      # Returns nil for an unknown name, like #path does, instead of raising
      # NoMethodError on nil.
      def db_path(db_name)
        db = @db_names[db_name]
        "#{expand_bioroot(db.path)}/#{db_name}" if db
      end

      # First DB (in @db_names order) whose abbreviation is +db_abbrev+,
      # or nil.
      def db_by_abbrev(db_abbrev)
        @db_names.values.find { |db| db.abbrev == db_abbrev }
      end

      # Name of the DB found by #db_by_abbrev; nil (rather than a
      # NoMethodError) when there is none.
      def name_by_abbrev(db_abbrev)
        db = db_by_abbrev(db_abbrev)
        db.name if db
      end

      def db_path_by_abbrev(db_abbrev)
        db_path(name_by_abbrev(db_abbrev))
      end


      # Taxonomy section

      # Returns the child labels of +node+, or the whole taxonomy hash when
      # called without an argument.
      def taxonomy(node = nil)
        node ? @taxonomy[node] : @taxonomy
      end

      # Sorted list of every taxonomy node label.
      def taxa_list
        @taxonomy.keys.sort
      end

      def child_nodes(node = 'genes')
        return @taxonomy[node]
      end

      # Expands a taxonomy node into the labels below it.
      #
      # A label of exactly three characters is returned unchanged (it is
      # taken to be a leaf).  Any other label is expanded recursively, so the
      # result is an Array that mirrors the taxonomy nesting; an unknown
      # label yields nil.
      def taxo2korgs(node = 'genes')
        return node if node.length == 3

        children = @taxonomy[node]
        return nil unless children
        children.map { |child| taxo2korgs(child) }
      end
      alias taxo2keggorgs taxo2korgs
      alias taxon2korgs taxo2korgs
      alias taxon2keggorgs taxo2korgs

      # Lists the taxonomy labels above +keggorg+, nearest first.
      #
      # At each step the first node (in @taxonomy order) that lists the
      # current label as a child is taken as its parent.  The walk stops at a
      # label with no parent, or when a label repeats, so cyclic data cannot
      # recurse without bound as the former self-calling Proc could.
      def korg2taxo(keggorg)
        lineage = Array.new
        current = keggorg
        while (found = @taxonomy.find { |_label, kids| kids.include?(current) })
          parent = found.first
          break if lineage.include?(parent)
          lineage.push(parent)
          current = parent
        end
        return lineage
      end
      alias keggorg2taxo korg2taxo
      alias korg2taxonomy korg2taxo
      alias keggorg2taxonomy korg2taxo


      private

      # Replaces the first "$BIOROOT" in +path+ with @bioroot, or returns
      # +path+ untouched when no bioroot is known.  The block form of sub
      # inserts @bioroot literally; the string form would treat sequences
      # such as \0 or \1 inside the bioroot as backreferences.
      def expand_bioroot(path)
        return path unless @bioroot
        path.sub(/\$BIOROOT/) { @bioroot }
      end

      # Fills @db_names, @taxonomy and @database from the keggtab text.
      def parse_keggtab(keggtab)
        in_taxonomy = nil
        # String#each no longer exists on Ruby 1.9 and later; each_line
        # yields the same newline-terminated lines.
        keggtab.each_line do |line|
          case line
          when /^# Taxonomy/		# beginning of the taxonomy section
            in_taxonomy = true
          when /^#|^$/
            next
          when /(^\w\S+)\s+(\w+)\s+(\$\S+)\s+(\w+)/	# db
            db_name   = $1
            db_type   = $2
            db_path   = $3
            db_abbrev = $4
            @db_names[db_name] =
              Bio::KEGG::Keggtab::DB.new(db_name, db_type, db_path, db_abbrev)
          when /(^\w\S+)\s+alias\s+(\w.+\w)/		# alias
            db_alias = $1
            db_name  = $2#.downcase
            if in_taxonomy
              # inside the taxonomy section an alias line defines a node
              # whose children are joined with '+'
              @taxonomy[db_alias] = db_name.split('+')
            elsif @db_names[db_name]
              @db_names[db_name].aliases.push(db_alias)
            end
          end
        end
        # convert keys-by-names hash @db_names to keys-by-abbrev hash @database
        @db_names.each do |_name, db|
          @database[db.abbrev] = db
        end
      end

    end

  end
end
213
+
214
+
215
+
216
if __FILE__ == $0

  # Prefer pretty-printed output when 'pp' can be loaded.
  begin
    require 'pp'
    alias p pp
  rescue LoadError
  end

  # Take the keggtab file from the command line, falling back to
  # <BIOROOT or /bio>/etc/keggtab.
  keggtab_file =
    if ARGV.empty?
      "#{ENV['BIOROOT'] || '/bio'}/etc/keggtab"
    else
      ARGV.shift
    end

  puts "= Initialize: keggtab = Bio::KEGG::Keggtab.new(file)"
  keggtab = Bio::KEGG::Keggtab.new(keggtab_file)


  puts "\n--- Bio::KEGG::Keggtab#bioroot # -> String"
  p keggtab.bioroot


  puts "\n== Methods for DB section"

  puts "\n--- Bio::KEGG::Keggtab#database # -> Hash"
  p keggtab.database

  puts "\n--- Bio::KEGG::Keggtab#database('eco') # -> Keggtab::DB"
  p keggtab.database('eco')

  puts "\n--- Bio::KEGG::Keggtab#name('eco') # -> String"
  p keggtab.name('eco')

  puts "\n--- Bio::KEGG::Keggtab#path('eco') # -> String"
  p keggtab.path('eco')

  puts "\n--- Bio::KEGG::Keggtab#aliases(abbrev) # -> Array"
  ['eco', 'vg'].each do |abbrev|
    puts "\n++ keggtab.aliases('#{abbrev}')"
    p keggtab.aliases(abbrev)
  end


  puts "\n== Methods for Taxonomy section"

  puts "\n--- Bio::KEGG::Keggtab#taxonomy # -> Hash"
  p keggtab.taxonomy

  puts "\n--- Bio::KEGG::Keggtab#taxonomy('archaea') # -> Hash"
  p keggtab.taxonomy('archaea')

  puts "\n--- Bio::KEGG::Keggtab#taxa_list # -> Array"
  p keggtab.taxa_list

  puts "\n--- Bio::KEGG::Keggtab#taxo2korgs(node) # -> Array"
  ['proteobeta', 'eubacteria', 'archaea', 'eukaryotes'].each do |node|
    puts "\n++ keggtab.taxo2korgs('#{node}')"
    p keggtab.taxo2korgs(node)
  end

  puts "\n--- Bio::KEGG::Keggtab#korg2taxo(keggorg) # -> Array"
  ['eco', 'plants'].each do |label|
    puts "\n++ keggtab.korg2taxo('#{label}')"
    p keggtab.korg2taxo(label)
  end

end
288
+
289
+
290
+
291
+ =begin
292
+
293
+ The keggtab file is included in
294
+
295
+ * ((<URL:ftp://ftp.genome.jp/pub/kegg/tarfiles/genes.weekly.last.tar.Z>))
296
+
297
+ File format is something like
298
+
299
+ # KEGGTAB
300
+ #
301
+ # name type directory abbreviation
302
+ #
303
+ enzyme enzyme $BIOROOT/db/ideas/ligand ec
304
+ ec alias enzyme
305
+ (snip)
306
+ # Human
307
+ h.sapiens genes $BIOROOT/db/kegg/genes hsa
308
+ H.sapiens alias h.sapiens
309
+ hsa alias h.sapiens
310
+ (snip)
311
+ #
312
+ # Taxonomy
313
+ #
314
+ (snip)
315
+ animals alias hsa+mmu+rno+dre+dme+cel
316
+ eukaryotes alias animals+plants+protists+fungi
317
+ genes alias eubacteria+archaea+eukaryotes
318
+
319
+ = Bio::KEGG::Keggtab
320
+
321
+ --- Bio::KEGG::Keggtab.new(file_path, bioroot = nil)
322
+
323
+ Path for keggtab file and optionally set bioroot top directory.
324
+ The BIOROOT environment variable, when set, overrides the bioroot argument.
325
+
326
+ --- Bio::KEGG::Keggtab#database -> Hash
327
+
328
+ Returns a hash containing DB definition section of the keggtab file.
329
+
330
+ --- Bio::KEGG::Keggtab#database(db_abbrev) -> Keggtab::DB
331
+
332
+ Returns a Keggtab::DB object.
333
+
334
+ --- Bio::KEGG::Keggtab#taxonomy -> Hash
335
+
336
+ Returns a hash containing Taxonomy section of the keggtab file.
337
+
338
+ --- Bio::KEGG::Keggtab#taxonomy(node) -> Array
339
+
340
+ Returns a list of all child nodes belonging to the given node label.
341
+ (e.g. "eukaryotes" -> ["animals", "plants", "protists", "fungi"], ...)
342
+
343
+ --- Bio::KEGG::Keggtab#bioroot -> String
344
+
345
+ Returns a string of the BIOROOT path prefix.
346
+
347
+ --- Bio::KEGG::Keggtab#name(db_abbrev) -> String
348
+
349
+ Returns a canonical database name for the abbreviation.
350
+ (e.g. 'ec' -> 'enzyme', 'hsa' -> 'h.sapiens', ...)
351
+
352
+ --- Bio::KEGG::Keggtab#aliases(db_abbrev) -> Array
353
+
354
+ Returns an Array containing all alias names for the database.
355
+ (e.g. 'hsa' -> ["H.sapiens", "hsa"], 'hpj' -> ["H.pylori_J99", "hpj"])
356
+
357
+ --- Bio::KEGG::Keggtab#path(db_abbrev) -> String
358
+
359
+ Returns an absolute path for the flat file database.
360
+ (e.g. '/bio/db/kegg/genes', ...)
361
+
362
+ --- Bio::KEGG::Keggtab#taxa_list -> Array
363
+
364
+ List of all node labels from Taxonomy section.
365
+ (e.g. ["actinobacteria", "animals", "archaea", "bacillales", ...)
366
+
367
+ --- Bio::KEGG::Keggtab#taxo2korgs(taxon) -> Array
368
+
369
+ Returns an array of organism names included in the specified taxon
370
+ label. (e.g. 'proteobeta' -> ["nme", "nma", "rso"])
371
+ This method has taxo2keggorgs, taxon2korgs, and taxon2keggorgs aliases.
372
+
373
+ --- Bio::KEGG::Keggtab#korg2taxo(keggorg) -> Array
374
+
375
+ Returns an array of the taxonomy names the organism belongs to.
376
+ (e.g. 'eco' -> ['proteogamma','proteobacteria','eubacteria','genes'])
377
+ This method has aliases as keggorg2taxo, korg2taxonomy, keggorg2taxonomy.
378
+
379
+ * following methods are deprecated
380
+
381
+ --- Bio::KEGG::Keggtab#db_names[db_name] -> Keggtab::DB
382
+ --- Bio::KEGG::Keggtab#db_by_abbrev(db_abbrev) -> Keggtab::DB
383
+ --- Bio::KEGG::Keggtab#alias_list(db_name) -> Array
384
+ --- Bio::KEGG::Keggtab#name_by_abbrev(db_abbrev) -> String
385
+ --- Bio::KEGG::Keggtab#db_path(db_name) -> String
386
+ --- Bio::KEGG::Keggtab#db_path_by_abbrev(keggorg) -> String
387
+
388
+
389
+ == Bio::KEGG::Keggtab::DB
390
+
391
+ --- Bio::KEGG::Keggtab::DB.new(db_name, db_type, db_path, db_abbrev)
392
+
393
+ Create a container object for database definitions.
394
+
395
+ --- Bio::KEGG::Keggtab::DB#name -> String
396
+
397
+ Database name. (e.g. 'enzyme', 'h.sapiens', 'e.coli', ...)
398
+
399
+ --- Bio::KEGG::Keggtab::DB#type -> String
400
+
401
+ Definition type. (e.g. 'enzyme', 'alias', 'genes', ...)
402
+
403
+ --- Bio::KEGG::Keggtab::DB#path -> String
404
+
405
+ Database flat file path. (e.g. '$BIOROOT/db/kegg/genes', ...)
406
+
407
+ --- Bio::KEGG::Keggtab::DB#abbrev -> String
408
+
409
+ Short name for the database. (e.g. 'ec', 'hsa', 'eco', ...)
410
+ korg and keggorg are aliases for the abbrev method.
411
+
412
+ --- Bio::KEGG::Keggtab::DB#aliases -> Array
413
+
414
+ Array containing all alias names for the database.
415
+ (e.g. ["H.sapiens", "hsa"], ["E.coli", "eco"], ...)
416
+
417
+ =end
418
+