bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/db/kegg/glycan.rb - KEGG GLYCAN database class
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2004 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: glycan.rb,v 1.2 2005/09/08 01:22:11 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'bio/db'
|
|
24
|
+
|
|
25
|
+
module Bio

  class KEGG

    # Accessor class for a single KEGG GLYCAN flat-file entry.
    #
    # Every accessor parses its field on first use and stores the result in
    # @data under the field tag, so repeated calls return the cached value.
    # The raw-field helpers used here (fetch, field_fetch, lines_fetch, get)
    # are not defined in this file; presumably they are inherited from KEGGDB
    # (loaded via 'bio/db') -- confirm against that class.
    class GLYCAN < KEGGDB

      # Record separator between entries in the flat file.
      DELIMITER = RS = "\n///\n"
      # Tag column width handed to the superclass constructor.
      TAGSIZE = 12

      # entry:: String holding the text of one GLYCAN entry.
      def initialize(entry)
        super(entry, TAGSIZE)
      end

      # ENTRY
      #
      # Returns the first whitespace-separated token of the ENTRY field.
      def entry_id
        @data['ENTRY'] ||= fetch('ENTRY').split(/\s+/).first
      end

      # NAME
      def name
        field_fetch('NAME')
      end

      # COMPOSITION
      #
      # Returns a Hash built from every "(key)count" pair in the field,
      # mapping key => Integer count.  Missing keys read as 0.
      def composition
        unless @data['COMPOSITION']
          counts = Hash.new(0)
          fetch('COMPOSITION').scan(/\((\S+)\)(\d+)/).each do |key, val|
            counts[key] = val.to_i
          end
          @data['COMPOSITION'] = counts
        end
        @data['COMPOSITION']
      end

      # MASS
      #
      # Returns a Hash built from every "value (key)" pair in the field,
      # mapping key => Float value.
      def mass
        unless @data['MASS']
          masses = Hash.new
          fetch('MASS').scan(/(\S+)\s+\((\S+)\)/).each do |val, key|
            masses[key] = val.to_f
          end
          @data['MASS'] = masses
        end
        @data['MASS']
      end

      # CLASS
      def keggclass
        field_fetch('CLASS')
      end

      # BINDING
      #
      # Returns an Array of Strings, one per entry; continuation lines
      # (lines starting with whitespace) are appended to the entry above.
      def bindings
        @data['BINDING'] ||=
          join_continuation_lines(lines_fetch('BINDING'), /^\S/)
      end

      # COMPOUND
      #
      # Returns the field split on whitespace.
      def compounds
        @data['COMPOUND'] ||= fetch('COMPOUND').split(/\s+/)
      end

      # REACTION
      #
      # Returns the field split on whitespace.
      def reactions
        @data['REACTION'] ||= fetch('REACTION').split(/\s+/)
      end

      # PATHWAY
      def pathways
        lines_fetch('PATHWAY')
      end

      # ENZYME
      #
      # Returns an Array of Strings.  When the field contains a parenthesis
      # (old format) each element is a "token (token)" pair, otherwise each
      # element is one whitespace-separated token.
      def enzymes
        unless @data['ENZYME']
          field = fetch('ENZYME')
          if /\(/.match(field)	# old version
            @data['ENZYME'] = field.scan(/\S+ \(\S+\)/)
          else
            @data['ENZYME'] = field.scan(/\S+/)
          end
        end
        @data['ENZYME']
      end

      # ORTHOLOG
      #
      # Returns an Array of Strings, one per entry; continuation lines
      # (lines starting with whitespace) are appended to the entry above.
      def orthologs
        @data['ORTHOLOG'] ||=
          join_continuation_lines(lines_fetch('ORTHOLOG'), /^\S/)
      end

      # REFERENCE
      #
      # Returns an Array of Strings, one per reference.  A new reference
      # starts at a line of the form "<number> [PMID...", every other line
      # is appended to the reference above.
      def references
        @data['REFERENCE'] ||=
          join_continuation_lines(lines_fetch('REFERENCE'), /^\d+\s+\[PMID/)
      end

      # DBLINKS
      #
      # Returns an Array of Strings, one per entry; continuation lines
      # (lines starting with whitespace) are appended to the entry above.
      def dblinks
        @data['DBLINKS'] ||=
          join_continuation_lines(lines_fetch('DBLINKS'), /^\S/)
      end

      # NODE, EDGE
      #
      # Returns the NODE and EDGE fields concatenated into one String.
      def kcf
        return "#{get('NODE')}#{get('EDGE')}"
      end

      private

      # Folds continuation lines into the entry line that precedes them.
      #
      # lines::        Array of Strings to fold.
      # head_pattern:: Regexp that matches the first line of an entry.
      #
      # A line matching head_pattern starts a new element; any other line is
      # stripped and appended, after a single space, to the last element.
      # A non-matching line that appears before any entry line starts an
      # element of its own instead of failing on an empty result.  The input
      # strings are copied, so the caller's lines are never modified.
      def join_continuation_lines(lines, head_pattern)
        merged = Array.new
        lines.each do |line|
          if merged.empty? || head_pattern.match(line)
            merged << line.dup
          else
            merged.last << " #{line.strip}"
          end
        end
        merged
      end

    end

  end

end
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
# Self-test when this file is run directly: reads one GLYCAN entry from
# ARGF (a file argument or standard input, e.g. gl:G00024) and prints the
# result of every accessor in turn.
if $0 == __FILE__
  glycan = Bio::KEGG::GLYCAN.new(ARGF.read)
  [
    :entry_id, :name, :composition, :mass, :keggclass, :bindings,
    :compounds, :reactions, :pathways, :enzymes, :orthologs,
    :references, :dblinks, :kcf
  ].each do |accessor|
    p glycan.send(accessor)
  end
end
|
|
212
|
+
|
|
213
|
+
|
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/db/kegg/keggtab.rb - KEGG keggtab class
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2001 Mitsuteru C. Nakao <n@bioruby.org>
|
|
5
|
+
# Copyright (C) 2003 KATAYAMA Toshiaki <k@bioruby.org>
|
|
6
|
+
#
|
|
7
|
+
# This library is free software; you can redistribute it and/or
|
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
9
|
+
# License as published by the Free Software Foundation; either
|
|
10
|
+
# version 2 of the License, or (at your option) any later version.
|
|
11
|
+
#
|
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
15
|
+
# Lesser General Public License for more details.
|
|
16
|
+
#
|
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
18
|
+
# License along with this library; if not, write to the Free Software
|
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
20
|
+
#
|
|
21
|
+
# $Id: keggtab.rb,v 1.7 2005/09/26 13:00:07 k Exp $
|
|
22
|
+
#
|
|
23
|
+
|
|
24
|
+
module Bio
  class KEGG

    # Parser and lookup table for a KEGG "keggtab" file.
    #
    # The file is read once in the constructor and split into
    # * database definitions, reachable by name (#db_names) and by
    #   abbreviation (#database), and
    # * a taxonomy table mapping each node label to its child labels
    #   (#taxonomy), filled from the alias lines that follow the
    #   "# Taxonomy" comment line.
    class Keggtab

      # file_path:: path of the keggtab file to read.
      # bioroot::   value substituted for "$BIOROOT" in database paths; the
      #             BIOROOT environment variable takes precedence when set.
      def initialize(file_path, bioroot = nil)
        @bioroot = ENV['BIOROOT'] || bioroot
        @db_names = Hash.new
        @database = Hash.new
        @taxonomy = Hash.new
        # File.read closes the file handle once the content has been read.
        parse_keggtab(File.read(file_path))
      end
      attr_reader :bioroot, :db_names


      # Bio::KEGG::Keggtab::DB

      # Container for one database definition line of the keggtab file.
      class DB
        def initialize(db_name, db_type, db_path, db_abbrev)
          @name = db_name
          @type = db_type
          @path = db_path
          @abbrev = db_abbrev
          @aliases = Array.new
        end
        attr_reader :name, :type, :path, :abbrev, :aliases
        alias korg abbrev
        alias keggorg abbrev
      end


      # DB section

      # Returns the DB object for db_abbrev, or the whole abbreviation => DB
      # Hash when called without an argument.
      def database(db_abbrev = nil)
        db_abbrev ? @database[db_abbrev] : @database
      end

      # Returns the Array of alias names for the database with the given
      # abbreviation, or nil when the abbreviation is unknown.
      def aliases(db_abbrev)
        db = @database[db_abbrev]
        db.aliases if db
      end

      # Returns the database name for the given abbreviation, or nil when
      # the abbreviation is unknown.
      def name(db_abbrev)
        db = @database[db_abbrev]
        db.name if db
      end

      # Returns "<directory>/<database name>" for the given abbreviation,
      # with "$BIOROOT" replaced by #bioroot when one is set, or nil when
      # the abbreviation is unknown.
      def path(db_abbrev)
        db = @database[db_abbrev]
        expand_db_path(db, db.name) if db
      end


      # Returns the Array of alias names for the database with the given
      # name, or nil when the name is unknown.
      def alias_list(db_name)
        db = @db_names[db_name]
        db.aliases if db
      end

      # Returns "<directory>/<db_name>" for the given database name, with
      # "$BIOROOT" replaced by #bioroot when one is set.  Returns nil for an
      # unknown name, like the other lookups of this class.
      def db_path(db_name)
        db = @db_names[db_name]
        expand_db_path(db, db_name) if db
      end

      # Returns the first DB object whose abbreviation equals db_abbrev,
      # or nil when there is none.
      def db_by_abbrev(db_abbrev)
        @db_names.values.find { |db| db.abbrev == db_abbrev }
      end

      # Returns the database name for the given abbreviation, or nil when
      # the abbreviation is unknown.
      def name_by_abbrev(db_abbrev)
        db = db_by_abbrev(db_abbrev)
        db.name if db
      end

      # Returns the same value as #db_path for the database with the given
      # abbreviation, or nil when the abbreviation is unknown.
      def db_path_by_abbrev(db_abbrev)
        db_name = name_by_abbrev(db_abbrev)
        db_path(db_name) if db_name
      end


      # Taxonomy section

      # Returns the Array of child labels of node, or the whole
      # label => children Hash when called without an argument.
      def taxonomy(node = nil)
        node ? @taxonomy[node] : @taxonomy
      end

      # Returns all taxonomy node labels, sorted.
      def taxa_list
        @taxonomy.keys.sort
      end

      # Returns the Array of child labels of node (nil when unknown).
      def child_nodes(node = 'genes')
        return @taxonomy[node]
      end

      # Expands a taxonomy node into the organism codes below it.
      #
      # A label of exactly three characters is treated as an organism code
      # and returned as is.  Any other label is looked up in the taxonomy
      # and each of its children is expanded recursively, so the result is
      # nested one Array level per taxonomy level.  Returns nil for a label
      # that is neither three characters long nor a taxonomy node.
      def taxo2korgs(node = 'genes')
        return node if node.length == 3
        children = @taxonomy[node]
        return nil unless children
        children.map { |child| taxo2korgs(child) }
      end
      alias taxo2keggorgs taxo2korgs
      alias taxon2korgs taxo2korgs
      alias taxon2keggorgs taxo2korgs

      # Returns the chain of taxonomy labels above keggorg, nearest first.
      #
      # At each step the first taxonomy node listing the current label among
      # its children is taken as the parent.  Returns an empty Array when no
      # node lists keggorg.
      def korg2taxo(keggorg)
        lineage = Array.new
        current = keggorg
        loop do
          parent = @taxonomy.find { |_label, children| children.include?(current) }
          break unless parent
          label = parent.first
          # A label seen before means the table is cyclic; stop instead of
          # walking the cycle forever.
          break if lineage.include?(label)
          lineage.push(label)
          current = label
        end
        return lineage
      end
      alias keggorg2taxo korg2taxo
      alias korg2taxonomy korg2taxo
      alias keggorg2taxonomy korg2taxo


      private

      # Builds "<directory>/<file>" for db, substituting #bioroot for
      # "$BIOROOT" in the directory when a bioroot is set.
      def expand_db_path(db, file)
        dir = db.path
        # The block form inserts the replacement literally, so backslashes
        # in the bioroot (e.g. a Windows path) are not read as escapes.
        dir = dir.sub(/\$BIOROOT/) { @bioroot } if @bioroot
        "#{dir}/#{file}"
      end

      # Fills @db_names, @taxonomy and @database from the keggtab text.
      def parse_keggtab(keggtab)
        in_taxonomy = nil
        # each_line is available on every Ruby version; String#each is not.
        keggtab.each_line do |line|
          case line
          when /^# Taxonomy/		# beginning of the taxonomy section
            in_taxonomy = true
          when /^#|^$/
            next
          when /(^\w\S+)\s+(\w+)\s+(\$\S+)\s+(\w+)/	# db
            db_name, db_type, db_path, db_abbrev = $1, $2, $3, $4
            @db_names[db_name] = DB.new(db_name, db_type, db_path, db_abbrev)
          when /(^\w\S+)\s+alias\s+(\w.+\w)/		# alias
            db_alias = $1
            db_name = $2
            if in_taxonomy
              # taxonomy node: "label alias child+child+..."
              @taxonomy[db_alias] = db_name.split('+')
            elsif @db_names[db_name]
              @db_names[db_name].aliases.push(db_alias)
            end
          end
        end
        # convert keys-by-names hash @db_names to keys-by-abbrev hash @database
        @db_names.each do |k, v|
          @database[v.abbrev] = v
        end
      end

    end

  end
end
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
# Demo when this file is run directly: loads a keggtab file (first command
# line argument, or "<BIOROOT or /bio>/etc/keggtab" when none is given) and
# prints the result of each public lookup method.
if $0 == __FILE__

  # Use pretty-printed output when the pp library can be loaded.
  begin
    require 'pp'
    alias p pp
  rescue LoadError
  end

  keggtab_file =
    if ARGV.empty?
      "#{ENV['BIOROOT'] || '/bio'}/etc/keggtab"
    else
      ARGV.shift
    end

  puts "= Initialize: keggtab = Bio::KEGG::Keggtab.new(file)"
  keggtab = Bio::KEGG::Keggtab.new(keggtab_file)


  puts "\n--- Bio::KEGG::Keggtab#bioroot # -> String"
  p keggtab.bioroot


  puts "\n== Methods for DB section"

  puts "\n--- Bio::KEGG::Keggtab#database # -> Hash"
  p keggtab.database

  puts "\n--- Bio::KEGG::Keggtab#database('eco') # -> Keggtab::DB"
  p keggtab.database('eco')

  puts "\n--- Bio::KEGG::Keggtab#name('eco') # -> String"
  p keggtab.name('eco')

  puts "\n--- Bio::KEGG::Keggtab#path('eco') # -> String"
  p keggtab.path('eco')

  puts "\n--- Bio::KEGG::Keggtab#aliases(abbrev) # -> Array"
  %w(eco vg).each do |abbrev|
    puts "\n++ keggtab.aliases('#{abbrev}')"
    p keggtab.aliases(abbrev)
  end


  puts "\n== Methods for Taxonomy section"

  puts "\n--- Bio::KEGG::Keggtab#taxonomy # -> Hash"
  p keggtab.taxonomy

  puts "\n--- Bio::KEGG::Keggtab#taxonomy('archaea') # -> Hash"
  p keggtab.taxonomy('archaea')

  puts "\n--- Bio::KEGG::Keggtab#taxa_list # -> Array"
  p keggtab.taxa_list

  puts "\n--- Bio::KEGG::Keggtab#taxo2korgs(node) # -> Array"
  %w(proteobeta eubacteria archaea eukaryotes).each do |taxon|
    puts "\n++ keggtab.taxo2korgs('#{taxon}')"
    p keggtab.taxo2korgs(taxon)
  end

  puts "\n--- Bio::KEGG::Keggtab#korg2taxo(keggorg) # -> Array"
  %w(eco plants).each do |label|
    puts "\n++ keggtab.korg2taxo('#{label}')"
    p keggtab.korg2taxo(label)
  end

end
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
=begin
|
|
292
|
+
|
|
293
|
+
The keggtab file is included in
|
|
294
|
+
|
|
295
|
+
* ((<URL:ftp://ftp.genome.jp/pub/kegg/tarfiles/genes.weekly.last.tar.Z>))
|
|
296
|
+
|
|
297
|
+
File format is something like
|
|
298
|
+
|
|
299
|
+
# KEGGTAB
|
|
300
|
+
#
|
|
301
|
+
# name type directory abbreviation
|
|
302
|
+
#
|
|
303
|
+
enzyme enzyme $BIOROOT/db/ideas/ligand ec
|
|
304
|
+
ec alias enzyme
|
|
305
|
+
(snip)
|
|
306
|
+
# Human
|
|
307
|
+
h.sapiens genes $BIOROOT/db/kegg/genes hsa
|
|
308
|
+
H.sapiens alias h.sapiens
|
|
309
|
+
hsa alias h.sapiens
|
|
310
|
+
(snip)
|
|
311
|
+
#
|
|
312
|
+
# Taxonomy
|
|
313
|
+
#
|
|
314
|
+
(snip)
|
|
315
|
+
animals alias hsa+mmu+rno+dre+dme+cel
|
|
316
|
+
eukaryotes alias animals+plants+protists+fungi
|
|
317
|
+
genes alias eubacteria+archaea+eukaryotes
|
|
318
|
+
|
|
319
|
+
= Bio::KEGG::Keggtab
|
|
320
|
+
|
|
321
|
+
--- Bio::KEGG::Keggtab.new(file_path, bioroot = nil)
|
|
322
|
+
|
|
323
|
+
Takes the path of the keggtab file and, optionally, the bioroot top directory.
|
|
324
|
+
The BIOROOT environment variable, when set, overrides the bioroot argument.
|
|
325
|
+
|
|
326
|
+
--- Bio::KEGG::Keggtab#database -> Hash
|
|
327
|
+
|
|
328
|
+
Returns a hash containing DB definition section of the keggtab file.
|
|
329
|
+
|
|
330
|
+
--- Bio::KEGG::Keggtab#database(db_abbrev) -> Keggtab::DB
|
|
331
|
+
|
|
332
|
+
Returns a Keggtab::DB object.
|
|
333
|
+
|
|
334
|
+
--- Bio::KEGG::Keggtab#taxonomy -> Hash
|
|
335
|
+
|
|
336
|
+
Returns a hash containing Taxonomy section of the keggtab file.
|
|
337
|
+
|
|
338
|
+
--- Bio::KEGG::Keggtab#taxonomy(node) -> Array
|
|
339
|
+
|
|
340
|
+
Returns a list of all child nodes belonging to the given node label.
|
|
341
|
+
(e.g. "eukaryotes" -> ["animals", "plants", "protists", "fungi"], ...)
|
|
342
|
+
|
|
343
|
+
--- Bio::KEGG::Keggtab#bioroot -> String
|
|
344
|
+
|
|
345
|
+
Returns a string of the BIOROOT path prefix.
|
|
346
|
+
|
|
347
|
+
--- Bio::KEGG::Keggtab#name(db_abbrev) -> String
|
|
348
|
+
|
|
349
|
+
Returns a canonical database name for the abbreviation.
|
|
350
|
+
(e.g. 'ec' -> 'enzyme', 'hsa' -> 'h.sapiens', ...)
|
|
351
|
+
|
|
352
|
+
--- Bio::KEGG::Keggtab#aliases(db_abbrev) -> Array
|
|
353
|
+
|
|
354
|
+
Returns an Array containing all alias names for the database.
|
|
355
|
+
(e.g. 'hsa' -> ["H.sapiens", "hsa"], 'hpj' -> ["H.pylori_J99", "hpj"])
|
|
356
|
+
|
|
357
|
+
--- Bio::KEGG::Keggtab#path(db_abbrev) -> String
|
|
358
|
+
|
|
359
|
+
Returns an absolute path for the flat file database.
|
|
360
|
+
(e.g. '/bio/db/kegg/genes', ...)
|
|
361
|
+
|
|
362
|
+
--- Bio::KEGG::Keggtab#taxa_list -> Array
|
|
363
|
+
|
|
364
|
+
List of all node labels from Taxonomy section.
|
|
365
|
+
(e.g. ["actinobacteria", "animals", "archaea", "bacillales", ...])
|
|
366
|
+
|
|
367
|
+
--- Bio::KEGG::Keggtab#taxo2korgs(taxon) -> Array
|
|
368
|
+
|
|
369
|
+
Returns an array of organism names included in the specified taxon
|
|
370
|
+
label. (e.g. 'proteobeta' -> ["nme", "nma", "rso"])
|
|
371
|
+
This method has taxo2keggorgs, taxon2korgs, and taxon2keggorgs aliases.
|
|
372
|
+
|
|
373
|
+
--- Bio::KEGG::Keggtab#korg2taxo(keggorg) -> Array
|
|
374
|
+
|
|
375
|
+
Returns an array of the taxonomy names the organism belongs to.
|
|
376
|
+
(e.g. 'eco' -> ['proteogamma','proteobacteria','eubacteria','genes'])
|
|
377
|
+
This method has aliases as keggorg2taxo, korg2taxonomy, keggorg2taxonomy.
|
|
378
|
+
|
|
379
|
+
* following methods are deprecated
|
|
380
|
+
|
|
381
|
+
--- Bio::KEGG::Keggtab#db_names[db_name] -> Keggtab::DB
|
|
382
|
+
--- Bio::KEGG::Keggtab#db_by_abbrev(db_abbrev) -> Keggtab::DB
|
|
383
|
+
--- Bio::KEGG::Keggtab#alias_list(db_name) -> Array
|
|
384
|
+
--- Bio::KEGG::Keggtab#name_by_abbrev(db_abbrev) -> String
|
|
385
|
+
--- Bio::KEGG::Keggtab#db_path(db_name) -> String
|
|
386
|
+
--- Bio::KEGG::Keggtab#db_path_by_abbrev(keggorg) -> String
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
== Bio::KEGG::Keggtab::DB
|
|
390
|
+
|
|
391
|
+
--- Bio::KEGG::Keggtab::DB.new(db_name, db_type, db_path, db_abbrev)
|
|
392
|
+
|
|
393
|
+
Create a container object for database definitions.
|
|
394
|
+
|
|
395
|
+
--- Bio::KEGG::Keggtab::DB#name -> String
|
|
396
|
+
|
|
397
|
+
Database name. (e.g. 'enzyme', 'h.sapiens', 'e.coli', ...)
|
|
398
|
+
|
|
399
|
+
--- Bio::KEGG::Keggtab::DB#type -> String
|
|
400
|
+
|
|
401
|
+
Definition type. (e.g. 'enzyme', 'alias', 'genes', ...)
|
|
402
|
+
|
|
403
|
+
--- Bio::KEGG::Keggtab::DB#path -> String
|
|
404
|
+
|
|
405
|
+
Database flat file path. (e.g. '$BIOROOT/db/kegg/genes', ...)
|
|
406
|
+
|
|
407
|
+
--- Bio::KEGG::Keggtab::DB#abbrev -> String
|
|
408
|
+
|
|
409
|
+
Short name for the database. (e.g. 'ec', 'hsa', 'eco', ...)
|
|
410
|
+
korg and keggorg are aliases for the abbrev method.
|
|
411
|
+
|
|
412
|
+
--- Bio::KEGG::Keggtab::DB#aliases -> Array
|
|
413
|
+
|
|
414
|
+
Array containing all alias names for the database.
|
|
415
|
+
(e.g. ["H.sapiens", "hsa"], ["E.coli", "eco"], ...)
|
|
416
|
+
|
|
417
|
+
=end
|
|
418
|
+
|