bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/db/kegg/genes.rb - KEGG/GENES database class
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: genes.rb,v 0.22 2005/11/09 12:30:07 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'bio/db'
|
|
24
|
+
|
|
25
|
+
module Bio

class KEGG

# Parser for a single KEGG/GENES flat-file entry.
#
# Every accessor parses its field lazily and caches the parsed value in
# @data.  The helpers get, fetch and field_fetch as well as @data itself are
# not defined in this file; presumably they come from KEGGDB (bio/db).
class GENES < KEGGDB

  DELIMITER = RS = "\n///\n"
  TAGSIZE = 12

  # entry:: raw text of one GENES entry.
  def initialize(entry)
    super(entry, TAGSIZE)
  end


  # Returns the ENTRY line split into a Hash with the keys 'id', 'division'
  # and 'organism' (fixed column slices 12..29, 30..39 and 40..80).
  # When the ENTRY line is 30 characters or shorter, nothing is sliced and the
  # Hash is empty; missing keys read as '' because that is the Hash default.
  def entry
    unless @data['ENTRY']
      hash = Hash.new('')
      e = get('ENTRY')
      if e.length > 30
        hash['id']       = e[12..29].strip
        hash['division'] = e[30..39].strip
        hash['organism'] = e[40..80].strip
      end
      @data['ENTRY'] = hash
    end
    @data['ENTRY']
  end

  # Returns the 'id' part of the ENTRY line.
  def entry_id
    entry['id']
  end

  # Returns the 'division' part of the ENTRY line.
  def division
    entry['division']			# CDS, tRNA etc.
  end

  # Returns the 'organism' part of the ENTRY line.
  def organism
    entry['organism']			# H.sapiens etc.
  end

  # Returns the NAME field.
  def name
    field_fetch('NAME')
  end

  # Returns the NAME field split on ', ' as an Array.
  def genes
    name.split(', ')
  end

  # Returns the first element of #genes.
  def gene
    genes.first
  end

  # Returns the DEFINITION field.
  def definition
    field_fetch('DEFINITION')
  end

  # Returns the whitespace-separated IDs found in the first "[EC:...]"
  # group of the DEFINITION field, or [] when there is no such group.
  def eclinks
    definition_links('EC')
  end

  # Returns the whitespace-separated IDs found in the first "[SP:...]"
  # group of the DEFINITION field, or [] when there is no such group.
  def splinks
    definition_links('SP')
  end

  # Returns the CLASS field.
  def keggclass
    field_fetch('CLASS')
  end

  # Returns the contents of every "[PATH:...]" group in the CLASS field.
  def pathways
    keggclass.scan(/\[PATH:(.*?)\]/).flatten
  end

  # Returns the POSITION field with all whitespace removed.
  def position
    @data['POSITION'] ||= fetch('POSITION').gsub(/\s/, '')
  end

  # Returns #position with everything up to and including the first ':'
  # removed (the whole position when it contains no ':').
  def gbposition
    position.sub(/.*?:/, '')
  end

  # Returns the part of #position before the first ':', or nil when the
  # position contains no ':'.
  def chromosome
    if position =~ /:/
      position.sub(/:.*/, '')
    else
      nil
    end
  end

  # Returns the DBLINKS field as a Hash mapping each "name:" prefix to the
  # Array of whitespace-separated IDs that follow it on the same line.
  def dblinks
    unless @data['DBLINKS']
      hash = {}
      get('DBLINKS').scan(/(\S+):\s*(.*)\n?/).each do |db, str|
        hash[db] = str.strip.split(/\s+/)
      end
      @data['DBLINKS'] = hash
    end
    @data['DBLINKS']		# Hash of Array of DB IDs in DBLINKS
  end

  # Without an argument, returns the CODON_USAGE table as a flat Array of
  # Integers.  With a codon such as 'atg', returns the entry at index
  # first*16 + second*4 + third, where t=0, c=1, a=2, g=3.
  #
  # 'u' is accepted as a synonym of 't' so RNA-style codons work as well.
  # Only the first three word characters of +codon+ are used, case-insensitive.
  #
  # Raises ArgumentError when +codon+ does not start with three valid bases
  # (previously this surfaced as a NoMethodError on nil).
  def codon_usage(codon = nil)
    unless @data['CODON_USAGE']
      ary = []
      # sub(/.*/, '') blanks the first line (the header row of the table)
      get('CODON_USAGE').sub(/.*/,'').each_line do |line|
        # drop the 11-column row label, then read fixed 4-column cells
        line.chomp.sub(/^.{11}/, '').scan(/..../) do |cu|
          ary.push(cu.to_i)
        end
      end
      @data['CODON_USAGE'] = ary
    end

    # Array of CODON_USAGE (default)
    return @data['CODON_USAGE'] unless codon

    h = { 't' => 0, 'u' => 0, 'c' => 1, 'a' => 2, 'g' => 3 }
    x, y, z = codon.downcase.scan(/\w/)
    i, j, k = h[x], h[y], h[z]
    unless i and j and k
      raise ArgumentError, "invalid codon: #{codon.inspect}"
    end
    @data['CODON_USAGE'][i * 16 + j * 4 + k]	# CODON_USAGE of the codon
  end

  # Returns the CODON_USAGE table as a Hash keyed by lower-case codon
  # ('ttt', 'ttc', ... 'ggg'), using the same t, c, a, g ordering as
  # #codon_usage.
  def cu
    hash = Hash.new
    list = codon_usage
    base = %w(t c a g)
    base.each_with_index do |x, i|
      base.each_with_index do |y, j|
        base.each_with_index do |z, k|
          hash["#{x}#{y}#{z}"] = list[i*16 + j*4 + k]
        end
      end
    end
    return hash
  end

  # Returns the AASEQ field as a Sequence::AA, with whitespace, digits and
  # '/' characters stripped.
  def aaseq
    @data['AASEQ'] ||= Sequence::AA.new(fetch('AASEQ').gsub(/[\s\d\/]+/, ''))
  end

  # Returns the length of #aaseq (also stored under @data['AALEN']).
  def aalen
    @data['AALEN'] = aaseq.length
  end

  # Returns the NTSEQ field as a Sequence::NA, with whitespace, digits and
  # '/' characters stripped.
  def ntseq
    @data['NTSEQ'] ||= Sequence::NA.new(fetch('NTSEQ').gsub(/[\s\d\/]+/, ''))
  end
  alias naseq ntseq

  # Returns the length of #ntseq (also stored under @data['NTLEN']).
  def ntlen
    @data['NTLEN'] = ntseq.length
  end
  alias nalen ntlen

  private

  # Returns the whitespace-separated tokens inside the first "[<db>:...]"
  # group of the DEFINITION field, or [] when no such group exists.
  def definition_links(db)
    if /\[#{db}:(.*?)\]/.match(definition)
      $1.split(/\s+/)
    else
      []
    end
  end

end

end

end
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
if __FILE__ == $0

  require 'bio/io/fetch'

  # Fetch entry 'b0002' from the 'genes' database and dump every accessor.
  raw_entry = Bio::Fetch.query('genes', 'b0002')
  gene = Bio::KEGG::GENES.new(raw_entry)

  # Accessors are printed in this exact order, one `p` per accessor.
  %w(
    entry entry_id division name gene definition keggclass position
    dblinks codon_usage cu aaseq aalen naseq nalen
    eclinks splinks pathways
  ).each do |accessor|
    p gene.send(accessor)
  end

end
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
=begin
|
|
237
|
+
|
|
238
|
+
= Bio::KEGG::GENES
|
|
239
|
+
|
|
240
|
+
=== Initialize
|
|
241
|
+
|
|
242
|
+
--- Bio::KEGG::GENES.new(entry)
|
|
243
|
+
|
|
244
|
+
=== ENTRY
|
|
245
|
+
|
|
246
|
+
--- Bio::KEGG::GENES#entry -> Hash
|
|
247
|
+
--- Bio::KEGG::GENES#entry_id -> String
|
|
248
|
+
--- Bio::KEGG::GENES#division -> String
|
|
249
|
+
--- Bio::KEGG::GENES#organism -> String
|
|
250
|
+
|
|
251
|
+
=== NAME
|
|
252
|
+
|
|
253
|
+
--- Bio::KEGG::GENES#name -> String
|
|
254
|
+
--- Bio::KEGG::GENES#genes -> Array
|
|
255
|
+
--- Bio::KEGG::GENES#gene -> String
|
|
256
|
+
|
|
257
|
+
=== DEFINITION
|
|
258
|
+
|
|
259
|
+
--- Bio::KEGG::GENES#definition -> String
|
|
260
|
+
--- Bio::KEGG::GENES#eclinks -> Array
|
|
261
|
+
--- Bio::KEGG::GENES#splinks -> Array
|
|
262
|
+
|
|
263
|
+
=== CLASS
|
|
264
|
+
|
|
265
|
+
--- Bio::KEGG::GENES#keggclass -> String
|
|
266
|
+
--- Bio::KEGG::GENES#pathways -> Array
|
|
267
|
+
|
|
268
|
+
=== POSITION
|
|
269
|
+
|
|
270
|
+
--- Bio::KEGG::GENES#position -> String
|
|
271
|
+
|
|
272
|
+
=== DBLINKS
|
|
273
|
+
|
|
274
|
+
--- Bio::KEGG::GENES#dblinks -> Hash
|
|
275
|
+
|
|
276
|
+
=== CODON_USAGE
|
|
277
|
+
|
|
278
|
+
--- Bio::KEGG::GENES#codon_usage(codon = nil) -> Array or Fixnum
|
|
279
|
+
--- Bio::KEGG::GENES#cu -> Hash
|
|
280
|
+
|
|
281
|
+
=== AASEQ
|
|
282
|
+
|
|
283
|
+
--- Bio::KEGG::GENES#aaseq -> Bio::Sequence::AA
|
|
284
|
+
--- Bio::KEGG::GENES#aalen -> Fixnum
|
|
285
|
+
|
|
286
|
+
=== NTSEQ
|
|
287
|
+
|
|
288
|
+
--- Bio::KEGG::GENES#ntseq -> Bio::Sequence::NA
|
|
289
|
+
--- Bio::KEGG::GENES#naseq -> Bio::Sequence::NA
|
|
290
|
+
--- Bio::KEGG::GENES#ntlen -> Fixnum
|
|
291
|
+
--- Bio::KEGG::GENES#nalen -> Fixnum
|
|
292
|
+
|
|
293
|
+
=end
|
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/db/kegg/genome.rb - KEGG/GENOME database class
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: genome.rb,v 0.14 2005/09/08 01:22:11 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'bio/db'
|
|
24
|
+
|
|
25
|
+
module Bio

class KEGG

# Parser for a single KEGG/GENOME flat-file entry.
#
# Every accessor parses its field lazily and caches the parsed value in
# @data.  The helpers get, field_fetch, toptag2array, subtag2array, tag_get,
# tag_cut and truncate as well as @data itself are not defined in this file;
# presumably they come from KEGGDB (bio/db).
class GENOME < KEGGDB

  DELIMITER = RS = "\n///\n"
  TAGSIZE = 12

  # entry:: raw text of one GENOME entry.
  def initialize(entry)
    super(entry, TAGSIZE)
  end


  # ENTRY
  def entry_id
    field_fetch('ENTRY')
  end

  # NAME
  def name
    field_fetch('NAME')
  end

  # DEFINITION
  def definition
    field_fetch('DEFINITION')
  end
  alias organism definition

  # TAXONOMY
  #
  # Returns a Hash with the keys 'taxid' and 'lineage', taken from the first
  # and second sub-records of the TAXONOMY field.  A missing sub-record, like
  # any other missing key, reads as ''.
  def taxonomy
    unless @data['TAXONOMY']
      taxid, lineage = subtag2array(get('TAXONOMY'))
      taxid   = taxid   ? truncate(tag_cut(taxid))   : ''
      lineage = lineage ? truncate(tag_cut(lineage)) : ''
      @data['TAXONOMY'] = {
        'taxid'   => taxid,
        'lineage' => lineage,
      }
      @data['TAXONOMY'].default = ''
    end
    @data['TAXONOMY']
  end

  # Returns the 'taxid' value of #taxonomy.
  def taxid
    taxonomy['taxid']
  end

  # Returns the 'lineage' value of #taxonomy.
  def lineage
    taxonomy['lineage']
  end

  # COMMENT
  def comment
    field_fetch('COMMENT')
  end

  # REFERENCE
  #
  # Returns a References object built from one Reference per REFERENCE
  # record.  The AUTHORS, TITLE and JOURNAL sub-records are recognised; a
  # JOURNAL line of the form "<name> <vol>:<first>-<last> (<year>) [UI:<id>]"
  # is split into 'journal', 'volume', 'pages', 'year' and 'medline',
  # otherwise the whole line is stored as 'journal'.
  def references
    unless @data['REFERENCE']
      ary = []
      toptag2array(get('REFERENCE')).each do |ref|
        hash = Hash.new('')
        subtag2array(ref).each do |field|
          case tag_get(field)
          when /AUTHORS/
            authors = truncate(tag_cut(field)).split(', ')
            # An empty AUTHORS field yields [] here; guard so we do not call
            # split on nil.
            unless authors.empty?
              # the last item may hold "A and B": split it into two names
              authors[-1] = authors[-1].split(/\s+and\s+/)
            end
            hash['authors'] = authors.flatten.map { |a| a.sub(',', ', ') }
          when /TITLE/
            hash['title'] = truncate(tag_cut(field))
          when /JOURNAL/
            journal = truncate(tag_cut(field))
            if journal =~ /(.*) (\d+):(\d+)-(\d+) \((\d+)\) \[UI:(\d+)\]$/
              hash['journal'] = $1
              hash['volume']  = $2
              # NOTE(review): only the first page ($3) is kept; the last
              # page ($4) is captured but unused -- confirm this is intended.
              hash['pages']   = $3
              hash['year']    = $5
              hash['medline'] = $6
            else
              hash['journal'] = journal
            end
          end
        end
        ary.push(Reference.new(hash))
      end
      @data['REFERENCE'] = References.new(ary)
    end
    @data['REFERENCE']
  end

  # CHROMOSOME
  #
  # Returns an Array with one Hash (sub-tag => value) per CHROMOSOME record.
  def chromosomes
    @data['CHROMOSOME'] ||= subtag_hashes('CHROMOSOME')
  end

  # PLASMID
  #
  # Returns an Array with one Hash (sub-tag => value) per PLASMID record.
  def plasmids
    @data['PLASMID'] ||= subtag_hashes('PLASMID')
  end

  # SCAFFOLD
  #
  # Returns an Array with one Hash (sub-tag => value) per SCAFFOLD record.
  def scaffolds
    @data['SCAFFOLD'] ||= subtag_hashes('SCAFFOLD')
  end

  # STATISTICS
  #
  # Returns a Hash with the keys 'nalen', 'num_gene', 'num_rna' (Integers)
  # and 'gc' (Float).  Keys whose line is absent read as 0.0, the Hash
  # default.
  def statistics
    unless @data['STATISTICS']
      hash = Hash.new(0.0)
      get('STATISTICS').each_line do |line|
        case line
        when /nucleotides:\s+(\d+)/
          hash['nalen'] = $1.to_i
        when /protein genes:\s+(\d+)/
          hash['num_gene'] = $1.to_i
        when /RNA genes:\s+(\d+)/
          hash['num_rna'] = $1.to_i
        # literal decimal point, fractional part optional (e.g. "50.8", "51")
        when /G\+C content:\s+(\d+(?:\.\d+)?)/
          hash['gc'] = $1.to_f
        end
      end
      @data['STATISTICS'] = hash
    end
    @data['STATISTICS']
  end

  # Returns the 'nalen' value of #statistics.
  def nalen
    statistics['nalen']
  end
  alias length nalen

  # Returns the 'num_gene' value of #statistics.
  def num_gene
    statistics['num_gene']
  end

  # Returns the 'num_rna' value of #statistics.
  def num_rna
    statistics['num_rna']
  end

  # Returns the 'gc' value of #statistics.
  def gc
    statistics['gc']
  end

  # GENOMEMAP
  def genomemap
    field_fetch('GENOMEMAP')
  end

  private

  # Splits the field +tag+ into its top-level records and turns each record
  # into a Hash mapping sub-tag name to truncated value (default value '').
  # Shared by #chromosomes, #plasmids and #scaffolds.
  def subtag_hashes(tag)
    records = []
    toptag2array(get(tag)).each do |record|
      hash = Hash.new('')
      subtag2array(record).each do |field|
        hash[tag_get(field)] = truncate(tag_cut(field))
      end
      records.push(hash)
    end
    records
  end

end

end

end
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
if __FILE__ == $0

  # Prefer pretty-printed output when the 'pp' library is available.
  begin
    require 'pp'
    def p(arg); pp(arg); end
  rescue LoadError
  end

  require 'bio/io/flatfile'

  # Read GENOME entries from the files named on the command line (or stdin).
  ff = Bio::FlatFile.new(Bio::KEGG::GENOME, ARGF)

  ff.each do |genome|

    puts "### Tags"
    p genome.tags

    # Each row is a section title followed by the accessors to print for it.
    [
      %w( ENTRY entry_id ),
      %w( NAME name ),
      %w( DEFINITION definition ),
      %w( TAXONOMY taxonomy taxid lineage ),
      %w( REFERENCE references ),
      %w( CHROMOSOME chromosomes ),
      %w( PLASMID plasmids ),
      # was "plasmids": the SCAFFOLD section must print the scaffolds accessor
      %w( SCAFFOLD scaffolds ),
      %w( STATISTICS statistics nalen num_gene num_rna gc ),
      %w( GENOMEMAP genomemap ),
    ].each do |x|
      puts "### " + x.shift
      x.each do |m|
        p genome.send(m)
      end
    end

  end

end
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
=begin
|
|
256
|
+
|
|
257
|
+
= Bio::KEGG::GENOME
|
|
258
|
+
|
|
259
|
+
=== Initialize
|
|
260
|
+
|
|
261
|
+
--- Bio::KEGG::GENOME.new(entry)
|
|
262
|
+
|
|
263
|
+
=== ENTRY
|
|
264
|
+
|
|
265
|
+
--- Bio::KEGG::GENOME#entry_id -> String
|
|
266
|
+
|
|
267
|
+
Returns contents of the ENTRY record as a String.
|
|
268
|
+
|
|
269
|
+
=== NAME
|
|
270
|
+
|
|
271
|
+
--- Bio::KEGG::GENOME#name -> String
|
|
272
|
+
|
|
273
|
+
Returns contents of the NAME record as a String.
|
|
274
|
+
|
|
275
|
+
=== DEFINITION
|
|
276
|
+
|
|
277
|
+
--- Bio::KEGG::GENOME#definition -> String
|
|
278
|
+
|
|
279
|
+
Returns contents of the DEFINITION record as a String.
|
|
280
|
+
|
|
281
|
+
--- Bio::KEGG::GENOME#organism -> String
|
|
282
|
+
|
|
283
|
+
Alias for the 'definition' method.
|
|
284
|
+
|
|
285
|
+
=== TAXONOMY
|
|
286
|
+
|
|
287
|
+
--- Bio::KEGG::GENOME#taxonomy -> Hash
|
|
288
|
+
|
|
289
|
+
Returns contents of the TAXONOMY record as a Hash.
|
|
290
|
+
|
|
291
|
+
--- Bio::KEGG::GENOME#taxid -> String
|
|
292
|
+
|
|
293
|
+
Returns NCBI taxonomy ID from the TAXONOMY record as a String.
|
|
294
|
+
|
|
295
|
+
--- Bio::KEGG::GENOME#lineage -> String
|
|
296
|
+
|
|
297
|
+
Returns contents of the TAXONOMY/LINEAGE record as a String.
|
|
298
|
+
|
|
299
|
+
=== COMMENT
|
|
300
|
+
|
|
301
|
+
--- Bio::KEGG::GENOME#comment -> String
|
|
302
|
+
|
|
303
|
+
Returns contents of the COMMENT record as a String.
|
|
304
|
+
|
|
305
|
+
=== REFERENCE
|
|
306
|
+
|
|
307
|
+
--- Bio::KEGG::GENOME#references -> Bio::References
|
|
308
|
+
|
|
309
|
+
Returns contents of the REFERENCE records as a Bio::References container of Bio::Reference
|
|
310
|
+
objects.
|
|
311
|
+
|
|
312
|
+
=== CHROMOSOME
|
|
313
|
+
|
|
314
|
+
--- Bio::KEGG::GENOME#chromosomes -> Array
|
|
315
|
+
|
|
316
|
+
Returns contents of the CHROMOSOME records as an Array of Hash.
|
|
317
|
+
|
|
318
|
+
=== PLASMID
|
|
319
|
+
|
|
320
|
+
--- Bio::KEGG::GENOME#plasmids -> Array
|
|
321
|
+
|
|
322
|
+
Returns contents of the PLASMID records as an Array of Hash.
|
|
323
|
+
|
|
324
|
+
=== SCAFFOLD
|
|
325
|
+
|
|
326
|
+
--- Bio::KEGG::GENOME#scaffolds -> Array
|
|
327
|
+
|
|
328
|
+
Returns contents of the SCAFFOLD records as an Array of Hash.
|
|
329
|
+
|
|
330
|
+
=== STATISTICS
|
|
331
|
+
|
|
332
|
+
--- Bio::KEGG::GENOME#statistics -> Hash
|
|
333
|
+
|
|
334
|
+
Returns contents of the STATISTICS record as a Hash.
|
|
335
|
+
|
|
336
|
+
--- Bio::KEGG::GENOME#nalen -> Fixnum
|
|
337
|
+
|
|
338
|
+
Returns number of nucleotides from the STATISTICS record as a Fixnum.
|
|
339
|
+
|
|
340
|
+
--- Bio::KEGG::GENOME#num_gene -> Fixnum
|
|
341
|
+
|
|
342
|
+
Returns number of protein genes from the STATISTICS record as a Fixnum.
|
|
343
|
+
|
|
344
|
+
--- Bio::KEGG::GENOME#num_rna -> Fixnum
|
|
345
|
+
|
|
346
|
+
Returns number of RNA genes from the STATISTICS record as a Fixnum.
|
|
347
|
+
|
|
348
|
+
--- Bio::KEGG::GENOME#gc -> Float
|
|
349
|
+
|
|
350
|
+
Returns G+C content from the STATISTICS record as a Float.
|
|
351
|
+
|
|
352
|
+
=== GENOMEMAP
|
|
353
|
+
|
|
354
|
+
--- Bio::KEGG::GENOME#genomemap -> String
|
|
355
|
+
|
|
356
|
+
Returns contents of the GENOMEMAP record as a String.
|
|
357
|
+
|
|
358
|
+
== SEE ALSO
|
|
359
|
+
|
|
360
|
+
ftp://ftp.genome.jp/pub/kegg/genomes/genome
|
|
361
|
+
|
|
362
|
+
=end
|