bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,293 @@
1
+ #
2
+ # bio/db/kegg/genes.rb - KEGG/GENES database class
3
+ #
4
+ # Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: genes.rb,v 0.22 2005/11/09 12:30:07 k Exp $
21
+ #
22
+
23
+ require 'bio/db'
24
+
25
module Bio

class KEGG

# Parser for one KEGG/GENES flat-file entry.
#
# Every accessor parses its field on first use and memoizes the parsed value
# in @data.  The low-level readers used below (get, fetch, field_fetch) and
# @data itself are not defined in this file; presumably they come from KEGGDB
# via 'bio/db' -- confirm there before relying on their exact semantics.
class GENES < KEGGDB

  DELIMITER = RS = "\n///\n"
  TAGSIZE = 12

  # entry:: raw text of a single GENES record; handed to the superclass
  #         together with TAGSIZE.
  def initialize(entry)
    super(entry, TAGSIZE)
  end


  # Returns the ENTRY line split into a Hash with the keys 'id', 'division'
  # and 'organism', cut at fixed character columns (12..29, 30..39, 40..80).
  # Lines of 30 characters or fewer yield an empty Hash; missing keys read
  # as '' because of the Hash default.
  def entry
    unless @data['ENTRY']
      fields = Hash.new('')
      line = get('ENTRY')
      if line.length > 30
        fields['id']       = line[12..29].strip
        fields['division'] = line[30..39].strip
        fields['organism'] = line[40..80].strip
      end
      @data['ENTRY'] = fields
    end
    @data['ENTRY']
  end

  def entry_id
    entry['id']
  end

  def division
    entry['division']			# CDS, tRNA etc.
  end

  def organism
    entry['organism']			# H.sapiens etc.
  end

  # Returns the NAME field as a String.
  def name
    field_fetch('NAME')
  end

  # Returns the NAME field split on ', ' as an Array of gene names.
  def genes
    name.split(', ')
  end

  # Returns the first gene name of the NAME field.
  def gene
    genes.first
  end

  # Returns the DEFINITION field as a String.
  def definition
    field_fetch('DEFINITION')
  end

  # Returns the IDs listed in the first "[EC:...]" group of the DEFINITION
  # as an Array of Strings ([] when there is no such group).
  def eclinks
    definition_links('EC')
  end

  # Returns the IDs listed in the first "[SP:...]" group of the DEFINITION
  # as an Array of Strings ([] when there is no such group).
  def splinks
    definition_links('SP')
  end

  # Returns the CLASS field as a String.
  def keggclass
    field_fetch('CLASS')
  end

  # Returns the contents of every "[PATH:...]" group found in the CLASS field.
  def pathways
    keggclass.scan(/\[PATH:(.*?)\]/).flatten
  end

  # Returns the POSITION field with all whitespace removed.
  def position
    unless @data['POSITION']
      @data['POSITION'] = fetch('POSITION').gsub(/\s/, '')
    end
    @data['POSITION']
  end

  # Returns the position with everything up to and including the first ':'
  # removed (the whole position when it contains no ':').
  def gbposition
    position.sub(/.*?:/, '')
  end

  # Returns the part of the position before the first ':', or nil when the
  # position contains no ':'.
  def chromosome
    if position =~ /:/
      position.sub(/:.*/, '')
    else
      nil
    end
  end

  # Returns the DBLINKS field as a Hash mapping each database name to an
  # Array of ID Strings.
  def dblinks
    unless @data['DBLINKS']
      links = {}
      get('DBLINKS').scan(/(\S+):\s*(.*)\n?/).each do |db, ids|
        links[db] = ids.strip.split(/\s+/)
      end
      @data['DBLINKS'] = links
    end
    @data['DBLINKS']			# Hash of Array of DB IDs in DBLINKS
  end

  # Without an argument, returns the CODON_USAGE table as an Array of
  # Integers in the order the values appear in the record.
  #
  # With a codon (e.g. 'atg', 'ATG' or the RNA spelling 'aug'), returns the
  # single value at index 16*b1 + 4*b2 + b3 where t/u = 0, c = 1, a = 2,
  # g = 3.  Raises ArgumentError when the codon does not consist of three
  # such bases (previously this surfaced as a NoMethodError on nil).
  def codon_usage(codon = nil)
    unless @data['CODON_USAGE']
      counts = []
      get('CODON_USAGE').sub(/.*/,'').each_line do |line|	# cut 1st line
        # drop the 11-character row label, then read 4-character columns
        line.chomp.sub(/^.{11}/, '').scan(/..../) do |column|
          counts.push(column.to_i)
        end
      end
      @data['CODON_USAGE'] = counts
    end

    if codon
      @data['CODON_USAGE'][codon_index(codon)]	# CODON_USAGE of the codon
    else
      return @data['CODON_USAGE']	# Array of CODON_USAGE (default)
    end
  end

  # Returns the codon usage as a Hash keyed by lower-case DNA codon
  # ('ttt', 'ttc', ... 'ggg'), using the same t, c, a, g ordering as
  # codon_usage.
  def cu
    hash = Hash.new
    list = codon_usage
    base = %w(t c a g)
    base.each_with_index do |x, i|
      base.each_with_index do |y, j|
        base.each_with_index do |z, k|
          hash["#{x}#{y}#{z}"] = list[i*16 + j*4 + k]
        end
      end
    end
    return hash
  end

  # Returns the AASEQ field as a Sequence::AA, with whitespace, digits and
  # '/' characters stripped out first.
  def aaseq
    unless @data['AASEQ']
      @data['AASEQ'] = Sequence::AA.new(fetch('AASEQ').gsub(/[\s\d\/]+/, ''))
    end
    @data['AASEQ']
  end

  # Returns the length of aaseq (also stored under @data['AALEN']).
  def aalen
    @data['AALEN'] = aaseq.length
  end

  # Returns the NTSEQ field as a Sequence::NA, with whitespace, digits and
  # '/' characters stripped out first.
  def ntseq
    unless @data['NTSEQ']
      @data['NTSEQ'] = Sequence::NA.new(fetch('NTSEQ').gsub(/[\s\d\/]+/, ''))
    end
    @data['NTSEQ']
  end
  alias naseq ntseq

  # Returns the length of ntseq (also stored under @data['NTLEN']).
  def ntlen
    @data['NTLEN'] = ntseq.length
  end
  alias nalen ntlen

  private

  # Extracts the whitespace-separated IDs of the first "[<db>:...]" group in
  # the DEFINITION.  The captured text is stripped first so that a space
  # after the colon does not produce a leading empty String.
  def definition_links(db)
    found = /\[#{db}:(.*?)\]/.match(definition)
    found ? found[1].strip.split(/\s+/) : []
  end

  # Maps a three-base codon onto its index in the CODON_USAGE array.
  # 'u' is treated as 't' so RNA codons work as well.
  def codon_index(codon)
    order = { 't' => 0, 'u' => 0, 'c' => 1, 'a' => 2, 'g' => 3 }
    digits = codon.to_s.downcase.scan(/\w/)[0, 3].map { |b| order[b] }
    if digits.size < 3 or digits.include?(nil)
      raise ArgumentError, "invalid codon: #{codon.inspect}"
    end
    digits[0] * 16 + digits[1] * 4 + digits[2]
  end

end

end

end
204
+
205
+
206
+
207
if __FILE__ == $0

  require 'bio/io/fetch'

  # Self-test: query Bio::Fetch for 'genes' / 'b0002', parse the result and
  # print every accessor in turn.
  flatfile = Bio::Fetch.query('genes', 'b0002')
  record = Bio::KEGG::GENES.new(flatfile)

  %w(
    entry entry_id division name gene definition keggclass position
    dblinks codon_usage cu aaseq aalen naseq nalen eclinks splinks pathways
  ).each do |accessor|
    p record.send(accessor)
  end

end
234
+
235
+
236
+ =begin
237
+
238
+ = Bio::KEGG::GENES
239
+
240
+ === Initialize
241
+
242
+ --- Bio::KEGG::GENES.new(entry)
243
+
244
+ === ENTRY
245
+
246
+ --- Bio::KEGG::GENES#entry -> Hash
247
+ --- Bio::KEGG::GENES#entry_id -> String
248
+ --- Bio::KEGG::GENES#division -> String
249
+ --- Bio::KEGG::GENES#organism -> String
250
+
251
+ === NAME
252
+
253
+ --- Bio::KEGG::GENES#name -> String
254
+ --- Bio::KEGG::GENES#genes -> Array
255
+ --- Bio::KEGG::GENES#gene -> String
256
+
257
+ === DEFINITION
258
+
259
+ --- Bio::KEGG::GENES#definition -> String
260
+ --- Bio::KEGG::GENES#eclinks -> Array
261
+ --- Bio::KEGG::GENES#splinks -> Array
262
+
263
+ === CLASS
264
+
265
+ --- Bio::KEGG::GENES#keggclass -> String
266
+ --- Bio::KEGG::GENES#pathways -> Array
267
+
268
+ === POSITION
269
+
270
+ --- Bio::KEGG::GENES#position -> String
271
+
272
+ === DBLINKS
273
+
274
+ --- Bio::KEGG::GENES#dblinks -> Hash
275
+
276
+ === CODON_USAGE
277
+
278
+ --- Bio::KEGG::GENES#codon_usage(codon = nil) -> Array or Fixnum
279
+ --- Bio::KEGG::GENES#cu -> Hash
280
+
281
+ === AASEQ
282
+
283
+ --- Bio::KEGG::GENES#aaseq -> Bio::Sequence::AA
284
+ --- Bio::KEGG::GENES#aalen -> Fixnum
285
+
286
+ === NTSEQ
287
+
288
+ --- Bio::KEGG::GENES#ntseq -> Bio::Sequence::NA
289
+ --- Bio::KEGG::GENES#naseq -> Bio::Sequence::NA
290
+ --- Bio::KEGG::GENES#ntlen -> Fixnum
291
+ --- Bio::KEGG::GENES#nalen -> Fixnum
292
+
293
+ =end
@@ -0,0 +1,362 @@
1
+ #
2
+ # bio/db/kegg/genome.rb - KEGG/GENOME database class
3
+ #
4
+ # Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: genome.rb,v 0.14 2005/09/08 01:22:11 k Exp $
21
+ #
22
+
23
+ require 'bio/db'
24
+
25
module Bio

class KEGG

# Parser for one KEGG/GENOME flat-file entry.
#
# Every accessor parses its field on first use and memoizes the parsed value
# in @data.  The helpers used below (get, field_fetch, toptag2array,
# subtag2array, tag_get, tag_cut, truncate) and @data itself are not defined
# in this file; presumably they come from KEGGDB via 'bio/db' -- confirm
# there before relying on their exact semantics.
class GENOME < KEGGDB

  DELIMITER = RS = "\n///\n"
  TAGSIZE = 12

  # entry:: raw text of a single GENOME record; handed to the superclass
  #         together with TAGSIZE.
  def initialize(entry)
    super(entry, TAGSIZE)
  end


  # ENTRY
  def entry_id
    field_fetch('ENTRY')
  end

  # NAME
  def name
    field_fetch('NAME')
  end

  # DEFINITION
  def definition
    field_fetch('DEFINITION')
  end
  alias organism definition

  # TAXONOMY
  #
  # Returns a Hash with the keys 'taxid' and 'lineage', taken from the first
  # and second sub-records of the TAXONOMY field.  Absent sub-records and
  # unknown keys read as ''.
  def taxonomy
    unless @data['TAXONOMY']
      taxid, lineage = subtag2array(get('TAXONOMY'))
      taxid   = taxid   ? truncate(tag_cut(taxid))   : ''
      lineage = lineage ? truncate(tag_cut(lineage)) : ''
      @data['TAXONOMY'] = {
        'taxid'		=> taxid,
        'lineage'	=> lineage,
      }
      @data['TAXONOMY'].default = ''
    end
    @data['TAXONOMY']
  end

  def taxid
    taxonomy['taxid']
  end

  def lineage
    taxonomy['lineage']
  end

  # COMMENT
  def comment
    field_fetch('COMMENT')
  end

  # REFERENCE
  #
  # Builds one Reference per top-level REFERENCE record and returns them
  # wrapped in a References object.
  def references
    unless @data['REFERENCE']
      ary = toptag2array(get('REFERENCE')).map do |ref|
        Reference.new(reference_fields(ref))
      end
      @data['REFERENCE'] = References.new(ary)
    end
    @data['REFERENCE']
  end

  # CHROMOSOME
  #
  # Returns an Array with one Hash (sub-tag => value) per CHROMOSOME record.
  def chromosomes
    toptag_hashes('CHROMOSOME')
  end

  # PLASMID
  #
  # Returns an Array with one Hash (sub-tag => value) per PLASMID record.
  def plasmids
    toptag_hashes('PLASMID')
  end

  # SCAFFOLD
  #
  # Returns an Array with one Hash (sub-tag => value) per SCAFFOLD record.
  def scaffolds
    toptag_hashes('SCAFFOLD')
  end

  # STATISTICS
  #
  # Returns a Hash with the keys 'nalen', 'num_gene', 'num_rna' (Integers)
  # and 'gc' (Float), filled from the matching lines of the STATISTICS
  # field.  Keys whose line is absent read as 0.0, the Hash default.
  def statistics
    unless @data['STATISTICS']
      hash = Hash.new(0.0)
      get('STATISTICS').each_line do |line|
        case line
        when /nucleotides:\s+(\d+)/
          hash['nalen'] = $1.to_i
        when /protein genes:\s+(\d+)/
          hash['num_gene'] = $1.to_i
        when /RNA genes:\s+(\d+)/
          hash['num_rna'] = $1.to_i
        # The decimal point is matched literally and the fraction is
        # optional, so integer percentages are accepted too.
        when /G\+C content:\s+(\d+(?:\.\d+)?)/
          hash['gc'] = $1.to_f
        end
      end
      @data['STATISTICS'] = hash
    end
    @data['STATISTICS']
  end

  def nalen
    statistics['nalen']
  end
  alias length nalen

  def num_gene
    statistics['num_gene']
  end

  def num_rna
    statistics['num_rna']
  end

  def gc
    statistics['gc']
  end

  # GENOMEMAP
  def genomemap
    field_fetch('GENOMEMAP')
  end

  private

  # Turns one REFERENCE record into the Hash given to Reference.new.
  # Unknown keys read as ''.
  def reference_fields(ref)
    hash = Hash.new('')
    subtag2array(ref).each do |field|
      case tag_get(field)
      when /AUTHORS/
        hash['authors'] = split_authors(truncate(tag_cut(field)))
      when /TITLE/
        hash['title'] = truncate(tag_cut(field))
      when /JOURNAL/
        journal = truncate(tag_cut(field))
        if journal =~ /(.*) (\d+):(\d+)-(\d+) \((\d+)\) \[UI:(\d+)\]$/
          hash['journal']	= $1
          hash['volume']	= $2
          # NOTE(review): only the first page ($3) is stored; the last
          # page ($4) is captured but unused -- confirm this is intended.
          hash['pages']		= $3
          hash['year']		= $5
          hash['medline']	= $6
        else
          hash['journal'] = journal
        end
      end
    end
    hash
  end

  # Splits an AUTHORS string on ', ', additionally splits the last element
  # on ' and ', and inserts a space after the first comma of each name.
  # An empty string yields [] instead of failing on a nil last element.
  def split_authors(str)
    authors = str.split(', ')
    return authors if authors.empty?
    authors[-1] = authors[-1].split(/\s+and\s+/)
    authors.flatten.map { |a| a.sub(',', ', ') }
  end

  # Shared parser for CHROMOSOME / PLASMID / SCAFFOLD: one Hash per
  # top-level record, keyed by sub-tag, with '' for unknown keys.  The
  # result is cached in @data[tag] only after every record has been parsed.
  def toptag_hashes(tag)
    unless @data[tag]
      @data[tag] = toptag2array(get(tag)).map do |record|
        hash = Hash.new('')
        subtag2array(record).each do |field|
          hash[tag_get(field)] = truncate(tag_cut(field))
        end
        hash
      end
    end
    @data[tag]
  end

end

end

end
212
+
213
+
214
+
215
if __FILE__ == $0

  # Route p through pp when the pp library can be loaded; otherwise keep
  # the built-in p.
  begin
    require 'pp'
    def p(arg); pp(arg); end
  rescue LoadError
  end

  require 'bio/io/flatfile'

  # Self-test: read GENOME entries from ARGF and print every accessor,
  # grouped under the tag it belongs to.
  ff = Bio::FlatFile.new(Bio::KEGG::GENOME, ARGF)

  ff.each do |genome|

    puts "### Tags"
    p genome.tags

    # Each row is a tag name followed by the accessor methods to print.
    [
      %w( ENTRY entry_id ),
      %w( NAME name ),
      %w( DEFINITION definition ),
      %w( TAXONOMY taxonomy taxid lineage ),
      %w( REFERENCE references ),
      %w( CHROMOSOME chromosomes ),
      %w( PLASMID plasmids ),
      %w( SCAFFOLD scaffolds ),		# was 'plasmids', so scaffolds never ran
      %w( STATISTICS statistics nalen num_gene num_rna gc ),
      %w( GENOMEMAP genomemap ),
    ].each do |x|
      puts "### " + x.shift
      x.each do |m|
        p genome.send(m)
      end
    end

  end

end
253
+
254
+
255
+ =begin
256
+
257
+ = Bio::KEGG::GENOME
258
+
259
+ === Initialize
260
+
261
+ --- Bio::KEGG::GENOME.new(entry)
262
+
263
+ === ENTRY
264
+
265
+ --- Bio::KEGG::GENOME#entry_id -> String
266
+
267
+ Returns contents of the ENTRY record as a String.
268
+
269
+ === NAME
270
+
271
+ --- Bio::KEGG::GENOME#name -> String
272
+
273
+ Returns contents of the NAME record as a String.
274
+
275
+ === DEFINITION
276
+
277
+ --- Bio::KEGG::GENOME#definition -> String
278
+
279
+ Returns contents of the DEFINITION record as a String.
280
+
281
+ --- Bio::KEGG::GENOME#organism -> String
282
+
283
+ Alias for the 'definition' method.
284
+
285
+ === TAXONOMY
286
+
287
+ --- Bio::KEGG::GENOME#taxonomy -> Hash
288
+
289
+ Returns contents of the TAXONOMY record as a Hash.
290
+
291
+ --- Bio::KEGG::GENOME#taxid -> String
292
+
293
+ Returns NCBI taxonomy ID from the TAXONOMY record as a String.
294
+
295
+ --- Bio::KEGG::GENOME#lineage -> String
296
+
297
+ Returns contents of the TAXONOMY/LINEAGE record as a String.
298
+
299
+ === COMMENT
300
+
301
+ --- Bio::KEGG::GENOME#comment -> String
302
+
303
+ Returns contents of the COMMENT record as a String.
304
+
305
+ === REFERENCE
306
+
307
+ --- Bio::KEGG::GENOME#references -> Array
308
+
309
+ Returns contents of the REFERENCE records as an Array of Bio::Reference
310
+ objects.
311
+
312
+ === CHROMOSOME
313
+
314
+ --- Bio::KEGG::GENOME#chromosomes -> Array
315
+
316
+ Returns contents of the CHROMOSOME records as an Array of Hash.
317
+
318
+ === PLASMID
319
+
320
+ --- Bio::KEGG::GENOME#plasmids -> Array
321
+
322
+ Returns contents of the PLASMID records as an Array of Hash.
323
+
324
+ === SCAFFOLD
325
+
326
+ --- Bio::KEGG::GENOME#scaffolds -> Array
327
+
328
+ Returns contents of the SCAFFOLD records as an Array of Hash.
329
+
330
+ === STATISTICS
331
+
332
+ --- Bio::KEGG::GENOME#statistics -> Hash
333
+
334
+ Returns contents of the STATISTICS record as a Hash.
335
+
336
+ --- Bio::KEGG::GENOME#nalen -> Fixnum
337
+
338
+ Returns number of nucleotides from the STATISTICS record as a Fixnum.
339
+
340
+ --- Bio::KEGG::GENOME#num_gene -> Fixnum
341
+
342
+ Returns number of protein genes from the STATISTICS record as a Fixnum.
343
+
344
+ --- Bio::KEGG::GENOME#num_rna -> Fixnum
345
+
346
+ Returns number of RNA genes from the STATISTICS record as a Fixnum.
347
+
348
+ --- Bio::KEGG::GENOME#gc -> Float
349
+
350
+ Returns G+C content from the STATISTICS record as a Float.
351
+
352
+ === GENOMEMAP
353
+
354
+ --- Bio::KEGG::GENOME#genomemap -> String
355
+
356
+ Returns contents of the GENOMEMAP record as a String.
357
+
358
+ == SEE ALSO
359
+
360
+ ftp://ftp.genome.jp/pub/kegg/genomes/genome
361
+
362
+ =end