bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,293 @@
1
+ #
2
+ # bio/db/kegg/genes.rb - KEGG/GENES database class
3
+ #
4
+ # Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: genes.rb,v 0.22 2005/11/09 12:30:07 k Exp $
21
+ #
22
+
23
+ require 'bio/db'
24
+
25
+ module Bio
26
+
27
+ class KEGG
28
+
29
# Accessor class for a single entry of the KEGG/GENES database.
#
# The raw-field helpers used below (+get+, +fetch+, +field_fetch+) and the
# <tt>@data</tt> cache are not defined in this class; presumably they are
# inherited from KEGGDB (loaded by 'bio/db') -- confirm there.
class GENES < KEGGDB

  DELIMITER = RS = "\n///\n"
  TAGSIZE = 12

  # entry:: text of one GENES entry; handed to the superclass with TAGSIZE.
  def initialize(entry)
    super(entry, TAGSIZE)
  end

  # Returns the ENTRY line as a Hash with the keys 'id', 'division' and
  # 'organism'.  Unknown keys read as '' (Hash default).  The line is only
  # parsed when it is longer than 30 characters; the values are cut from
  # fixed columns 12..29, 30..39 and 40..80.  The result is cached.
  def entry
    unless @data['ENTRY']
      hash = Hash.new('')
      e = get('ENTRY')
      if e.length > 30
        hash['id']       = e[12..29].strip
        hash['division'] = e[30..39].strip
        # String#[] returns nil when the range starts beyond the end of the
        # string (a line of 31..39 characters), so coerce before stripping.
        hash['organism'] = e[40..80].to_s.strip
      end
      @data['ENTRY'] = hash
    end
    @data['ENTRY']
  end

  # Returns the 'id' part of the ENTRY line.
  def entry_id
    entry['id']
  end

  # Returns the 'division' part of the ENTRY line (CDS, tRNA etc.).
  def division
    entry['division']
  end

  # Returns the 'organism' part of the ENTRY line (H.sapiens etc.).
  def organism
    entry['organism']
  end

  # Returns the NAME field (via field_fetch).
  def name
    field_fetch('NAME')
  end

  # Returns the NAME field split on ', ' as an Array.
  def genes
    name.split(', ')
  end

  # Returns the first element of #genes.
  def gene
    genes.first
  end

  # Returns the DEFINITION field (via field_fetch).
  def definition
    field_fetch('DEFINITION')
  end

  # Returns the whitespace-separated IDs found in the first "[EC:...]"
  # group of the definition, or [] when there is no such group.
  def eclinks
    m = /\[EC:(.*?)\]/.match(definition)
    m ? m[1].split(/\s+/) : []
  end

  # Returns the whitespace-separated IDs found in the first "[SP:...]"
  # group of the definition, or [] when there is no such group.
  def splinks
    m = /\[SP:(.*?)\]/.match(definition)
    m ? m[1].split(/\s+/) : []
  end

  # Returns the CLASS field (via field_fetch).
  def keggclass
    field_fetch('CLASS')
  end

  # Returns the contents of every "[PATH:...]" group in the CLASS field.
  def pathways
    keggclass.scan(/\[PATH:(.*?)\]/).flatten
  end

  # Returns the POSITION field with all whitespace removed (cached).
  def position
    unless @data['POSITION']
      @data['POSITION'] = fetch('POSITION').gsub(/\s/, '')
    end
    @data['POSITION']
  end

  # Returns #position with everything up to and including the first ':'
  # removed.
  def gbposition
    position.sub(/.*?:/, '')
  end

  # Returns the part of #position before the first ':', or nil when the
  # position contains no ':'.
  def chromosome
    if position =~ /:/
      position.sub(/:.*/, '')
    else
      nil
    end
  end

  # Returns a Hash that maps each database name found in DBLINKS
  # ("name: id id ...") to the Array of its IDs (cached).
  def dblinks
    unless @data['DBLINKS']
      hash = {}
      get('DBLINKS').scan(/(\S+):\s*(.*)\n?/).each do |db, str|
        hash[db] = str.strip.split(/\s+/)
      end
      @data['DBLINKS'] = hash
    end
    @data['DBLINKS']
  end

  # Without an argument, returns the CODON_USAGE table as a flat Array of
  # Integers (cached).  With a codon String, returns the entry for that
  # codon; the index is computed with the base order t, c, a, g
  # (first base * 16 + second base * 4 + third base).
  #
  # codon:: three-letter codon in any case; 'u' is accepted in place of 't'.
  #
  # Returns nil when the codon is shorter than three letters or contains a
  # letter other than t/u/c/a/g (the original raised NoMethodError on nil).
  def codon_usage(codon = nil)
    unless @data['CODON_USAGE']
      ary = []
      # sub(/.*/, '') blanks the first line before the table is read
      get('CODON_USAGE').sub(/.*/, '').each_line do |line|
        # drop the first 11 columns, then read fixed 4-character cells
        line.chomp.sub(/^.{11}/, '').scan(/..../) do |cell|
          ary.push(cell.to_i)
        end
      end
      @data['CODON_USAGE'] = ary
    end

    return @data['CODON_USAGE'] unless codon

    rank = { 't' => 0, 'u' => 0, 'c' => 1, 'a' => 2, 'g' => 3 }
    x, y, z = codon.downcase.scan(/\w/)
    digits = [x, y, z].map { |base| rank[base] }
    return nil if digits.include?(nil)
    @data['CODON_USAGE'][digits[0] * 16 + digits[1] * 4 + digits[2]]
  end

  # Returns the codon usage as a Hash keyed by lower-case codon
  # ('ttt', 'ttc', ... 'ggg'), using the same index order as #codon_usage.
  def cu
    hash = Hash.new
    list = codon_usage
    base = %w(t c a g)
    base.each_with_index do |x, i|
      base.each_with_index do |y, j|
        base.each_with_index do |z, k|
          hash["#{x}#{y}#{z}"] = list[i*16 + j*4 + k]
        end
      end
    end
    return hash
  end

  # Returns the AASEQ field as a Sequence::AA, after removing whitespace,
  # digits and '/' characters (cached).
  def aaseq
    unless @data['AASEQ']
      @data['AASEQ'] = Sequence::AA.new(fetch('AASEQ').gsub(/[\s\d\/]+/, ''))
    end
    @data['AASEQ']
  end

  # Returns the length of #aaseq (also stored in @data['AALEN']).
  def aalen
    @data['AALEN'] = aaseq.length
  end

  # Returns the NTSEQ field as a Sequence::NA, after removing whitespace,
  # digits and '/' characters (cached).
  def ntseq
    unless @data['NTSEQ']
      @data['NTSEQ'] = Sequence::NA.new(fetch('NTSEQ').gsub(/[\s\d\/]+/, ''))
    end
    @data['NTSEQ']
  end
  alias naseq ntseq

  # Returns the length of #ntseq (also stored in @data['NTLEN']).
  def ntlen
    @data['NTLEN'] = ntseq.length
  end
  alias nalen ntlen

end
200
+
201
+ end
202
+
203
+ end
204
+
205
+
206
+
207
if __FILE__ == $0

  require 'bio/io/fetch'

  # Fetch the 'b0002' entry of the 'genes' database and print each accessor
  # of the parsed entry, one per line, in the order listed below.
  flatfile_text = Bio::Fetch.query('genes', 'b0002')
  gene_entry = Bio::KEGG::GENES.new(flatfile_text)

  %w(
    entry entry_id division name gene definition keggclass position
    dblinks codon_usage cu aaseq aalen naseq nalen
    eclinks splinks pathways
  ).each do |accessor|
    p gene_entry.send(accessor)
  end

end
234
+
235
+
236
+ =begin
237
+
238
+ = Bio::KEGG::GENES
239
+
240
+ === Initialize
241
+
242
+ --- Bio::KEGG::GENES.new(entry)
243
+
244
+ === ENTRY
245
+
246
+ --- Bio::KEGG::GENES#entry -> Hash
247
+ --- Bio::KEGG::GENES#entry_id -> String
248
+ --- Bio::KEGG::GENES#division -> String
249
+ --- Bio::KEGG::GENES#organism -> String
250
+
251
+ === NAME
252
+
253
+ --- Bio::KEGG::GENES#name -> String
254
+ --- Bio::KEGG::GENES#genes -> Array
255
+ --- Bio::KEGG::GENES#gene -> String
256
+
257
+ === DEFINITION
258
+
259
+ --- Bio::KEGG::GENES#definition -> String
260
+ --- Bio::KEGG::GENES#eclinks -> Array
261
+ --- Bio::KEGG::GENES#splinks -> Array
262
+
263
+ === CLASS
264
+
265
+ --- Bio::KEGG::GENES#keggclass -> String
266
+ --- Bio::KEGG::GENES#pathways -> Array
267
+
268
+ === POSITION
269
+
270
+ --- Bio::KEGG::GENES#position -> String
271
+
272
+ === DBLINKS
273
+
274
+ --- Bio::KEGG::GENES#dblinks -> Hash
275
+
276
+ === CODON_USAGE
277
+
278
+ --- Bio::KEGG::GENES#codon_usage(codon = nil) -> Array or Fixnum
279
+ --- Bio::KEGG::GENES#cu -> Hash
280
+
281
+ === AASEQ
282
+
283
+ --- Bio::KEGG::GENES#aaseq -> Bio::Sequence::AA
284
+ --- Bio::KEGG::GENES#aalen -> Fixnum
285
+
286
+ === NTSEQ
287
+
288
+ --- Bio::KEGG::GENES#ntseq -> Bio::Sequence::NA
289
+ --- Bio::KEGG::GENES#naseq -> Bio::Sequence::NA
290
+ --- Bio::KEGG::GENES#ntlen -> Fixnum
291
+ --- Bio::KEGG::GENES#nalen -> Fixnum
292
+
293
+ =end
@@ -0,0 +1,362 @@
1
+ #
2
+ # bio/db/kegg/genome.rb - KEGG/GENOME database class
3
+ #
4
+ # Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: genome.rb,v 0.14 2005/09/08 01:22:11 k Exp $
21
+ #
22
+
23
+ require 'bio/db'
24
+
25
+ module Bio
26
+
27
+ class KEGG
28
+
29
# Accessor class for a single entry of the KEGG/GENOME database.
#
# The helpers used below (+get+, +field_fetch+, +toptag2array+,
# +subtag2array+, +tag_get+, +tag_cut+, +truncate+) and the <tt>@data</tt>
# cache are not defined in this class; presumably they are inherited from
# KEGGDB (loaded by 'bio/db') -- confirm there.
class GENOME < KEGGDB

  DELIMITER = RS = "\n///\n"
  TAGSIZE = 12

  # entry:: text of one GENOME entry; handed to the superclass with TAGSIZE.
  def initialize(entry)
    super(entry, TAGSIZE)
  end

  # ENTRY
  def entry_id
    field_fetch('ENTRY')
  end

  # NAME
  def name
    field_fetch('NAME')
  end

  # DEFINITION
  def definition
    field_fetch('DEFINITION')
  end
  alias organism definition

  # TAXONOMY -- returns a Hash with the keys 'taxid' and 'lineage'.
  # A missing sub-record is stored as ''; any other key also reads as ''
  # (Hash default).  The result is cached.
  def taxonomy
    unless @data['TAXONOMY']
      taxid, lineage = subtag2array(get('TAXONOMY'))
      taxid   = taxid   ? truncate(tag_cut(taxid))   : ''
      lineage = lineage ? truncate(tag_cut(lineage)) : ''
      @data['TAXONOMY'] = {
        'taxid'   => taxid,
        'lineage' => lineage,
      }
      @data['TAXONOMY'].default = ''
    end
    @data['TAXONOMY']
  end

  # Returns the 'taxid' value of #taxonomy.
  def taxid
    taxonomy['taxid']
  end

  # Returns the 'lineage' value of #taxonomy.
  def lineage
    taxonomy['lineage']
  end

  # COMMENT
  def comment
    field_fetch('COMMENT')
  end

  # REFERENCE -- returns a References object built from one Reference per
  # REFERENCE record (cached).  Each Reference is created from a Hash with
  # the keys 'authors' (Array), 'title' and 'journal'.  When the JOURNAL
  # line has the form "<journal> <vol>:<from>-<to> (<year>) [UI:<id>]" it
  # is split into 'journal', 'volume', 'pages', 'year' and 'medline';
  # otherwise the whole line is stored as 'journal'.
  def references
    unless @data['REFERENCE']
      ary = []
      toptag2array(get('REFERENCE')).each do |ref|
        hash = Hash.new('')
        subtag2array(ref).each do |field|
          case tag_get(field)
          when /AUTHORS/
            authors = truncate(tag_cut(field)).split(', ')
            # An empty AUTHORS field splits to [], so there is no last
            # element to break up at " and ".
            unless authors.empty?
              authors[-1] = authors[-1].split(/\s+and\s+/)
            end
            # re-insert the blank after the comma inside each name
            hash['authors'] = authors.flatten.map { |a| a.sub(',', ', ') }
          when /TITLE/
            hash['title'] = truncate(tag_cut(field))
          when /JOURNAL/
            journal = truncate(tag_cut(field))
            m = /(.*) (\d+):(\d+)-(\d+) \((\d+)\) \[UI:(\d+)\]$/.match(journal)
            if m
              hash['journal'] = m[1]
              hash['volume']  = m[2]
              # keep the whole page range; the end page used to be
              # captured and then thrown away
              hash['pages']   = "#{m[3]}-#{m[4]}"
              hash['year']    = m[5]
              hash['medline'] = m[6]
            else
              hash['journal'] = journal
            end
          end
        end
        ary.push(Reference.new(hash))
      end
      @data['REFERENCE'] = References.new(ary)
    end
    @data['REFERENCE']
  end

  # CHROMOSOME -- Array of Hash, one Hash per CHROMOSOME record.
  def chromosomes
    genome_subrecords('CHROMOSOME')
  end

  # PLASMID -- Array of Hash, one Hash per PLASMID record.
  def plasmids
    genome_subrecords('PLASMID')
  end

  # SCAFFOLD -- Array of Hash, one Hash per SCAFFOLD record.
  def scaffolds
    genome_subrecords('SCAFFOLD')
  end

  # STATISTICS -- returns a Hash with the keys 'nalen', 'num_gene',
  # 'num_rna' (Integers) and 'gc' (Float), filled from the lines that
  # contain "nucleotides:", "protein genes:", "RNA genes:" and
  # "G+C content:".  Keys that were not found read as 0.0 (Hash default).
  # The result is cached.
  def statistics
    unless @data['STATISTICS']
      hash = Hash.new(0.0)
      get('STATISTICS').each_line do |line|
        case line
        when /nucleotides:\s+(\d+)/
          hash['nalen'] = $1.to_i
        when /protein genes:\s+(\d+)/
          hash['num_gene'] = $1.to_i
        when /RNA genes:\s+(\d+)/
          hash['num_rna'] = $1.to_i
        # literal decimal point (was an unescaped '.'), fraction optional
        when /G\+C content:\s+(\d+(?:\.\d+)?)/
          hash['gc'] = $1.to_f
        end
      end
      @data['STATISTICS'] = hash
    end
    @data['STATISTICS']
  end

  # Returns the 'nalen' value of #statistics.
  def nalen
    statistics['nalen']
  end
  alias length nalen

  # Returns the 'num_gene' value of #statistics.
  def num_gene
    statistics['num_gene']
  end

  # Returns the 'num_rna' value of #statistics.
  def num_rna
    statistics['num_rna']
  end

  # Returns the 'gc' value of #statistics.
  def gc
    statistics['gc']
  end

  # GENOMEMAP
  def genomemap
    field_fetch('GENOMEMAP')
  end

  private

  # Shared implementation of #chromosomes, #plasmids and #scaffolds:
  # splits the field +tag+ into records, turns every record into a Hash
  # (sub-tag => value, unknown keys read as '') and caches the Array of
  # these Hashes in @data[tag].
  def genome_subrecords(tag)
    unless @data[tag]
      @data[tag] = []
      toptag2array(get(tag)).each do |record|
        hash = Hash.new('')
        subtag2array(record).each do |field|
          hash[tag_get(field)] = truncate(tag_cut(field))
        end
        @data[tag].push(hash)
      end
    end
    @data[tag]
  end

end
208
+
209
+ end
210
+
211
+ end
212
+
213
+
214
+
215
if __FILE__ == $0

  # Pretty-print with pp when it can be loaded; otherwise keep the
  # built-in p.
  begin
    require 'pp'
    def p(arg); pp(arg); end
  rescue LoadError
  end

  require 'bio/io/flatfile'

  # Read GENOME entries from the files named on the command line (or from
  # standard input) and print every accessor of every entry.
  ff = Bio::FlatFile.new(Bio::KEGG::GENOME, ARGF)

  ff.each do |genome|

    puts "### Tags"
    p genome.tags

    # Each row is a section title followed by the methods to call for it.
    [
      %w( ENTRY       entry_id ),
      %w( NAME        name ),
      %w( DEFINITION  definition ),
      %w( TAXONOMY    taxonomy taxid lineage ),
      %w( REFERENCE   references ),
      %w( CHROMOSOME  chromosomes ),
      %w( PLASMID     plasmids ),
      # was 'plasmids', which printed the PLASMID data a second time
      %w( SCAFFOLD    scaffolds ),
      %w( STATISTICS  statistics nalen num_gene num_rna gc ),
      %w( GENOMEMAP   genomemap ),
    ].each do |x|
      puts "### " + x.shift
      x.each do |m|
        p genome.send(m)
      end
    end

  end

end
253
+
254
+
255
+ =begin
256
+
257
+ = Bio::KEGG::GENOME
258
+
259
+ === Initialize
260
+
261
+ --- Bio::KEGG::GENOME.new(entry)
262
+
263
+ === ENTRY
264
+
265
+ --- Bio::KEGG::GENOME#entry_id -> String
266
+
267
+ Returns contents of the ENTRY record as a String.
268
+
269
+ === NAME
270
+
271
+ --- Bio::KEGG::GENOME#name -> String
272
+
273
+ Returns contents of the NAME record as a String.
274
+
275
+ === DEFINITION
276
+
277
+ --- Bio::KEGG::GENOME#definition -> String
278
+
279
+ Returns contents of the DEFINITION record as a String.
280
+
281
+ --- Bio::KEGG::GENOME#organism -> String
282
+
283
+ Alias for the 'definition' method.
284
+
285
+ === TAXONOMY
286
+
287
+ --- Bio::KEGG::GENOME#taxonomy -> Hash
288
+
289
+ Returns contents of the TAXONOMY record as a Hash.
290
+
291
+ --- Bio::KEGG::GENOME#taxid -> String
292
+
293
+ Returns NCBI taxonomy ID from the TAXONOMY record as a String.
294
+
295
+ --- Bio::KEGG::GENOME#lineage -> String
296
+
297
+ Returns contents of the TAXONOMY/LINEAGE record as a String.
298
+
299
+ === COMMENT
300
+
301
+ --- Bio::KEGG::GENOME#comment -> String
302
+
303
+ Returns contents of the COMMENT record as a String.
304
+
305
+ === REFERENCE
306
+
307
+ --- Bio::KEGG::GENOME#references -> Array
308
+
309
+ Returns contents of the REFERENCE records as an Array of Bio::Reference
310
+ objects.
311
+
312
+ === CHROMOSOME
313
+
314
+ --- Bio::KEGG::GENOME#chromosomes -> Array
315
+
316
+ Returns contents of the CHROMOSOME records as an Array of Hash.
317
+
318
+ === PLASMID
319
+
320
+ --- Bio::KEGG::GENOME#plasmids -> Array
321
+
322
+ Returns contents of the PLASMID records as an Array of Hash.
323
+
324
+ === SCAFFOLD
325
+
326
+ --- Bio::KEGG::GENOME#scaffolds -> Array
327
+
328
+ Returns contents of the SCAFFOLD records as an Array of Hash.
329
+
330
+ === STATISTICS
331
+
332
+ --- Bio::KEGG::GENOME#statistics -> Hash
333
+
334
+ Returns contents of the STATISTICS record as a Hash.
335
+
336
+ --- Bio::KEGG::GENOME#nalen -> Fixnum
337
+
338
+ Returns number of nucleotides from the STATISTICS record as a Fixnum.
339
+
340
+ --- Bio::KEGG::GENOME#num_gene -> Fixnum
341
+
342
+ Returns number of protein genes from the STATISTICS record as a Fixnum.
343
+
344
+ --- Bio::KEGG::GENOME#num_rna -> Fixnum
345
+
346
+ Returns number of RNA genes from the STATISTICS record as a Fixnum.
347
+
348
+ --- Bio::KEGG::GENOME#gc -> Float
349
+
350
+ Returns G+C content from the STATISTICS record as a Float.
351
+
352
+ === GENOMEMAP
353
+
354
+ --- Bio::KEGG::GENOME#genomemap -> String
355
+
356
+ Returns contents of the GENOMEMAP record as a String.
357
+
358
+ == SEE ALSO
359
+
360
+ ftp://ftp.genome.jp/pub/kegg/genomes/genome
361
+
362
+ =end