bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,280 @@
1
+ #
2
+ # = bio/db/aaindex.rb - AAindex database class
3
+ #
4
+ # Copyright:: Copyright (C) 2001 KAWASHIMA Shuichi <s@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ # $Id: aaindex.rb,v 1.16 2005/12/18 15:58:41 k Exp $
8
+ #
9
+ # == Description
10
+ # Classes for Amino Acid Index Database (AAindex and AAindex2).
11
+ # * AAindex Manual: http://www.genome.jp/dbget-bin/show_man?aaindex
12
+ #
13
+ # == Examples
14
+ # aax1 = Bio::AAindex1.new("PRAM900102.aaindex1")
15
+ # aax1.entry_id
16
+ # aax1.index
17
+ #
18
+ # aax2 = Bio::AAindex2.new("HENS920102.aaindex2")
19
+ # aax2.entry_id
20
+ # aax2.matrix
21
+ # aax2.matrix[2,2]
22
+ #
23
+ # == References
24
+ # * http://www.genome.jp/aaindex/
25
+ #
26
+ #--
27
+ #
28
+ # This library is free software; you can redistribute it and/or
29
+ # modify it under the terms of the GNU Lesser General Public
30
+ # License as published by the Free Software Foundation; either
31
+ # version 2 of the License, or (at your option) any later version.
32
+ #
33
+ # This library is distributed in the hope that it will be useful,
34
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
35
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
36
+ # Lesser General Public License for more details.
37
+ #
38
+ # You should have received a copy of the GNU Lesser General Public
39
+ # License along with this library; if not, write to the Free Software
40
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
41
+ #
42
+ #++
43
+ #
44
+
45
+ require "bio/db"
46
+ require "matrix"
47
+
48
+ module Bio
49
+
50
# Base class shared by the AAindex1 and AAindex2 entry parsers.
#
# Each accessor simply asks the superclass machinery (field_fetch / get)
# for the field stored under a one-letter tag of the entry.
class AAindex < KEGGDB

  # Text that terminates one entry in a flat file.
  DELIMITER = "\n//\n"

  # Record separator; same value as DELIMITER.
  RS = DELIMITER

  # Tag width handed to the superclass constructor (Bio::DB API).
  TAGSIZE = 2

  # entry:: raw text of a single AAindex entry.
  def initialize(entry)
    super(entry, TAGSIZE)
  end

  # Returns the content of the 'H' field, used as the entry identifier.
  def entry_id
    field_fetch('H')
  end

  # Returns the content of the 'D' field, used as the definition.
  def definition
    field_fetch('D')
  end

  # Returns the content of the 'R' field, used as the database links.
  def dblinks
    field_fetch('R')
  end

  # Returns the content of the 'A' field, used as the author information.
  def author
    field_fetch('A')
  end

  # Returns the content of the 'T' field, used as the title.
  def title
    field_fetch('T')
  end

  # Returns the content of the 'J' field, used as the journal.
  def journal
    field_fetch('J')
  end

  # Returns the '*' field as delivered by +get+ (unlike the other
  # accessors, this one does not go through field_fetch).
  def comment
    get('*')
  end

end
102
+
103
+
104
# Parser for AAindex1 entries (one value per amino acid).
#
# The constructor is inherited unchanged from AAindex.
class AAindex1 < AAindex

  # Returns the content of the 'C' field (correlation coefficients).
  def correlation_coefficient
    field_fetch('C')
  end

  # Returns a Hash that maps the 20 one-letter amino acid codes
  # (in the order A R N D C Q E G H I L K M F P S T W Y V) to the values
  # read from the 'I' field.
  #
  # type:: how each value is represented
  #        * :string  - the token exactly as written in the entry
  #        * :float   - the token converted with to_f (default)
  #        * :zscore  - (value - mean) / sd, see the note below
  #        * :integer - the value scaled by 10 ** (largest number of
  #          characters found after a decimal point) and rounded
  #        Any other type yields an empty Hash.
  #
  # Raises RuntimeError when the field does not contain exactly 20 values.
  def index(type = :float)
    aa = %w( A R N D C Q E G H I L K M F P S T W Y V )
    values = field_fetch('I', 1).split(' ')

    if values.size != aa.size
      raise "Invalid format in #{entry_id} : #{values.inspect}"
    end

    case type
    when :zscore
      floats = values.map { |v| v.to_f }
      mean = floats.inject(0.0) { |sum, v| sum + v } / floats.size
      # NOTE(review): "sd" is the square root of the *sum* of squared
      # deviations (it is not divided by the number of values), so the
      # result is not a conventional z-score.  Kept as-is so existing
      # callers get the same numbers -- confirm the intent.
      sd = Math.sqrt(floats.inject(0.0) { |sum, v| sum + (v - mean) ** 2 })
    when :integer
      # Number of characters after the decimal point; a token without
      # a '.' (e.g. "5" or "NA") counts as zero instead of crashing.
      figure = values.map { |v| (v[/\..*/] || '.').length - 1 }.max
    end

    hash = {}

    aa.each_with_index do |a, i|
      case type
      when :string
        hash[a] = values[i]
      when :float
        hash[a] = values[i].to_f
      when :zscore
        hash[a] = (values[i].to_f - mean) / sd
      when :integer
        # round, not to_i: 0.29 * 100 is 28.999... in floating point and
        # truncation would give 28.
        hash[a] = (values[i].to_f * 10 ** figure).round
      end
    end
    hash
  end

end
163
+
164
+
165
# Parser for AAindex2 entries (amino acid substitution matrices).
#
# The constructor is inherited unchanged from AAindex.
class AAindex2 < AAindex

  # Hash mapping a one-letter amino acid code to its row/column position.
  # It is filled in by #old_matrix and is nil until that method has run.
  # (Declared here: attr_reader is a class-level macro and raised
  # NoMethodError when it was invoked from inside an instance method.)
  attr_reader :aa

  # Returns the row labels taken from the first line of the 'M' field as
  # an Array of single characters (nil when that line does not have the
  # "M rows = ..., cols = ..." form).
  def rows
    label_data
    @rows
  end

  # Returns the column labels taken from the first line of the 'M' field
  # as an Array of single characters (nil when that line does not have
  # the "M rows = ..., cols = ..." form).
  def cols
    label_data
    @cols
  end

  # Returns a Matrix built from the lines that follow the label line of
  # the 'M' field; every whitespace-separated token is converted with to_f.
  def matrix
    ma = []

    label_data.each_line do |line|
      ma.push(line.strip.split(/\s+/).map { |x| x.to_f })
    end

    Matrix[*ma]
  end

  # Reads the matrix from the 'I' field (layout used by AAindex <= ver 5.0).
  #
  # For the 20-letter layout it returns a symmetric 20x20 Matrix and fills
  # #aa with the position of every amino acid.  The two layouts that
  # include a gap symbol raise NotImplementedError.  Returns nil when the
  # field matches none of the known layouts.
  def old_matrix
    @aa = {} # used to determine row/column of the aa

    field = field_fetch('I')

    case field
    when / (ARNDCQEGHILKMFPSTWYV)\s+(.*)/ # 20x19/2 matrix
      aalist = $1
      values = $2.split(/\s+/)

      aalist.split('').each_with_index do |letter, i|
        @aa[letter] = i
      end

      ma = Array.new(20) { Array.new(20) } # 2D array of 20x(20)

      # The values hold one triangle of the matrix, row after row; mirror
      # each value so that the resulting matrix is symmetric.
      0.upto(19) do |i|
        i.upto(19) do |j|
          ma[i][j] = values[i + j*(j+1)/2].to_f
          ma[j][i] = ma[i][j]
        end
      end
      Matrix[*ma]

    when / -ARNDCQEGHILKMFPSTWYV / # 21x20/2 matrix (with gap)
      raise NotImplementedError
    when / ACDEFGHIKLMNPQRSTVWYJ- / # 21x21 matrix (with gap)
      raise NotImplementedError
    end
  end

  private

  # Splits the 'M' field into its first line and the remainder.  When the
  # first line looks like "M rows = <letters>, cols = <letters>", the
  # letters are stored in @rows and @cols.  Returns the remainder (the
  # matrix rows as text).
  def label_data
    label, data = get('M').split("\n", 2)
    if /M rows = (\S+), cols = (\S+)/.match(label)
      rows, cols = $1, $2
      @rows = rows.split('')
      @cols = cols.split('')
    end
    data
  end

end
247
+
248
+ end
249
+
250
+
251
# Demo, executed only when this file is run directly: fetches one AAindex1
# and one AAindex2 entry through Bio::Fetch and prints their fields.
if $0 == __FILE__
  require 'bio/io/fetch'

  # Accessors that both entry classes inherit from Bio::AAindex.
  shared = [:entry_id, :definition, :dblinks, :author, :title, :journal]

  puts "### AAindex1 (PRAM900102)"
  aax1 = Bio::AAindex1.new(Bio::Fetch.query('aaindex', 'PRAM900102', 'raw'))
  (shared + [:correlation_coefficient, :index]).each do |reader|
    p aax1.send(reader)
  end

  puts "### AAindex2 (HENS920102)"
  aax2 = Bio::AAindex2.new(Bio::Fetch.query('aaindex', 'HENS920102', 'raw'))
  (shared + [:rows, :cols, :matrix]).each do |reader|
    p aax2.send(reader)
  end
  p aax2.matrix[2,2]
  [:determinant, :rank, :transpose].each do |operation|
    p aax2.matrix.send(operation)
  end
end
280
+
@@ -0,0 +1,332 @@
1
+ #
2
+ # = bio/db/embl/common.rb - Common methods for EMBL style database classes
3
+ #
4
+ # Copyright:: Copyright (C) 2001-2005 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ # $Id: common.rb,v 1.8 2005/11/02 07:30:14 nakao Exp $
8
+ #
9
+ # == EMBL style databases class
10
+ #
11
+ # This module defines a common framework among EMBL, SWISS-PROT, TrEMBL.
12
+ # For more details, see the documentations in each embl/*.rb libraries.
13
+ #
14
+ # EMBL style format:
15
+ # ID - identification (begins each entry; 1 per entry)
16
+ # AC - accession number (>=1 per entry)
17
+ # SV - sequence version (1 per entry)
18
+ # DT - date (2 per entry)
19
+ # DE - description (>=1 per entry)
20
+ # KW - keyword (>=1 per entry)
21
+ # OS - organism species (>=1 per entry)
22
+ # OC - organism classification (>=1 per entry)
23
+ # OG - organelle (0 or 1 per entry)
24
+ # RN - reference number (>=1 per entry)
25
+ # RC - reference comment (>=0 per entry)
26
+ # RP - reference positions (>=1 per entry)
27
+ # RX - reference cross-reference (>=0 per entry)
28
+ # RA - reference author(s) (>=1 per entry)
29
+ # RG - reference group (>=0 per entry)
30
+ # RT - reference title (>=1 per entry)
31
+ # RL - reference location (>=1 per entry)
32
+ # DR - database cross-reference (>=0 per entry)
33
+ # FH - feature table header (0 or 2 per entry)
34
+ # FT - feature table data (>=0 per entry)
35
+ # CC - comments or notes (>=0 per entry)
36
+ # XX - spacer line (many per entry)
37
+ # SQ - sequence header (1 per entry)
38
+ # bb - (blanks) sequence data (>=1 per entry)
39
+ # // - termination line (ends each entry; 1 per entry)
40
+ #
41
+ #
42
+ # == Example
43
+ #
44
+ # require 'bio/db/embl/common'
45
+ # module Bio
46
+ # class NEWDB < EMBLDB
47
+ # include Bio::EMBLDB::Common
48
+ # end
49
+ # end
50
+ #
51
+ #--
52
+ #
53
+ # This library is free software; you can redistribute it and/or
54
+ # modify it under the terms of the GNU Lesser General Public
55
+ # License as published by the Free Software Foundation; either
56
+ # version 2 of the License, or (at your option) any later version.
57
+ #
58
+ # This library is distributed in the hope that it will be useful,
59
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
60
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
61
+ # Lesser General Public License for more details.
62
+ #
63
+ # You should have received a copy of the GNU Lesser General Public
64
+ # License along with this library; if not, write to the Free Software
65
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
66
+ #
67
+ #++
68
+ #
69
+
70
+ require 'bio/db'
71
+ require 'bio/reference'
72
+
73
+ module Bio
74
+ class EMBLDB
75
+ module Common
76
+
77
+ DELIMITER = "\n//\n"
78
+ RS = DELIMITER
79
+ TAGSIZE = 5
80
+
81
# Passes the raw entry text on to the including class's parent
# constructor, together with the TAGSIZE constant.
#
# entry:: raw text of one entry.
def initialize(entry)
  super(entry, TAGSIZE)
end
84
+
85
+ # returns a Array of accession numbers in the AC lines.
86
+ #
87
+ # AC Line
88
+ # "AC A12345; B23456;"
89
+ # AC [AC1;]+
90
+ #
91
+ # Accession numbers format:
92
+ # 1 2 3 4 5 6
93
+ # [O,P,Q] [0-9] [A-Z, 0-9] [A-Z, 0-9] [A-Z, 0-9] [0-9]
94
# Returns the accession numbers of the AC field as an Array of Strings.
# The field is split on single spaces and the first ';' of every token
# is removed.  The result is cached in @data['AC'].
def ac
  @data['AC'] ||= field_fetch('AC').split(/ /).map { |token| token.sub(/;/, '') }
end
104
+ alias accessions ac
105
+
106
+
107
+ # returns the first accession number in the AC lines
108
# Returns the first element of #ac (nil when there is none).
def accession
  ac.first
end
111
+
112
+
113
+ # returns a String in the DE line.
114
+ #
115
+ # DE Line
116
# Returns the text obtained with fetch('DE').  The text is looked up on
# the first call and served from @data['DE'] afterwards.
def de
  cached = @data['DE']
  return cached if cached

  @data['DE'] = fetch('DE')
end
122
+ alias description de
123
+ alias definition de # API
124
+
125
+
126
+
127
+ # returns contents in the OS line.
128
+ # * Bio::EMBLDB#os -> Array of <OS Hash>
129
+ # where <OS Hash> is:
130
+ # [{'name'=>'Human', 'os'=>'Homo sapiens'},
131
+ # {'name'=>'Rat', 'os'=>'Rattus norvegicus'}]
132
+ # * Bio::SPTR#os[0]['name'] => "Human"
133
+ # * Bio::SPTR#os[0] => {'name'=>"Human", 'os'=>'Homo sapiens'}
134
+ # * Bio::SPTR#os(0) => "Homo sapiens (Human)"
135
+ #
136
+ # OS Line; organism species (>=1)
137
+ # "OS Trifolium repens (white clover)"
138
+ #
139
+ # OS Genus species (name).
140
+ # OS Genus species (name0) (name1).
141
+ # OS Genus species (name0) (name1).
142
+ # OS Genus species (name0), G s0 (name0), and G s (name1).
143
# Parses the OS field into an Array of Hashes, one per organism:
# 'os' holds the species text and 'name' the parenthesised part including
# its parentheses (nil when there is none).  The Array is cached in
# @data['OS'].
#
# num:: when nil (default) the whole Array is returned; when an Integer,
#       the organism at that position is returned as one String, e.g.
#       "Trifolium repens (white clover)", or nil if there is no organism
#       at that position.
#
# Raises RuntimeError when a part of the field has no recognisable
# species text.
def os(num = nil)
  unless @data['OS']
    @data['OS'] = fetch('OS').split(/, and|, /).map { |part|
      organism = part[/([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d])/, 1]
      unless organism
        raise "Error: OS Line. #{$!}\n#{fetch('OS')}\n"
      end
      {'name' => part[/(\(.+\))/, 1], 'os' => organism}
    }
  end
  return @data['OS'] unless num

  # The string used to be built with a broken interpolation and then
  # thrown away, so the Array came back even when num was given.
  entry = @data['OS'][num]
  return nil unless entry

  # 'name' may be nil; compact avoids a dangling space in that case.
  [entry['os'], entry['name']].compact.join(' ')
end
163
+
164
+
165
+ # returns contents in the OG line.
166
+ # * Bio::EMBLDB::Common#og -> [ <organelle String>* ]
167
+ #
168
+ # OG Line; organella (0 or 1/entry)
169
+ # OG Plastid; Chloroplast.
170
+ # OG Mitochondrion.
171
+ # OG Plasmid sym pNGR234a.
172
+ # OG Plastid; Cyanelle.
173
+ # OG Plasmid pSymA (megaplasmid 1).
174
+ # OG Plasmid pNRC100, Plasmid pNRC200, and Plasmid pHH1.
175
# Returns the entries of the OG field as an Array of stripped Strings
# (an empty Array when get('OG') is empty).  A trailing period and the
# first " and" are removed, then the text is split on every ';' and ','.
# The result is cached in @data['OG'].
def og
  unless @data['OG']
    organelles = []
    if get('OG').size > 0
      # Non-destructive sub: the string handed out by fetch is not altered.
      text = fetch('OG').sub(/\.$/, '').sub(/ and/, '')
      # Split on all semicolons, not just the first one.
      organelles = text.split(/[;,]/).map { |item| item.strip }
    end
    @data['OG'] = organelles
  end
  @data['OG']
end
191
+
192
+
193
+ # returns contents in the OC line.
194
+ # * Bio::EMBLDB::Common#oc -> [ <organism class String>* ]
195
+ # OC Line; organism classification (>=1)
196
+ # OC Eukaryota; Alveolata; Apicomplexa; Piroplasmida; Theileriidae;
197
+ # OC Theileria.
198
# Returns the OC field as an Array of Strings: a trailing period is
# removed, the text is split on ';' and every item is stripped.  The
# result is cached in @data['OC'].  Returns nil when the field cannot be
# processed (a NameError, which includes NoMethodError, is swallowed on
# purpose).
def oc
  unless @data['OC']
    begin
      # \. (escaped): only a real trailing period is dropped; the former
      # /.$/ removed whatever character happened to come last.
      @data['OC'] = fetch('OC').sub(/\.$/, '').split(/;/).map { |e| e.strip }
    rescue NameError
      nil
    end
  end
  @data['OC']
end
210
+
211
+ # returns keywords in the KW line.
212
+ # * Bio::EMBLDB::Common#kw -> [ <keyword>* ]
213
+ # KW Line; keyword (>=1)
214
+ # KW [Keyword;]+
215
# Returns the keywords of the KW field as an Array of Strings (an empty
# Array when get('KW') is empty): a trailing period is removed, the text
# is split on ';' and every item is stripped.  The result is cached in
# @data['KW'].
def kw
  unless @data['KW']
    @data['KW'] =
      if get('KW').size > 0
        # \. (escaped): only a real trailing period is dropped; the former
        # /.$/ removed whatever character happened to come last.
        fetch('KW').sub(/\.$/, '').split(/;/).map { |e| e.strip }
      else
        []
      end
  end
  @data['KW']
end
226
+ alias keywords kw
227
+
228
+
229
+ # returns contents in the R lines.
230
+ # * Bio::EMBLDB::Common#ref -> [ <reference information Hash>* ]
231
+ # where <reference information Hash> is:
232
+ # {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
233
+ # 'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
234
+ #
235
+ # R Lines
236
+ # * RN RC RP RX RA RT RL RG
237
# Returns the reference lines as an Array of Hashes, one per reference.
# Every Hash has the keys RN, RC, RP, RX, RA, RT, RL and RG; the text of
# lines sharing a tag is joined with single spaces, then stripped and
# freed from a leading '"', a trailing ';' and a trailing '"'.  The
# result is cached in @data['R'].
#
# Raises RuntimeError for a line that does not look like "<R tag> <text>".
def ref
  return @data['R'] if @data['R']

  @data['R'] = get('R').split(/\nRN /).map { |chunk|
    fields = {}
    %w(RN RC RP RX RA RT RL RG).each { |tag| fields[tag] = '' }

    # The split consumed the "RN " prefix of every chunk but the first.
    chunk = 'RN ' + chunk unless chunk =~ /^RN /
    chunk.split("\n").each { |line|
      unless line =~ /^(R[NPXARLCTG]) (.+)/
        raise "Invalid format in R lines, \n[#{line}]\n"
      end
      fields[$1] += $2 + ' '
    }

    fields.keys.each { |tag|
      fields[tag] = fields[tag].strip.sub(/^"/, '').sub(/;$/, '').sub(/"$/, '')
    }
    fields
  }
end
263
+
264
+ # returns Bio::Reference object from Bio::EMBLDB::Common#ref.
265
+ # * Bio::EMBLDB::Common#ref -> Bio::References
266
# Converts every Hash returned by #ref into a Reference and wraps the
# resulting Array in a References object, which is cached in
# @data['references'].
#
# Keys handed to Reference.new (missing keys default to ''):
# * 'authors' - the RA text split on ", "
# * 'title'   - the RT text
# * 'journal', 'volume', 'issue', 'pages', 'year' - taken from an RL text
#   of the form "<journal> <vol> (<issue>), <from>-<to> (<year>)";
#   otherwise the whole RL text becomes 'journal'
# * one key per RX cross-reference: the lower-cased database name
#   mapped to its identifier
def references
  unless @data['references']
    ary = ref.map { |ent|
      hash = Hash.new('')
      ent.each { |key, value|
        case key
        when 'RA'
          hash['authors'] = value.split(/, /)
        when 'RT'
          hash['title'] = value
        when 'RL'
          if value =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
            hash['journal'] = $1
            hash['volume']  = $2
            hash['issue']   = $3
            hash['pages']   = $4
            hash['year']    = $5
          else
            hash['journal'] = value
          end
        when 'RX'
          # Items are separated by a period followed by whitespace.
          # Splitting on every '.' cut identifiers that contain periods
          # themselves (e.g. a DOI) into pieces.
          value.split(/\.\s+/).each { |item|
            tag, xref = item.split(/; /).map { |i| i.strip }
            next if tag.nil? || tag.empty?
            # The last item still carries the terminating period.
            xref = xref.sub(/\.\z/, '') if xref
            hash[tag.downcase] = xref
          }
        end
      }
      Reference.new(hash)
    }
    @data['references'] = References.new(ary)
  end
  @data['references']
end
299
+
300
+
301
+ # returns contents in the DR line.
302
+ # * Bio::EMBLDB::Common#dr -> [ <Database cross-reference Hash>* ]
303
+ # where <Database cross-reference Hash> is:
304
+ # * Bio::EMBLDB::Common#dr {|k,v| }
305
+ #
306
+ # DR Line; database cross-reference (>=0)
307
+ # one cross-reference per line
308
+ # "DR database_identifier; primary_identifier; secondary_identifier."
309
# Parses the DR lines into a Hash that maps a database name to an Array
# with one entry per line for that database; each entry is the Array of
# the remaining "; "-separated identifiers.  The Hash is cached in
# @data['DR'].
#
# With a block, every (database name, entries) pair is yielded and the
# Hash is returned by the iteration; without a block the Hash itself is
# returned.
def dr
  unless @data['DR']
    table = {}
    get('DR').split(/\n/).each do |line|
      # Strip first, then drop a real trailing period only: the former
      # /.$/ removed whatever character happened to come last.
      fields = line.sub(/^DR /, '').strip.sub(/\.$/, '').split(/;[ ]/)
      dbname = fields.shift
      (table[dbname] ||= []).push(fields)
    end
    @data['DR'] = table
  end
  if block_given?
    @data['DR'].each { |k, v| yield(k, v) }
  else
    @data['DR']
  end
end
328
+
329
+ end # module Common
330
+ end # class EMBLDB
331
+ end # module Bio
332
+