bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,280 @@
1
+ #
2
+ # = bio/db/aaindex.rb - AAindex database class
3
+ #
4
+ # Copyright:: Copyright (C) 2001 KAWASHIMA Shuichi <s@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ # $Id: aaindex.rb,v 1.16 2005/12/18 15:58:41 k Exp $
8
+ #
9
+ # == Description
10
+ # Classes for Amino Acid Index Database (AAindex and AAindex2).
11
+ # * AAindex Manual: http://www.genome.jp/dbget-bin/show_man?aaindex
12
+ #
13
+ # == Examples
14
+ # aax1 = Bio::AAindex1.new("PRAM900102.aaindex1")
15
+ # aax1.entry_id
16
+ # aax1.index
17
+ #
18
+ # aax2 = Bio::AAindex2.new("HENS920102.aaindex2")
19
+ # aax2.entry_id
20
+ # aax2.matrix
21
+ # aax2.matrix[2,2]
22
+ #
23
+ # == References
24
+ # * http://www.genome.jp/aaindex/
25
+ #
26
+ #--
27
+ #
28
+ # This library is free software; you can redistribute it and/or
29
+ # modify it under the terms of the GNU Lesser General Public
30
+ # License as published by the Free Software Foundation; either
31
+ # version 2 of the License, or (at your option) any later version.
32
+ #
33
+ # This library is distributed in the hope that it will be useful,
34
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
35
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
36
+ # Lesser General Public License for more details.
37
+ #
38
+ # You should have received a copy of the GNU Lesser General Public
39
+ # License along with this library; if not, write to the Free Software
40
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
41
+ #
42
+ #++
43
+ #
44
+
45
+ require "bio/db"
46
+ require "matrix"
47
+
48
+ module Bio
49
+
50
+ class AAindex < KEGGDB
51
+
52
+ # Delimiter
53
+ DELIMITER ="\n//\n"
54
+
55
+ # Delimiter
56
+ RS = DELIMITER
57
+
58
+ # Bio::DB API
59
+ TAGSIZE = 2
60
+
61
+
62
# Builds an AAindex entry object from the raw entry text.
# The entry is handed to the parent class together with TAGSIZE, the tag
# width declared for this class.
def initialize(entry)
  super(entry, TAGSIZE)
end
65
+
66
# Returns the content of the 'H' field, which serves as the entry ID.
def entry_id
  field_fetch('H')
end
70
+
71
# Returns the content of the 'D' field (the definition of the entry).
def definition
  field_fetch('D')
end
75
+
76
# Returns the content of the 'R' field (links to other databases).
def dblinks
  field_fetch('R')
end
80
+
81
# Returns the content of the 'A' field (the author line of the entry).
def author
  field_fetch('A')
end
85
+
86
# Returns the content of the 'T' field (the title of the reference).
def title
  field_fetch('T')
end
90
+
91
# Returns the content of the 'J' field (the journal of the reference).
def journal
  field_fetch('J')
end
95
+
96
# Returns the raw '*' field (the comment lines) exactly as #get yields it;
# unlike the other accessors this one does not go through #field_fetch.
def comment
  get('*')
end
100
+
101
+ end
102
+
103
+
104
+ class AAindex1 < AAindex
105
+
106
+
107
# Builds an AAindex1 entry object; the entry text is forwarded unchanged
# to the parent class constructor.
def initialize(entry)
  super
end
110
+
111
# Returns the content of the 'C' field (correlation coefficients to other
# entries) as fetched by #field_fetch.
def correlation_coefficient
  field_fetch('C')
end
115
+
116
# Returns the 20 index values of the 'I' field as a Hash keyed by one-letter
# amino acid code (A R N D C Q E G H I L K M F P S T W Y V, the order in
# which the values are read from the field).
#
# +type+ selects how each value is represented:
# :string::  the value exactly as written in the entry
# :float::   the value converted with String#to_f (default)
# :zscore::  (value - mean) / standard deviation over the 20 values, using
#            the population standard deviation; if all values are equal the
#            deviation is 0.0 and the results are NaN
# :integer:: the value multiplied by 10 ** (largest number of decimal
#            places found among the values) and rounded to an Integer
#
# Any other +type+ yields an empty Hash.
# Raises RuntimeError unless the field holds exactly 20 values.
def index(type = :float)
  aa = %w( A R N D C Q E G H I L K M F P S T W Y V )
  # The first line of the 'I' field is the column legend, hence the skip.
  values = field_fetch('I', 1).split(' ')

  if values.size != aa.size
    raise "Invalid format in #{entry_id} : #{values.inspect}"
  end

  converted =
    case type
    when :string
      values
    when :float
      values.map { |v| v.to_f }
    when :zscore
      floats = values.map { |v| v.to_f }
      mean = floats.inject(0.0) { |sum, v| sum + v } / floats.size
      # Divide the summed squared deviations by n: the square root of the
      # bare sum is not a standard deviation.
      variance = floats.inject(0.0) { |sum, v| sum + (v - mean) ** 2 } / floats.size
      sd = Math.sqrt(variance)
      floats.map { |v| (v - mean) / sd }
    when :integer
      # Values written without a decimal point contribute zero decimals.
      figure = values.map { |v|
        fraction = v[/\.(.*)/, 1]
        fraction ? fraction.length : 0
      }.max
      scale = 10 ** figure
      # round, not to_i: 0.29 * 100 is 28.999999999999996 in floating point.
      values.map { |v| (v.to_f * scale).round }
    else
      return {}
    end

  hash = {}
  aa.each_with_index { |a, i| hash[a] = converted[i] }
  hash
end
161
+
162
+ end
163
+
164
+
165
+ class AAindex2 < AAindex
166
+
167
+
168
# Builds an AAindex2 entry object; the entry text is forwarded unchanged
# to the parent class constructor.
def initialize(entry)
  super
end
171
+
172
# Returns the row labels of the matrix as an Array of one-character
# Strings. The labels are (re)parsed from the 'M' field on every call.
def rows
  label_data # called for its side effect of setting @rows
  @rows
end
177
+
178
# Returns the column labels of the matrix as an Array of one-character
# Strings. The labels are (re)parsed from the 'M' field on every call.
def cols
  label_data # called for its side effect of setting @cols
  @cols
end
183
+
184
# Returns the matrix body of the 'M' field as a Matrix of Floats.
# Each line following the label line becomes one row; its whitespace
# separated tokens are converted with String#to_f.
def matrix
  float_rows = label_data.each_line.map do |line|
    line.strip.split(/\s+/).map { |cell| cell.to_f }
  end

  Matrix[*float_rows]
end
196
+
197
# Returns the Hash filled by the most recent #old_matrix call, mapping each
# one-letter amino acid code to its row/column position in that matrix
# (nil before #old_matrix has been called).
def aa
  @aa
end

# Parses the 'I' field of an old-style entry (AAindex <= ver 5.0) and
# returns it as a symmetric Matrix of Floats.
#
# Only the layout introduced by " ARNDCQEGHILKMFPSTWYV" is supported: the
# values that follow are read as a packed lower triangle and mirrored.
# The two gap-containing layouts raise NotImplementedError; any other
# field content yields nil. As a side effect the position table returned
# by #aa is rebuilt.
def old_matrix # for AAindex <= ver 5.0
  # attr_reader cannot be invoked from inside an instance method (it raised
  # NoMethodError on every call), so #aa is defined as an ordinary reader.
  @aa = {} # used to determine row/column of the aa

  field = field_fetch('I')

  case field
  when / (ARNDCQEGHILKMFPSTWYV)\s+(.*)/ # 20x19/2 matrix
    aalist = $1
    values = $2.split(/\s+/)

    aalist.split('').each_with_index do |residue, i|
      @aa[residue] = i
    end

    size = aalist.length
    ma = Array.new(size) { Array.new(size) } # 2D array of 20x20

    size.times do |i|
      i.upto(size - 1) do |j|
        # element (j, i) of the packed triangle sits at j*(j+1)/2 + i
        ma[i][j] = values[i + j * (j + 1) / 2].to_f
        ma[j][i] = ma[i][j]
      end
    end
    Matrix[*ma]

  when / -ARNDCQEGHILKMFPSTWYV / # 21x20/2 matrix (with gap)
    raise NotImplementedError
  when / ACDEFGHIKLMNPQRSTVWYJ- / # 21x21 matrix (with gap)
    raise NotImplementedError
  end
end
233
+
234
+ private
235
+
236
# Splits the raw 'M' field into its first line (the label) and the rest.
# When the label has the form "M rows = XXX, cols = YYY", @rows and @cols
# are set to the individual characters of XXX and YYY.
# Returns everything after the first line (nil if there is no second line).
def label_data
  header, body = get('M').split("\n", 2)
  found = /M rows = (\S+), cols = (\S+)/.match(header)
  if found
    @rows = found[1].split('')
    @cols = found[2].split('')
  end
  body
end
245
+
246
+ end
247
+
248
+ end
249
+
250
+
251
# Demo, run only when this file is executed directly: fetches one AAindex1
# and one AAindex2 entry through Bio::Fetch and prints every accessor.
if __FILE__ == $0
  require 'bio/io/fetch'

  puts "### AAindex1 (PRAM900102)"
  aax1 = Bio::AAindex1.new(Bio::Fetch.query('aaindex', 'PRAM900102', 'raw'))
  [:entry_id, :definition, :dblinks, :author, :title, :journal,
   :correlation_coefficient, :index].each do |reader|
    p aax1.send(reader)
  end

  puts "### AAindex2 (HENS920102)"
  aax2 = Bio::AAindex2.new(Bio::Fetch.query('aaindex', 'HENS920102', 'raw'))
  [:entry_id, :definition, :dblinks, :author, :title, :journal,
   :rows, :cols, :matrix].each do |reader|
    p aax2.send(reader)
  end
  p aax2.matrix[2,2]
  p aax2.matrix.determinant
  p aax2.matrix.rank
  p aax2.matrix.transpose
end
280
+
@@ -0,0 +1,332 @@
1
+ #
2
+ # = bio/db/embl/common.rb - Common methods for EMBL style database classes
3
+ #
4
+ # Copyright:: Copyright (C) 2001-2005 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ # $Id: common.rb,v 1.8 2005/11/02 07:30:14 nakao Exp $
8
+ #
9
+ # == EMBL style databases class
10
+ #
11
+ # This module defines a common framework among EMBL, SWISS-PROT, TrEMBL.
12
+ # For more details, see the documentations in each embl/*.rb libraries.
13
+ #
14
+ # EMBL style format:
15
+ # ID - identification (begins each entry; 1 per entry)
16
+ # AC - accession number (>=1 per entry)
17
+ # SV - sequence version (1 per entry)
18
+ # DT - date (2 per entry)
19
+ # DE - description (>=1 per entry)
20
+ # KW - keyword (>=1 per entry)
21
+ # OS - organism species (>=1 per entry)
22
+ # OC - organism classification (>=1 per entry)
23
+ # OG - organelle (0 or 1 per entry)
24
+ # RN - reference number (>=1 per entry)
25
+ # RC - reference comment (>=0 per entry)
26
+ # RP - reference positions (>=1 per entry)
27
+ # RX - reference cross-reference (>=0 per entry)
28
+ # RA - reference author(s) (>=1 per entry)
29
+ # RG - reference group (>=0 per entry)
30
+ # RT - reference title (>=1 per entry)
31
+ # RL - reference location (>=1 per entry)
32
+ # DR - database cross-reference (>=0 per entry)
33
+ # FH - feature table header (0 or 2 per entry)
34
+ # FT - feature table data (>=0 per entry)
35
+ # CC - comments or notes (>=0 per entry)
36
+ # XX - spacer line (many per entry)
37
+ # SQ - sequence header (1 per entry)
38
+ # bb - (blanks) sequence data (>=1 per entry)
39
+ # // - termination line (ends each entry; 1 per entry)
40
+ #
41
+ #
42
+ # == Example
43
+ #
44
+ # require 'bio/db/embl/common'
45
+ # module Bio
46
+ # class NEWDB < EMBLDB
47
+ # include Bio::EMBLDB::Common
48
+ # end
49
+ # end
50
+ #
51
+ #--
52
+ #
53
+ # This library is free software; you can redistribute it and/or
54
+ # modify it under the terms of the GNU Lesser General Public
55
+ # License as published by the Free Software Foundation; either
56
+ # version 2 of the License, or (at your option) any later version.
57
+ #
58
+ # This library is distributed in the hope that it will be useful,
59
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
60
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
61
+ # Lesser General Public License for more details.
62
+ #
63
+ # You should have received a copy of the GNU Lesser General Public
64
+ # License along with this library; if not, write to the Free Software
65
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
66
+ #
67
+ #++
68
+ #
69
+
70
+ require 'bio/db'
71
+ require 'bio/reference'
72
+
73
+ module Bio
74
+ class EMBLDB
75
+ module Common
76
+
77
+ DELIMITER = "\n//\n"
78
+ RS = DELIMITER
79
+ TAGSIZE = 5
80
+
81
# Builds the entry object from the raw entry text, handing it to the
# including class's parent constructor together with TAGSIZE, the tag
# width declared in this module.
def initialize(entry)
  super(entry, TAGSIZE)
end
84
+
85
+ # returns a Array of accession numbers in the AC lines.
86
+ #
87
+ # AC Line
88
+ # "AC A12345; B23456;"
89
+ # AC [AC1;]+
90
+ #
91
+ # Accession numbers format:
92
+ # 1 2 3 4 5 6
93
+ # [O,P,Q] [0-9] [A-Z, 0-9] [A-Z, 0-9] [A-Z, 0-9] [0-9]
94
# Returns the accession numbers of the AC lines as an Array of Strings.
# The fetched field is split on single spaces and the first ';' of each
# token is removed; the result is cached in @data['AC'].
def ac
  unless @data['AC']
    accessions = field_fetch('AC').split(/ /).map { |token| token.sub(/;/,'') }
    @data['AC'] = accessions
  end
  @data['AC']
end
104
+ alias accessions ac
105
+
106
+
107
+ # returns the first accession number in the AC lines
108
# Returns the first accession number of the AC lines (nil if there is none).
def accession
  ac.first
end
111
+
112
+
113
+ # returns a String in the DE line.
114
+ #
115
+ # DE Line
116
# Returns the text of the DE lines as fetched by #fetch; the value is
# cached in @data['DE'] after the first call.
def de
  @data['DE'] ||= fetch('DE')
end
122
+ alias description de
123
+ alias definition de # API
124
+
125
+
126
+
127
+ # returns contents in the OS line.
128
+ # * Bio::EMBLDB#os -> Array of <OS Hash>
129
+ # where <OS Hash> is:
130
+ # [{'name'=>'Human', 'os'=>'Homo sapiens'},
131
+ # {'name'=>'Rat', 'os'=>'Rattus norvegicus'}]
132
+ # * Bio::SPTR#os[0]['name'] => "Human"
133
+ # * Bio::SPTR#os[0] => {'name'=>"Human", 'os'=>'Homo sapiens'}
134
+ # * Bio::SPTR#os(0) => "Homo sapiens (Human)"
135
+ #
136
+ # OS Line; organism species (>=1)
137
+ # "OS Trifolium repens (white clover)"
138
+ #
139
+ # OS Genus species (name).
140
+ # OS Genus species (name0) (name1).
141
+ # OS Genus species (name0) (name1).
142
+ # OS Genus species (name0), G s0 (name0), and G s (name1).
143
# Parses the OS lines into an Array of Hashes, one per organism, each of
# the form {'name' => <parenthesised name or nil>, 'os' => <species>}.
# The parsed Array is cached in @data['OS'].
#
# Without an argument the whole Array is returned. With an Integer +num+
# the selected organism is returned as one String, e.g.
# "Trifolium repens (white clover)", or nil when there is no organism at
# that position.
#
# Raises RuntimeError when a part of the OS line does not look like a
# species name.
def os(num = nil)
  unless @data['OS']
    organisms = []
    fetch('OS').split(/, and|, /).each do |tmp|
      if tmp =~ /([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d])/
        org = $1
        # A failed match leaves $1 nil, so 'name' is nil when the
        # organism has no parenthesised name.
        tmp =~ /(\(.+\))/
        organisms.push({'name' => $1, 'os' => org})
      else
        raise "Error: OS Line. #{$!}\n#{fetch('OS')}\n"
      end
    end
    @data['OS'] = organisms
  end

  return @data['OS'] unless num

  # The string form used to be built with a broken interpolation and then
  # discarded; build it properly and actually return it.
  entry = @data['OS'][num]
  return nil unless entry
  [entry['os'], entry['name']].compact.join(' ')
end
163
+
164
+
165
+ # returns contents in the OG line.
166
+ # * Bio::EMBLDB::Common#og -> [ <organelle String>* ]
167
+ #
168
+ # OG Line; organelle (0 or 1/entry)
169
+ # OG Plastid; Chloroplast.
170
+ # OG Mitochondrion.
171
+ # OG Plasmid sym pNGR234a.
172
+ # OG Plastid; Cyanelle.
173
+ # OG Plasmid pSymA (megaplasmid 1).
174
+ # OG Plasmid pNRC100, Plasmid pNRC200, and Plasmid pHH1.
175
# Returns the organelles named in the OG line as an Array of Strings
# (empty when the entry has no OG line). The result is cached in
# @data['OG'].
#
# The trailing period is dropped and the line is split on every ';' or
# ',' as well as on the word "and", so "A; B; C.", "A, B, and C." and
# "A and B." are all split into their members.
def og
  unless @data['OG']
    organelles = []
    if get('OG').size > 0
      # Work on a copy; the fetched string is not modified in place.
      line = fetch('OG').sub(/\.$/, '')
      organelles = line.split(/\s*[;,]\s*(?:and\s+)?|\s+and\s+/).map { |part|
        part.strip
      }
    end
    @data['OG'] = organelles
  end
  @data['OG']
end
191
+
192
+
193
+ # returns contents in the OC line.
194
+ # * Bio::EMBLDB::Common#oc -> [ <organism class String>* ]
195
+ # OC Line; organism classification (>=1)
196
+ # OC Eukaryota; Alveolata; Apicomplexa; Piroplasmida; Theileriidae;
197
+ # OC Theileria.
198
# Returns the organism classification of the OC lines as an Array of
# Strings, split on ';'. The result is cached in @data['OC'].
#
# Only a trailing period is removed before splitting, so the last taxon is
# kept intact when the line does not end with '.'.
# A NameError raised while fetching or parsing (this includes
# NoMethodError) is swallowed on purpose; nil is returned in that case.
def oc
  unless @data['OC']
    begin
      line = fetch('OC').strip.sub(/\.\z/, '')
      @data['OC'] = line.split(/;/).map { |e| e.strip }
    rescue NameError
      nil
    end
  end
  @data['OC']
end
210
+
211
+ # returns keywords in the KW line.
212
+ # * Bio::EMBLDB::Common#kw -> [ <keyword>* ]
213
+ # KW Line; keyword (>=1)
214
+ # KW [Keyword;]+
215
# Returns the keywords of the KW lines as an Array of Strings (empty when
# the entry has no KW line). The result is cached in @data['KW'].
#
# Only a trailing period is removed before splitting on ';', so the last
# keyword is kept intact when the line does not end with '.'.
def kw
  unless @data['KW']
    @data['KW'] =
      if get('KW').size > 0
        fetch('KW').strip.sub(/\.\z/, '').split(/;/).map { |e| e.strip }
      else
        []
      end
  end
  @data['KW']
end
226
+ alias keywords kw
227
+
228
+
229
+ # returns contents in the R lines.
230
+ # * Bio::EMBLDB::Common#ref -> [ <reference information Hash>* ]
231
+ # where <reference information Hash> is:
232
+ # {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
233
+ # 'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
234
+ #
235
+ # R Lines
236
+ # * RN RC RP RX RA RT RL RG
237
# Parses the R* lines into an Array of Hashes, one per reference block
# (a block starts at each RN line). Every Hash has the keys 'RN', 'RC',
# 'RP', 'RX', 'RA', 'RT', 'RL' and 'RG'; lines with the same tag are joined
# with single spaces, and keys that never occur hold ''. The result is
# cached in @data['R'].
#
# After joining, each value is stripped, and one leading '"', one trailing
# ';' and one trailing '"' are removed (in that order).
#
# Raises RuntimeError for a line that is not a well-formed R* line.
def ref
  unless @data['R']
    ary = []
    get('R').split(/\nRN /).each do |str|
      raw = {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
             'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
      # Splitting consumed the "RN " of every block but the first.
      str = 'RN ' + str unless /^RN / =~ str
      str.split("\n").each do |line|
        # \s+ swallows all blanks after the tag, so continuation lines are
        # joined with a single space instead of a run of spaces.
        if /^(R[NPXARLCTG])\s+(.+)/ =~ line
          raw[$1] += $2 + ' '
        else
          raise "Invalid format in R lines, \n[#{line}]\n"
        end
      end
      raw.keys.each do |tag|
        raw[tag] = raw[tag].strip.sub(/^"/,'').sub(/;$/,'').sub(/"$/,'')
      end
      ary.push(raw)
    end
    @data['R'] = ary
  end
  @data['R']
end
263
+
264
+ # returns Bio::Reference object from Bio::EMBLDB::Common#ref.
265
+ # * Bio::EMBLDB::Common#references -> Bio::References
266
# Converts the Hashes returned by #ref into Reference objects and wraps
# them in a References object, which is cached in @data['references'].
#
# For every reference a Hash (default value '') is filled as follows:
# 'RA':: 'authors', the value split on ", "
# 'RT':: 'title'
# 'RL':: 'journal', 'volume', 'issue', 'pages' and 'year' when the value
#        looks like "Journal 12 (3), 45-67 (2000)"; otherwise the whole
#        value is stored as 'journal'
# 'RX':: one key per cross-reference, the downcased resource name
#        (e.g. 'medline', 'pubmed', 'doi') mapped to its identifier
def references
  unless @data['references']
    ary = ref.map do |ent|
      hash = Hash.new('')
      ent.each do |key, value|
        case key
        when 'RA'
          hash['authors'] = value.split(/, /)
        when 'RT'
          hash['title'] = value
        when 'RL'
          if value =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
            hash['journal'] = $1
            hash['volume'] = $2
            hash['issue'] = $3
            hash['pages'] = $4
            hash['year'] = $5
          else
            hash['journal'] = value
          end
        when 'RX' # PUBMED, MEDLINE
          # A cross-reference ends with a period followed by white space or
          # the end of the value; periods inside an identifier (as in a
          # DOI) do not split it.
          value.split(/\.(?:\s+|\z)/).each do |item|
            tag, xref = item.split(/; /).map { |i| i.strip }
            next if tag.nil? || tag.empty?
            hash[tag.downcase] = xref
          end
        end
      end
      Reference.new(hash)
    end
    @data['references'] = References.new(ary)
  end
  @data['references']
end
299
+
300
+
301
+ # returns contents in the DR line.
302
+ # * Bio::EMBLDB::Common#dr -> [ <Database cross-reference Hash>* ]
303
+ # where <Database cross-reference Hash> is:
304
+ # * Bio::EMBLDB::Common#dr {|k,v| }
305
+ #
306
+ # DR Line; database cross-reference (>=0)
307
+ # one cross-reference per line
308
+ # "DR database_identifier; primary_identifier; secondary_identifier."
309
# Parses the DR lines into a Hash that maps each database name to an Array
# of cross-references, every cross-reference being the Array of the
# remaining "; "-separated fields of its line. The Hash is cached in
# @data['DR'].
#
# With a block, each (database name, cross-references) pair is yielded;
# without one, the Hash itself is returned.
#
# Only a trailing period is removed from a line, so the last field is kept
# intact when the line does not end with '.'.
def dr
  unless @data['DR']
    table = {}
    get('DR').split(/\n/).each do |line|
      fields = line.sub(/^DR /,'').strip.sub(/\.\z/,'').strip.split(/;[ ]/)
      dbname = fields.shift
      (table[dbname] ||= []).push(fields)
    end
    @data['DR'] = table
  end
  if block_given?
    @data['DR'].each do |k, v|
      yield(k, v)
    end
  else
    @data['DR']
  end
end
328
+
329
+ end # module Common
330
+ end # class EMBLDB
331
+ end # module Bio
332
+