bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,299 @@
1
+ #
2
+ # bio/db/genbank/common.rb - Common methods for GenBank style database classes
3
+ #
4
+ # Copyright (C) 2004 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: common.rb,v 1.9 2005/12/07 11:23:51 k Exp $
21
+ #
22
+
23
+ require 'bio/db'
24
+
25
+ module Bio
26
+ class NCBIDB
27
+ module Common
28
+
29
+ DELIMITER = RS = "\n//\n"
30
+ TAGSIZE = 12
31
+
32
+ def initialize(entry)
33
+ super(entry, TAGSIZE)
34
+ end
35
+
36
+ # LOCUS -- Locus class must be defined in child classes
37
+
38
+ # DEFINITION
39
+ def definition
40
+ field_fetch('DEFINITION')
41
+ end
42
+
43
+
44
+ # ACCESSION
45
+ def accessions
46
+ accession.split(/\s+/)
47
+ end
48
+
49
+
50
+ # VERSION
51
+ def versions
52
+ @data['VERSION'] ||= fetch('VERSION').split(/\s+/)
53
+ end
54
+
55
+ def acc_version
56
+ versions.first.to_s
57
+ end
58
+
59
+ def accession
60
+ acc_version.split(/\./).first.to_s
61
+ end
62
+
63
+ def version
64
+ acc_version.split(/\./).last.to_i
65
+ end
66
+
67
+ def gi
68
+ versions.last
69
+ end
70
+
71
+
72
+ # NID
73
+ def nid
74
+ field_fetch('NID')
75
+ end
76
+
77
+
78
+ # KEYWORDS
79
+ def keywords
80
+ @data['KEYWORDS'] ||= fetch('KEYWORDS').chomp('.').split(/; /)
81
+ end
82
+
83
+
84
+ # SEGMENT
85
+ def segment
86
+ @data['SEGMENT'] ||= fetch('SEGMENT').scan(/\d+/).join("/")
87
+ end
88
+
89
+
90
+ # SOURCE
91
+ def source
92
+ unless @data['SOURCE']
93
+ name, org = get('SOURCE').split('ORGANISM')
94
+ org ||= ""
95
+ if org[/\S+;/]
96
+ organism = $`
97
+ taxonomy = $& + $'
98
+ elsif org[/\S+\./] # rs:NC_001741
99
+ organism = $`
100
+ taxonomy = $& + $'
101
+ else
102
+ organism = org
103
+ taxonomy = ''
104
+ end
105
+ @data['SOURCE'] = {
106
+ 'common_name' => truncate(tag_cut(name)),
107
+ 'organism' => truncate(organism),
108
+ 'taxonomy' => truncate(taxonomy),
109
+ }
110
+ @data['SOURCE'].default = ''
111
+ end
112
+ @data['SOURCE']
113
+ end
114
+
115
+ def common_name
116
+ source['common_name']
117
+ end
118
+ alias vernacular_name common_name
119
+
120
+ def organism
121
+ source['organism']
122
+ end
123
+
124
+ def taxonomy
125
+ source['taxonomy']
126
+ end
127
+
128
+
129
+ # REFERENCE
130
+ def references
131
+ unless @data['REFERENCE']
132
+ ary = []
133
+ toptag2array(get('REFERENCE')).each do |ref|
134
+ hash = Hash.new('')
135
+ subtag2array(ref).each do |field|
136
+ case tag_get(field)
137
+ when /AUTHORS/
138
+ authors = truncate(tag_cut(field))
139
+ authors = authors.split(/, /)
140
+ authors[-1] = authors[-1].split(/\s+and\s+/) if authors[-1]
141
+ authors = authors.flatten.map { |a| a.sub(/,/, ', ') }
142
+ hash['authors'] = authors
143
+ when /TITLE/
144
+ hash['title'] = truncate(tag_cut(field)) + '.'
145
+ when /JOURNAL/
146
+ journal = truncate(tag_cut(field))
147
+ if journal =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
148
+ hash['journal'] = $1
149
+ hash['volume'] = $2
150
+ hash['issue'] = $3
151
+ hash['pages'] = $4
152
+ hash['year'] = $5
153
+ else
154
+ hash['journal'] = journal
155
+ end
156
+ when /MEDLINE/
157
+ hash['medline'] = truncate(tag_cut(field))
158
+ when /PUBMED/
159
+ hash['pubmed'] = truncate(tag_cut(field))
160
+ end
161
+ end
162
+ ary.push(Reference.new(hash))
163
+ end
164
+ @data['REFERENCE'] = References.new(ary)
165
+ end
166
+ if block_given?
167
+ @data['REFERENCE'].each do |r|
168
+ yield r
169
+ end
170
+ else
171
+ @data['REFERENCE']
172
+ end
173
+ end
174
+
175
+
176
+ # COMMENT
177
+ def comment
178
+ field_fetch('COMMENT')
179
+ end
180
+
181
+
182
+ # FEATURES
183
+ def features
184
+ unless @data['FEATURES']
185
+ ary = []
186
+ in_quote = false
187
+ get('FEATURES').each_line do |line|
188
+ next if line =~ /^FEATURES/
189
+
190
+ # feature type (source, CDS, ...)
191
+ head = line[0,20].to_s.strip
192
+
193
+ # feature value (position or /qualifier=)
194
+ body = line[20,60].to_s.chomp
195
+
196
+ # sub-array [ feature type, position, /q="data", ... ]
197
+ if line =~ /^ {5}\S/
198
+ ary.push([ head, body ])
199
+
200
+ # feature qualifier start (/q="data..., /q="data...", /q=data, /q)
201
+ elsif body =~ /^ \// and not in_quote # gb:IRO125195
202
+ ary.last.push(body)
203
+
204
+ # flag for open quote (/q="data...)
205
+ if body =~ /="/ and body !~ /"$/
206
+ in_quote = true
207
+ end
208
+
209
+ # feature qualifier continued (...data..., ...data...")
210
+ else
211
+ ary.last.last << body
212
+
213
+ # flag for closing quote (/q="data... lines ...")
214
+ if body =~ /"$/
215
+ in_quote = false
216
+ end
217
+ end
218
+ end
219
+
220
+ ary.collect! do |subary|
221
+ parse_qualifiers(subary)
222
+ end
223
+
224
+ @data['FEATURES'] = Features.new(ary)
225
+ end
226
+ if block_given?
227
+ @data['FEATURES'].each do |f|
228
+ yield f
229
+ end
230
+ else
231
+ @data['FEATURES']
232
+ end
233
+ end
234
+
235
+
236
+ # ORIGIN
237
+ def origin
238
+ unless @data['ORIGIN']
239
+ ori, seqstr = get('ORIGIN').split("\n", 2)
240
+ seqstr ||= ""
241
+ @data['ORIGIN'] = truncate(tag_cut(ori))
242
+ @data['SEQUENCE'] = seqstr.tr("0-9 \t\n\r\/", '')
243
+ end
244
+ @data['ORIGIN']
245
+ end
246
+
247
+
248
+ ### private methods
249
+
250
+ private
251
+
252
+ def parse_qualifiers(ary)
253
+ feature = Feature.new
254
+
255
+ feature.feature = ary.shift
256
+ feature.position = ary.shift.gsub(/\s/, '')
257
+
258
+ ary.each do |f|
259
+ if f =~ %r{/([^=]+)=?"?([^"]*)"?}
260
+ qualifier, value = $1, $2
261
+
262
+ case qualifier
263
+ when 'translation'
264
+ value = Sequence::AA.new(value)
265
+ when 'codon_start'
266
+ value = value.to_i
267
+ else
268
+ value = true if value.empty?
269
+ end
270
+
271
+ feature.append(Feature::Qualifier.new(qualifier, value))
272
+ end
273
+ end
274
+
275
+ return feature
276
+ end
277
+
278
+ end # Common
279
+ end # NCBIDB
280
+ end # Bio
281
+
282
+
283
+ =begin
284
+
285
+ = Bio::NCBIDB::Common
286
+
287
+ This module defines a common framework shared by GenBank, GenPept, RefSeq, and
288
+ DDBJ. For more details, see the documentation in each of the genbank/*.rb files.
289
+
290
+
291
+ == SEE ALSO
292
+
293
+ * ((<URL:ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt>))
294
+ * ((<URL:http://www.ncbi.nlm.nih.gov/collab/FT/index.html>))
295
+
296
+ =end
297
+
298
+
299
+
@@ -0,0 +1,34 @@
1
+ #
2
+ # bio/db/genbank/ddbj.rb - DDBJ database class
3
+ #
4
+ # Copyright (C) 2000-2004 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: ddbj.rb,v 1.7 2005/09/09 16:02:04 ngoto Exp $
21
+ #
22
+
23
+ require 'bio/db/genbank/genbank'
24
+
25
+ module Bio
26
+
27
+ class DDBJ < GenBank
28
+
29
+ autoload :XML, 'bio/io/ddbjxml'
30
+
31
+ # Nothing to do (the DDBJ database format is exactly the same as GenBank)
32
+ end
33
+
34
+ end # Bio
@@ -0,0 +1,354 @@
1
+ #
2
+ # bio/db/genbank/genbank.rb - GenBank database class
3
+ #
4
+ # Copyright (C) 2000-2005 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: genbank.rb,v 0.38 2005/12/07 11:23:51 k Exp $
21
+ #
22
+
23
+ require 'bio/db'
24
+ require 'bio/db/genbank/common'
25
+
26
+ module Bio
27
+ class GenBank < NCBIDB
28
+
29
+ include Bio::NCBIDB::Common
30
+
31
+ # LOCUS
32
+ class Locus
33
+ def initialize(locus_line)
34
+ if locus_line.empty?
35
+ # do nothing (just for empty or incomplete entry string)
36
+ elsif locus_line.length > 75 # after Rel 126.0
37
+ @entry_id = locus_line[12..27].strip
38
+ @length = locus_line[29..39].to_i
39
+ @strand = locus_line[44..46].strip
40
+ @natype = locus_line[47..52].strip
41
+ @circular = locus_line[55..62].strip
42
+ @division = locus_line[63..66].strip
43
+ @date = locus_line[68..78].strip
44
+ else
45
+ @entry_id = locus_line[12..21].strip
46
+ @length = locus_line[22..29].to_i
47
+ @strand = locus_line[33..35].strip
48
+ @natype = locus_line[36..39].strip
49
+ @circular = locus_line[42..51].strip
50
+ @division = locus_line[52..54].strip
51
+ @date = locus_line[62..72].strip
52
+ end
53
+ end
54
+ attr_accessor :entry_id, :length, :strand, :natype, :circular,
55
+ :division, :date
56
+ end
57
+
58
+ def locus
59
+ @data['LOCUS'] ||= Locus.new(get('LOCUS'))
60
+ end
61
+ def entry_id; locus.entry_id; end
62
+ def length; locus.length; end
63
+ def circular; locus.circular; end
64
+ def division; locus.division; end
65
+ def date; locus.date; end
66
+
67
+ def strand; locus.strand; end
68
+ def natype; locus.natype; end
69
+
70
+
71
+ # ORIGIN
72
+ def seq
73
+ unless @data['SEQUENCE']
74
+ origin
75
+ end
76
+ Bio::Sequence::NA.new(@data['SEQUENCE'])
77
+ end
78
+ alias naseq seq
79
+ alias nalen length
80
+
81
+ def seq_len
82
+ seq.length
83
+ end
84
+
85
+
86
+ # FEATURES
87
+ def each_cds
88
+ features.each do |feature|
89
+ if feature.feature == 'CDS'
90
+ yield(feature)
91
+ end
92
+ end
93
+ end
94
+
95
+ def each_gene
96
+ features.each do |feature|
97
+ if feature.feature == 'gene'
98
+ yield(feature)
99
+ end
100
+ end
101
+ end
102
+
103
+
104
+ # BASE COUNT : obsoleted after GenBank release 138.0
105
+ def basecount(base = nil)
106
+ unless @data['BASE COUNT']
107
+ hash = Hash.new(0)
108
+ get('BASE COUNT').scan(/(\d+) (\w)/).each do |c, b|
109
+ hash[b] = c.to_i
110
+ end
111
+ @data['BASE COUNT'] = hash
112
+ end
113
+
114
+ if base
115
+ base.downcase!
116
+ @data['BASE COUNT'][base]
117
+ else
118
+ @data['BASE COUNT']
119
+ end
120
+ end
121
+
122
+ end # GenBank
123
+ end # Bio
124
+
125
+
126
+
127
+ if __FILE__ == $0
128
+
129
+ begin
130
+ require 'pp'
131
+ alias p pp
132
+ rescue LoadError
133
+ end
134
+
135
+ puts "### GenBank"
136
+ if ARGV.size > 0
137
+ gb = Bio::GenBank.new(ARGF.read)
138
+ else
139
+ require 'bio/io/fetch'
140
+ gb = Bio::GenBank.new(Bio::Fetch.query('gb', 'LPATOVGNS'))
141
+ end
142
+
143
+ puts "## LOCUS"
144
+ puts "# GenBank.locus"
145
+ p gb.locus
146
+ puts "# GenBank.entry_id"
147
+ p gb.entry_id
148
+ puts "# GenBank.nalen"
149
+ p gb.nalen
150
+ puts "# GenBank.strand"
151
+ p gb.strand
152
+ puts "# GenBank.natype"
153
+ p gb.natype
154
+ puts "# GenBank.circular"
155
+ p gb.circular
156
+ puts "# GenBank.division"
157
+ p gb.division
158
+ puts "# GenBank.date"
159
+ p gb.date
160
+
161
+ puts "## DEFINITION"
162
+ p gb.definition
163
+
164
+ puts "## ACCESSION"
165
+ p gb.accession
166
+
167
+ puts "## VERSION"
168
+ p gb.versions
169
+ p gb.version
170
+ p gb.gi
171
+
172
+ puts "## NID"
173
+ p gb.nid
174
+
175
+ puts "## KEYWORDS"
176
+ p gb.keywords
177
+
178
+ puts "## SEGMENT"
179
+ p gb.segment
180
+
181
+ puts "## SOURCE"
182
+ p gb.source
183
+ p gb.common_name
184
+ p gb.vernacular_name
185
+ p gb.organism
186
+ p gb.taxonomy
187
+
188
+ puts "## REFERENCE"
189
+ p gb.references
190
+
191
+ puts "## COMMENT"
192
+ p gb.comment
193
+
194
+ puts "## FEATURES"
195
+ p gb.features
196
+
197
+ puts "## BASE COUNT"
198
+ p gb.basecount
199
+ p gb.basecount('a')
200
+ p gb.basecount('A')
201
+
202
+ puts "## ORIGIN"
203
+ p gb.origin
204
+ p gb.naseq
205
+
206
+ end
207
+
208
+
209
+ =begin
210
+
211
+ = Bio::GenBank
212
+
213
+ === Initialize
214
+
215
+ --- Bio::GenBank.new(entry)
216
+
217
+ === LOCUS
218
+
219
+ --- Bio::GenBank#locus -> Bio::GenBank::Locus
220
+
221
+ Returns contents of the LOCUS record as a Bio::GenBank::Locus object.
222
+
223
+ --- Bio::GenBank#entry_id -> String
224
+ --- Bio::GenBank#nalen -> Fixnum
225
+ --- Bio::GenBank#strand -> String
226
+ --- Bio::GenBank#natype -> String
227
+ --- Bio::GenBank#circular -> String
228
+ --- Bio::GenBank#division -> String
229
+ --- Bio::GenBank#date -> String
230
+
231
+ Access methods for the contents of the LOCUS record.
232
+
233
+ === DEFINITION
234
+
235
+ --- Bio::GenBank#definition -> String
236
+
237
+ Returns contents of the DEFINITION record as a String.
238
+
239
+ === ACCESSION
240
+
241
+ --- Bio::GenBank#accessions -> Array
242
+
243
+ Returns contents of the ACCESSION record as an Array.
244
+
245
+ === VERSION
246
+
247
+ --- Bio::GenBank#versions -> Array
248
+
249
+ Returns contents of the VERSION record as an Array of Strings.
250
+
251
+ --- Bio::GenBank#acc_version -> String
252
+ --- Bio::GenBank#accession -> String
253
+ --- Bio::GenBank#version -> Fixnum
254
+ --- Bio::GenBank#gi -> String
255
+
256
+ Access methods for the contents of the VERSION record.
257
+
258
+ The 'acc_version' method returns the first part of the VERSION record
259
+ as an "ACCESSION.VERSION" String, the 'accession' method returns the ACCESSION
260
+ part of the acc_version, 'version' method returns the VERSION part of the
261
+ acc_version as a Fixnum, and the 'gi' method returns the second part of
262
+ the VERSION record as a "GI:#######" String.
263
+
264
+ === NID
265
+
266
+ --- Bio::GenBank#nid -> String
267
+
268
+ Returns contents of the NID record as a String.
269
+
270
+ === KEYWORDS
271
+
272
+ --- Bio::GenBank#keywords -> Array
273
+
274
+ Returns contents of the KEYWORDS record as an Array of Strings.
275
+
276
+ === SEGMENT
277
+
278
+ --- Bio::GenBank#segment -> String
279
+
280
+ Returns contents of the SEGMENT record as a "m/n" form String.
281
+
282
+ === SOURCE
283
+
284
+ --- Bio::GenBank#source -> Hash
285
+
286
+ Returns contents of the SOURCE record as a Hash.
287
+
288
+ --- Bio::GenBank#common_name -> String
289
+ --- Bio::GenBank#vernacular_name -> String
290
+ --- Bio::GenBank#organism -> String
291
+ --- Bio::GenBank#taxonomy -> String
292
+
293
+ Access methods for the contents of the SOURCE record.
294
+
295
+ The 'common_name' method is the same as source['common_name'].
296
+ The 'vernacular_name' method is an alias for 'common_name'.
297
+ The 'organism' method is the same as source['organism'].
298
+ The 'taxonomy' method is the same as source['taxonomy'].
299
+
300
+ === REFERENCE
301
+
302
+ --- Bio::GenBank#references -> Array
303
+
304
+ Returns contents of the REFERENCE records as an Array of Bio::Reference
305
+ objects.
306
+
307
+ === COMMENT
308
+
309
+ --- Bio::GenBank#comment -> String
310
+
311
+ Returns contents of the COMMENT record as a String.
312
+
313
+ === FEATURES
314
+
315
+ --- Bio::GenBank#features -> Bio::Features
316
+
317
+ Returns contents of the FEATURES record as a Bio::Features object.
318
+
319
+ --- Bio::GenBank#each_cds -> Array
320
+
321
+ Iterate only for the 'CDS' portion of the Bio::Features.
322
+
323
+ --- Bio::GenBank#each_gene -> Array
324
+
325
+ Iterate only for the 'gene' portion of the Bio::Features.
326
+
327
+ === BASE COUNT
328
+
329
+ --- Bio::GenBank#basecount(base = nil) -> Hash or Fixnum
330
+
331
+ Returns the BASE COUNT as a Hash. When the base is specified, returns
332
+ count of the base as a Fixnum. The base can be one of 'a', 't', 'g',
333
+ 'c', and 'o' (others).
334
+
335
+ === ORIGIN
336
+
337
+ --- Bio::GenBank#origin -> String
338
+
339
+ Returns contents of the ORIGIN record as a String.
340
+
341
+ --- Bio::GenBank#naseq -> Bio::Sequence::NA
342
+ --- Bio::GenBank#seq -> Bio::Sequence::NA
343
+
344
+ Returns DNA sequence in the ORIGIN record as a Bio::Sequence::NA object.
345
+
346
+ == SEE ALSO
347
+
348
+ * ((<URL:ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt>))
349
+ * ((<URL:http://www.ncbi.nlm.nih.gov/collab/FT/index.html>))
350
+
351
+ =end
352
+
353
+
354
+