bio 0.7.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142)
  1. data/bin/bioruby +71 -27
  2. data/bin/br_biofetch.rb +5 -17
  3. data/bin/br_bioflat.rb +14 -26
  4. data/bin/br_biogetseq.rb +6 -18
  5. data/bin/br_pmfetch.rb +6 -16
  6. data/doc/Changes-0.7.rd +35 -0
  7. data/doc/KEGG_API.rd +287 -172
  8. data/doc/KEGG_API.rd.ja +273 -160
  9. data/doc/Tutorial.rd +18 -9
  10. data/doc/Tutorial.rd.ja +656 -138
  11. data/lib/bio.rb +6 -24
  12. data/lib/bio/alignment.rb +5 -5
  13. data/lib/bio/appl/blast.rb +132 -98
  14. data/lib/bio/appl/blast/format0.rb +9 -19
  15. data/lib/bio/appl/blast/wublast.rb +5 -18
  16. data/lib/bio/appl/emboss.rb +40 -47
  17. data/lib/bio/appl/hmmer.rb +116 -82
  18. data/lib/bio/appl/hmmer/report.rb +509 -364
  19. data/lib/bio/appl/spidey/report.rb +7 -18
  20. data/lib/bio/data/na.rb +3 -21
  21. data/lib/bio/db.rb +3 -21
  22. data/lib/bio/db/aaindex.rb +147 -52
  23. data/lib/bio/db/embl/common.rb +27 -6
  24. data/lib/bio/db/embl/embl.rb +18 -10
  25. data/lib/bio/db/embl/sptr.rb +87 -67
  26. data/lib/bio/db/embl/swissprot.rb +32 -3
  27. data/lib/bio/db/embl/trembl.rb +32 -3
  28. data/lib/bio/db/embl/uniprot.rb +32 -3
  29. data/lib/bio/db/fasta.rb +327 -289
  30. data/lib/bio/db/medline.rb +25 -4
  31. data/lib/bio/db/nbrf.rb +12 -20
  32. data/lib/bio/db/pdb.rb +4 -1
  33. data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
  34. data/lib/bio/db/pdb/pdb.rb +13 -8
  35. data/lib/bio/db/rebase.rb +93 -97
  36. data/lib/bio/feature.rb +2 -31
  37. data/lib/bio/io/ddbjxml.rb +167 -139
  38. data/lib/bio/io/fastacmd.rb +89 -56
  39. data/lib/bio/io/flatfile.rb +994 -278
  40. data/lib/bio/io/flatfile/index.rb +257 -194
  41. data/lib/bio/io/flatfile/indexer.rb +37 -29
  42. data/lib/bio/reference.rb +147 -64
  43. data/lib/bio/sequence.rb +57 -417
  44. data/lib/bio/sequence/aa.rb +64 -0
  45. data/lib/bio/sequence/common.rb +175 -0
  46. data/lib/bio/sequence/compat.rb +68 -0
  47. data/lib/bio/sequence/format.rb +134 -0
  48. data/lib/bio/sequence/generic.rb +24 -0
  49. data/lib/bio/sequence/na.rb +189 -0
  50. data/lib/bio/shell.rb +9 -23
  51. data/lib/bio/shell/core.rb +130 -125
  52. data/lib/bio/shell/demo.rb +143 -0
  53. data/lib/bio/shell/{session.rb → interface.rb} +42 -40
  54. data/lib/bio/shell/object.rb +52 -0
  55. data/lib/bio/shell/plugin/codon.rb +4 -22
  56. data/lib/bio/shell/plugin/emboss.rb +23 -0
  57. data/lib/bio/shell/plugin/entry.rb +34 -25
  58. data/lib/bio/shell/plugin/flatfile.rb +5 -23
  59. data/lib/bio/shell/plugin/keggapi.rb +11 -24
  60. data/lib/bio/shell/plugin/midi.rb +5 -23
  61. data/lib/bio/shell/plugin/obda.rb +4 -22
  62. data/lib/bio/shell/plugin/seq.rb +6 -24
  63. data/lib/bio/shell/rails/Rakefile +10 -0
  64. data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
  65. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
  66. data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
  67. data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
  68. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
  69. data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
  70. data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
  71. data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
  72. data/lib/bio/shell/rails/config/boot.rb +19 -0
  73. data/lib/bio/shell/rails/config/database.yml +85 -0
  74. data/lib/bio/shell/rails/config/environment.rb +53 -0
  75. data/lib/bio/shell/rails/config/environments/development.rb +19 -0
  76. data/lib/bio/shell/rails/config/environments/production.rb +19 -0
  77. data/lib/bio/shell/rails/config/environments/test.rb +19 -0
  78. data/lib/bio/shell/rails/config/routes.rb +19 -0
  79. data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
  80. data/lib/bio/shell/rails/public/404.html +8 -0
  81. data/lib/bio/shell/rails/public/500.html +8 -0
  82. data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
  83. data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
  84. data/lib/bio/shell/rails/public/dispatch.rb +10 -0
  85. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  86. data/lib/bio/shell/rails/public/images/icon.png +0 -0
  87. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  88. data/lib/bio/shell/rails/public/index.html +277 -0
  89. data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
  90. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
  91. data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
  92. data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
  93. data/lib/bio/shell/rails/public/robots.txt +1 -0
  94. data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
  95. data/lib/bio/shell/rails/script/about +3 -0
  96. data/lib/bio/shell/rails/script/breakpointer +3 -0
  97. data/lib/bio/shell/rails/script/console +3 -0
  98. data/lib/bio/shell/rails/script/destroy +3 -0
  99. data/lib/bio/shell/rails/script/generate +3 -0
  100. data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
  101. data/lib/bio/shell/rails/script/performance/profiler +3 -0
  102. data/lib/bio/shell/rails/script/plugin +3 -0
  103. data/lib/bio/shell/rails/script/process/reaper +3 -0
  104. data/lib/bio/shell/rails/script/process/spawner +3 -0
  105. data/lib/bio/shell/rails/script/process/spinner +3 -0
  106. data/lib/bio/shell/rails/script/runner +3 -0
  107. data/lib/bio/shell/rails/script/server +42 -0
  108. data/lib/bio/shell/rails/test/test_helper.rb +28 -0
  109. data/lib/bio/shell/web.rb +90 -0
  110. data/lib/bio/util/contingency_table.rb +231 -225
  111. data/sample/any2fasta.rb +59 -0
  112. data/test/data/HMMER/hmmpfam.out +64 -0
  113. data/test/data/HMMER/hmmsearch.out +88 -0
  114. data/test/data/aaindex/DAYM780301 +30 -0
  115. data/test/data/aaindex/PRAM900102 +20 -0
  116. data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
  117. data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
  118. data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
  119. data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
  120. data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
  121. data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
  122. data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
  123. data/test/unit/bio/appl/blast/test_report.rb +15 -12
  124. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
  125. data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
  126. data/test/unit/bio/appl/test_blast.rb +5 -5
  127. data/test/unit/bio/data/test_na.rb +9 -18
  128. data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
  129. data/test/unit/bio/db/test_aaindex.rb +197 -0
  130. data/test/unit/bio/io/test_fastacmd.rb +55 -0
  131. data/test/unit/bio/sequence/test_aa.rb +102 -0
  132. data/test/unit/bio/sequence/test_common.rb +178 -0
  133. data/test/unit/bio/sequence/test_compat.rb +82 -0
  134. data/test/unit/bio/sequence/test_na.rb +242 -0
  135. data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
  136. data/test/unit/bio/test_alignment.rb +15 -7
  137. data/test/unit/bio/test_reference.rb +198 -0
  138. data/test/unit/bio/test_sequence.rb +4 -49
  139. data/test/unit/bio/test_shell.rb +2 -2
  140. metadata +118 -15
  141. data/lib/bio/io/brdb.rb +0 -103
  142. data/lib/bioruby.rb +0 -34
@@ -1,23 +1,10 @@
1
1
  #
2
- # bio/io/flatfile/indexer.rb - OBDA flatfile indexer
2
+ # = bio/io/flatfile/indexer.rb - OBDA flatfile indexer
3
3
  #
4
- # Copyright (C) 2002 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
4
+ # Copyright:: Copyright (C) 2002 GOTO Naohisa <ng@bioruby.org>
5
+ # License:: Ruby's
5
6
  #
6
- # This library is free software; you can redistribute it and/or
7
- # modify it under the terms of the GNU Lesser General Public
8
- # License as published by the Free Software Foundation; either
9
- # version 2 of the License, or (at your option) any later version.
10
- #
11
- # This library is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
- # Lesser General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU Lesser General Public
17
- # License along with this library; if not, write to the Free Software
18
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
- #
20
- # $Id: indexer.rb,v 1.21 2005/09/26 13:00:08 k Exp $
7
+ # $Id: indexer.rb,v 1.23 2006/02/22 08:41:03 ngoto Exp $
21
8
  #
22
9
 
23
10
  require 'bio/io/flatfile/index'
@@ -80,6 +67,8 @@ module Bio
80
67
  BlastDefaultParser.new(Bio::Blast::WU::Report, *arg)
81
68
  when 'Bio::Blast::WU::Report_TBlast'
82
69
  BlastDefaultParser.new(Bio::Blast::WU::Report_TBlast, *arg)
70
+ when 'Bio::PDB::ChemicalComponent'
71
+ PDBChemicalComponentParser.new(Bio::PDB::ChemicalComponent, *arg)
83
72
  else
84
73
  raise 'unknown or unsupported format'
85
74
  end #case dbclass.to_s
@@ -130,10 +119,10 @@ module Bio
130
119
  attr_reader :fileid
131
120
 
132
121
  def each
133
- pos = @flatfile.pos
134
122
  @flatfile.each do |x|
135
123
  @entry = x
136
- len = @flatfile.entry_raw.length
124
+ pos = @flatfile.entry_start_pos
125
+ len = @flatfile.entry_ended_pos - @flatfile.entry_start_pos
137
126
  begin
138
127
  yield pos, len
139
128
  rescue RuntimeError, NameError => evar
@@ -150,7 +139,6 @@ module Bio
150
139
  DEBUG.print "This entry shall be incorrectly indexed.\n"
151
140
  end
152
141
  end #rescue
153
- pos = @flatfile.pos
154
142
  end
155
143
  end
156
144
 
@@ -204,15 +192,6 @@ module Bio
204
192
  end
205
193
  self.add_secondary_namespaces(*sec_names)
206
194
  end
207
- def open_flatfile(fileid, file)
208
- super
209
- @flatfile.pos = 0
210
- begin
211
- pos = @flatfile.pos
212
- line = @flatfile.gets
213
- end until (!line or line =~ /^LOCUS /)
214
- @flatfile.pos = pos
215
- end
216
195
  end #class GenBankParser
217
196
 
218
197
  class GenPeptParser < GenBankParser
@@ -437,6 +416,35 @@ module Bio
437
416
  end
438
417
  end #class BlastDefaultReportParser
439
418
 
419
+ class PDBChemicalComponentParser < TemplateParser
420
+ NAMESTYLE = NameSpaces.new(
421
+ NameSpace.new( 'UNIQUE', Proc.new { |x| x.entry_id } )
422
+ )
423
+ PRIMARY = 'UNIQUE'
424
+ def initialize(klass, pri_name = nil, sec_names = nil)
425
+ super()
426
+ self.format = 'raw'
427
+ self.dbclass = Bio::PDB::ChemicalComponent
428
+ self.set_primary_namespace((pri_name or PRIMARY))
429
+ unless sec_names then
430
+ sec_names = []
431
+ @namestyle.each_value do |x|
432
+ sec_names << x.name if x.name != self.primary.name
433
+ end
434
+ end
435
+ self.add_secondary_namespaces(*sec_names)
436
+ end
437
+ def open_flatfile(fileid, file)
438
+ super
439
+ @flatfile.pos = 0
440
+ begin
441
+ pos = @flatfile.pos
442
+ line = @flatfile.gets
443
+ end until (!line or line =~ /^RESIDUE /)
444
+ @flatfile.pos = pos
445
+ end
446
+ end #class PDBChemicalComponentParser
447
+
440
448
  end #module Parser
441
449
 
442
450
  def self.makeindexBDB(name, parser, options, *files)
@@ -1,7 +1,23 @@
1
1
  #
2
- # bio/reference.rb - journal reference class
2
+ # = bio/reference.rb - Journal reference classes
3
3
  #
4
- # Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
4
+ # Copyright:: Copyright (C) 2001
5
+ # KATAYAMA Toshiaki <k@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: reference.rb,v 1.21 2006/02/08 15:06:26 nakao Exp $
9
+ #
10
+ # == Description
11
+ #
12
+ # Journal reference classes.
13
+ #
14
+ # == Examples
15
+ #
16
+ # == References
17
+ #
18
+ #
19
+ #
20
+ #--
5
21
  #
6
22
  # This library is free software; you can redistribute it and/or
7
23
  # modify it under the terms of the GNU Lesser General Public
@@ -17,13 +33,78 @@
17
33
  # License along with this library; if not, write to the Free Software
18
34
  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
35
  #
20
- # $Id: reference.rb,v 1.18 2005/12/18 16:58:58 nakao Exp $
36
+ #++
21
37
  #
22
38
 
23
39
  module Bio
24
40
 
41
+ # A class for journal reference information.
42
+ #
43
+ # === Examples
44
+ #
45
+ # hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ],
46
+ # 'title' => "Title of the study.",
47
+ # 'journal' => "Theor. J. Hoge",
48
+ # 'volume' => 12,
49
+ # 'issue' => 3,
50
+ # 'pages' => "123-145",
51
+ # 'year' => 2001,
52
+ # 'pubmed' => 12345678,
53
+ # 'medline' => 98765432,
54
+ # 'abstract' => "Hoge fuga. ...",
55
+ # 'url' => "http://example.com",
56
+ # 'mesh' => [],
57
+ # 'affiliations' => []}
58
+ # ref = Bio::Reference.new(hash)
59
+ #
60
+ # # Formats in the BiBTeX style.
61
+ # ref.format("bibtex")
62
+ #
63
+ # # Short-cut for Bio::Reference#format("bibtex")
64
+ # ref.bibtex
65
+ #
25
66
  class Reference
26
67
 
68
+ # Author names in an Array, [ "Hoge, J.P.", "Fuga, F.B." ].
69
+ attr_reader :authors
70
+
71
+ # "Title of the study."
72
+ attr_reader :title
73
+
74
+ # "Theor. J. Hoge"
75
+ attr_reader :journal
76
+
77
+ # 12
78
+ attr_reader :volume
79
+
80
+ # 3
81
+ attr_reader :issue
82
+
83
+ # "123-145"
84
+ attr_reader :pages
85
+
86
+ # 2001
87
+ attr_reader :year
88
+
89
+ # 12345678
90
+ attr_reader :pubmed
91
+
92
+ # 98765432
93
+ attr_reader :medline
94
+
95
+ # Abstract text in a String.
96
+ attr_reader :abstract
97
+
98
+ # A URL String.
99
+ attr_reader :url
100
+
101
+ # MeSH terms in an Array.
102
+ attr_reader :mesh
103
+
104
+ # Affiliations in an Array.
105
+ attr_reader :affiliations
106
+
107
+ #
27
108
  def initialize(hash)
28
109
  hash.default = ''
29
110
  @authors = hash['authors'] # [ "Hoge, J.P.", "Fuga, F.B." ]
@@ -43,9 +124,23 @@ module Bio
43
124
  @mesh = [] if @mesh.empty?
44
125
  @affiliations = [] if @affiliations.empty?
45
126
  end
46
- attr_reader :authors, :title, :journal, :volume, :issue, :pages, :year,
47
- :pubmed, :medline, :abstract, :url, :mesh, :affiliations
48
127
 
128
+ # Formats the reference in a given style.
129
+ #
130
+ # Styles:
131
+ # 0. nil - general
132
+ # 1. endnote - Endnote
133
+ # 2. bibitem - Bibitem (option acceptable)
134
+ # 3. bibtex - BiBTeX (option acceptable)
135
+ # 4. rd - rd (option acceptable)
136
+ # 5. nature - Nature (option acceptable)
137
+ # 6. science - Science
138
+ # 7. genome_biol - Genome Biology
139
+ # 8. genome_res - Genome Research
140
+ # 9. nar - Nucleic Acids Research
141
+ # 10. current - Current Biology
142
+ # 11. trends - Trends in *
143
+ # 12. cell - Cell Press
49
144
  def format(style = nil, option = nil)
50
145
  case style
51
146
  when 'endnote'
@@ -77,19 +172,20 @@ module Bio
77
172
  end
78
173
  end
79
174
 
175
+ # Formats in the EndNote style.
80
176
  def endnote
81
177
  lines = []
82
178
  lines << "%0 Journal Article"
83
179
  @authors.each do |author|
84
180
  lines << "%A #{author}"
85
181
  end
86
- lines << "%D #{@year}" unless @year.empty?
182
+ lines << "%D #{@year}" unless @year.to_s.empty?
87
183
  lines << "%T #{@title}" unless @title.empty?
88
184
  lines << "%J #{@journal}" unless @journal.empty?
89
- lines << "%V #{@volume}" unless @volume.empty?
90
- lines << "%N #{@issue}" unless @issue.empty?
185
+ lines << "%V #{@volume}" unless @volume.to_s.empty?
186
+ lines << "%N #{@issue}" unless @issue.to_s.empty?
91
187
  lines << "%P #{@pages}" unless @pages.empty?
92
- lines << "%M #{@pubmed}" unless @pubmed.empty?
188
+ lines << "%M #{@pubmed}" unless @pubmed.to_s.empty?
93
189
  if @pubmed
94
190
  cgi = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi"
95
191
  opts = "cmd=Retrieve&db=PubMed&dopt=Citation&list_uids"
@@ -104,6 +200,7 @@ module Bio
104
200
  return lines.join("\n")
105
201
  end
106
202
 
203
+ # Formats in the bibitem style.
107
204
  def bibitem(item = nil)
108
205
  item = "PMID:#{@pubmed}" unless item
109
206
  pages = @pages.sub('-', '--')
@@ -115,6 +212,7 @@ module Bio
115
212
  END
116
213
  end
117
214
 
215
+ # Formats in the BibTeX style.
118
216
  def bibtex(section = nil)
119
217
  section = "article" unless section
120
218
  authors = authors_join(' and ', ' and ')
@@ -132,11 +230,13 @@ module Bio
132
230
  END
133
231
  end
134
232
 
233
+ # Formats in a general style.
135
234
  def general
136
235
  authors = @authors.join(', ')
137
236
  "#{authors} (#{@year}). \"#{@title}\" #{@journal} #{@volume}:#{@pages}."
138
237
  end
139
238
 
239
+ # Formats in the RD style.
140
240
  def rd(str = nil)
141
241
  @abstract ||= str
142
242
  lines = []
@@ -147,6 +247,8 @@ module Bio
147
247
  return lines.join("\n\n")
148
248
  end
149
249
 
250
+ # Formats in the Nature Publishing Group style.
251
+ # * http://www.nature.com
150
252
  def nature(short = false)
151
253
  if short
152
254
  if @authors.size > 4
@@ -163,6 +265,8 @@ module Bio
163
265
  end
164
266
  end
165
267
 
268
+ # Formats in the Science style.
269
+ # * http://www.sciencemag.org/
166
270
  def science
167
271
  if @authors.size > 4
168
272
  authors = rev_name(@authors[0]) + " et al."
@@ -173,28 +277,40 @@ module Bio
173
277
  "#{authors}, #{@journal} #{@volume} #{page_from} (#{@year})."
174
278
  end
175
279
 
280
+ # Formats in the Genome Biology style.
281
+ # * http://genomebiology.com/
176
282
  def genome_biol
177
283
  authors = @authors.collect {|name| strip_dots(name)}.join(', ')
178
284
  journal = strip_dots(@journal)
179
285
  "#{authors}: #{@title} #{journal} #{@year}, #{@volume}:#{@pages}."
180
286
  end
287
+ # Formats in the Current Biology style.
288
+ # * http://www.current-biology.com/
181
289
  alias current genome_biol
182
290
 
291
+ # Formats in the Genome Research style.
292
+ # * http://genome.org/
183
293
  def genome_res
184
294
  authors = authors_join(' and ')
185
295
  "#{authors} #{@year}.\n #{@title} #{@journal} #{@volume}: #{@pages}."
186
296
  end
187
297
 
298
+ # Formats in the Nucleic Acids Research style.
299
+ # * http://nar.oxfordjournals.org/
188
300
  def nar
189
301
  authors = authors_join(' and ')
190
302
  "#{authors} (#{@year}) #{@title} #{@journal}, #{@volume}, #{@pages}."
191
303
  end
192
304
 
305
+ # Formats in the CELL Press style.
306
+ # * http://www.cell.com/
193
307
  def cell
194
308
  authors = authors_join(' and ')
195
309
  "#{authors} (#{@year}). #{@title} #{@journal} #{@volume}, #{pages}."
196
310
  end
197
-
311
+
312
+ # Formats in the TRENDS Journals.
313
+ # * http://www.trends.com/
198
314
  def trends
199
315
  if @authors.size > 2
200
316
  authors = "#{@authors[0]} et al."
@@ -235,22 +351,37 @@ module Bio
235
351
 
236
352
  end
237
353
 
238
-
354
+ # Set of Bio::Reference.
355
+ #
356
+ # === Examples
357
+ #
358
+ # refs = Bio::References.new
359
+ # refs.append(Bio::Reference.new(hash))
360
+ # refs.each do |reference|
361
+ # ...
362
+ # end
363
+ #
239
364
  class References
240
365
 
366
+ # Array of Bio::Reference.
367
+ attr_accessor :references
368
+
369
+ #
241
370
  def initialize(ary = [])
242
371
  @references = ary
243
372
  end
244
- attr_accessor :references
245
373
 
246
- def append(a)
247
- @references.push(a) if a.is_a? Reference
374
+
375
+ # Append a Bio::Reference object.
376
+ def append(reference)
377
+ @references.push(reference) if reference.is_a? Reference
248
378
  return self
249
379
  end
250
380
 
381
+ # Iterates each Bio::Reference object.
251
382
  def each
252
- @references.each do |x|
253
- yield x
383
+ @references.each do |reference|
384
+ yield reference
254
385
  end
255
386
  end
256
387
 
@@ -258,51 +389,3 @@ module Bio
258
389
 
259
390
  end
260
391
 
261
-
262
-
263
- =begin
264
-
265
- = Bio::Reference
266
-
267
- --- Bio::Reference.new(hash)
268
-
269
- --- Bio::Reference#authors -> Array
270
- --- Bio::Reference#title -> String
271
- --- Bio::Reference#journal -> String
272
- --- Bio::Reference#volume -> Fixnum
273
- --- Bio::Reference#issue -> Fixnum
274
- --- Bio::Reference#pages -> String
275
- --- Bio::Reference#year -> Fixnum
276
- --- Bio::Reference#pubmed -> Fixnum
277
- --- Bio::Reference#medline -> Fixnum
278
- --- Bio::Reference#abstract -> String
279
- --- Bio::Reference#url -> String
280
- --- Bio::Reference#mesh -> Array
281
- --- Bio::Reference#affiliations -> Array
282
-
283
- --- Bio::Reference#format(style = nil, option = nil) -> String
284
-
285
- --- Bio::Reference#endnote
286
- --- Bio::Reference#bibitem(item = nil) -> String
287
- --- Bio::Reference#bibtex(section = nil) -> String
288
- --- Bio::Reference#rd(str = nil) -> String
289
- --- Bio::Reference#nature(short = false) -> String
290
- --- Bio::Reference#science -> String
291
- --- Bio::Reference#genome_biol -> String
292
- --- Bio::Reference#genome_res -> String
293
- --- Bio::Reference#nar -> String
294
- --- Bio::Reference#cell -> String
295
- --- Bio::Reference#trends -> String
296
- --- Bio::Reference#general -> String
297
-
298
- = Bio::References
299
-
300
- --- Bio::References.new(ary = [])
301
-
302
- --- Bio::References#references -> Array
303
- --- Bio::References#append(a) -> Bio::References
304
- --- Bio::References#each -> Array
305
-
306
- =end
307
-
308
-
@@ -1,65 +1,75 @@
1
1
  #
2
2
  # = bio/sequence.rb - biological sequence class
3
3
  #
4
- # Copyright:: Copyright (C) 2000-2005
4
+ # Copyright:: Copyright (C) 2000-2006
5
5
  # Toshiaki Katayama <k@bioruby.org>,
6
- # Yoshinori K. Okuji <okuji@embug.org>,
6
+ # Yoshinori K. Okuji <okuji@enbug.org>,
7
7
  # Naohisa Goto <ng@bioruby.org>
8
- # License:: LGPL
8
+ # License:: Ruby's
9
9
  #
10
- # $Id: sequence.rb,v 0.50 2006/01/20 09:58:31 k Exp $
11
- #
12
- #--
13
- # *TODO* remove this functionality?
14
- # You can use Bio::Seq instead of Bio::Sequence for short.
15
- #++
16
- #
17
- #--
18
- #
19
- # This library is free software; you can redistribute it and/or
20
- # modify it under the terms of the GNU Lesser General Public
21
- # License as published by the Free Software Foundation; either
22
- # version 2 of the License, or (at your option) any later version.
23
- #
24
- # This library is distributed in the hope that it will be useful,
25
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
26
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27
- # Lesser General Public License for more details.
28
- #
29
- # You should have received a copy of the GNU Lesser General Public
30
- # License along with this library; if not, write to the Free Software
31
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
32
- #
33
- #++
10
+ # $Id: sequence.rb,v 0.56 2006/02/17 17:15:08 k Exp $
34
11
  #
35
12
 
36
- require 'bio/data/na'
37
- require 'bio/data/aa'
38
- require 'bio/data/codontable'
39
- require 'bio/location'
13
+ require 'bio/sequence/compat'
40
14
 
41
15
  module Bio
42
16
 
43
- # Nucleic/Amino Acid sequence
17
+ class Sequence
44
18
 
45
- class Sequence < String
19
+ autoload :Common, 'bio/sequence/common'
20
+ autoload :NA, 'bio/sequence/na'
21
+ autoload :AA, 'bio/sequence/aa'
22
+ autoload :Generic, 'bio/sequence/generic'
23
+ autoload :Format, 'bio/sequence/format'
46
24
 
47
- def self.auto(str)
48
- moltype = self.guess(str)
49
- if moltype == NA
50
- NA.new(str)
25
+ def initialize(str)
26
+ @seq = str
27
+ end
28
+
29
+ def method_missing(*arg)
30
+ @seq.send(*arg)
31
+ end
32
+
33
+ attr_accessor :entry_id, :definition, :features, :references, :comments,
34
+ :date, :keywords, :dblinks, :taxonomy, :moltype, :seq
35
+
36
+ def output(style)
37
+ extend Bio::Sequence::Format
38
+ case style
39
+ when :fasta
40
+ format_fasta
41
+ when :gff
42
+ format_gff
43
+ when :genbank
44
+ format_genbank
45
+ when :embl
46
+ format_embl
47
+ end
48
+ end
49
+
50
+ def auto
51
+ @moltype = guess
52
+ if @moltype == NA
53
+ @seq = NA.new(@seq)
51
54
  else
52
- AA.new(str)
55
+ @seq = AA.new(@seq)
53
56
  end
54
57
  end
55
58
 
56
- def guess(threshold = 0.9)
57
- cmp = self.composition
59
+ def self.auto(str)
60
+ seq = self.new(str)
61
+ seq.auto
62
+ return seq
63
+ end
64
+
65
+ def guess(threshold = 0.9, length = 10000, index = 0)
66
+ str = @seq.to_s[index,length].to_s.extend Bio::Sequence::Common
67
+ cmp = str.composition
58
68
 
59
69
  bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] +
60
70
  cmp['a'] + cmp['t'] + cmp['g'] + cmp['c']
61
71
 
62
- total = self.length - cmp['N'] - cmp['n']
72
+ total = @seq.length - cmp['N'] - cmp['n']
63
73
 
64
74
  if bases.to_f / total > threshold
65
75
  return NA
@@ -72,389 +82,19 @@ class Sequence < String
72
82
  self.new(str).guess(*args)
73
83
  end
74
84
 
75
- def to_s
76
- String.new(self)
77
- end
78
- alias to_str to_s
79
-
80
- # Force self to re-initialize for clean up (remove white spaces,
81
- # case unification).
82
- def seq
83
- self.class.new(self)
84
- end
85
-
86
- # Similar to the 'seq' method, but changes the self object destructively.
87
- def normalize!
88
- initialize(self)
89
- self
90
- end
91
- alias seq! normalize!
92
-
93
- def <<(*arg)
94
- super(self.class.new(*arg))
85
+ def na
86
+ @seq = NA.new(@seq)
87
+ @moltype = NA
95
88
  end
96
- alias concat <<
97
-
98
- def +(*arg)
99
- self.class.new(super(*arg))
100
- end
101
-
102
- # Returns the subsequence of the self string.
103
- def subseq(s = 1, e = self.length)
104
- return nil if s < 1 or e < 1
105
- s -= 1
106
- e -= 1
107
- self[s..e]
108
- end
109
-
110
- # Output the FASTA format string of the sequence. The 1st argument is
111
- # used as the comment string. If the 2nd option is given, the output
112
- # sequence will be folded.
113
- def to_fasta(header = '', width = nil)
114
- ">#{header}\n" +
115
- if width
116
- self.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
117
- else
118
- self.to_s + "\n"
119
- end
120
- end
121
-
122
- # This method iterates on sub string with specified length 'window_size'.
123
- # By specifing 'step_size', codon sized shifting or spliting genome
124
- # sequence with ovelapping each end can easily be yielded.
125
- #
126
- # The remainder sequence at the terminal end will be returned.
127
- #
128
- # Example:
129
- # # prints average GC% on each 100bp
130
- # seq.window_search(100) do |subseq|
131
- # puts subseq.gc
132
- # end
133
- # # prints every translated peptide (length 5aa) in the same frame
134
- # seq.window_search(15, 3) do |subseq|
135
- # puts subseq.translate
136
- # end
137
- # # split genome sequence by 10000bp with 1000bp overlap in fasta format
138
- # i = 1
139
- # remainder = seq.window_search(10000, 9000) do |subseq|
140
- # puts subseq.to_fasta("segment #{i}", 60)
141
- # i += 1
142
- # end
143
- # puts remainder.to_fasta("segment #{i}", 60)
144
- #
145
- def window_search(window_size, step_size = 1)
146
- i = 0
147
- 0.step(self.length - window_size, step_size) do |i|
148
- yield self[i, window_size]
149
- end
150
- return self[i + window_size .. -1]
151
- end
152
-
153
- # This method receive a hash of residues/bases to the particular values,
154
- # and sum up the value along with the self sequence. Especially useful
155
- # to use with the window_search method and amino acid indices etc.
156
- def total(hash)
157
- hash.default = 0.0 unless hash.default
158
- sum = 0.0
159
- self.each_byte do |x|
160
- begin
161
- sum += hash[x.chr]
162
- end
163
- end
164
- return sum
165
- end
166
-
167
- # Returns a hash of the occurrence counts for each residue or base.
168
- def composition
169
- count = Hash.new(0)
170
- self.scan(/./) do |x|
171
- count[x] += 1
172
- end
173
- return count
174
- end
175
-
176
- # Returns a randomized sequence keeping its composition by default.
177
- # The argument is required when generating a random sequence from the empty
178
- # sequence (used by the class methods NA.randomize, AA.randomize).
179
- # If the block is given, yields for each random residue/base.
180
- def randomize(hash = nil)
181
- length = self.length
182
- if hash
183
- count = hash.clone
184
- count.each_value {|x| length += x}
185
- else
186
- count = self.composition
187
- end
188
-
189
- seq = ''
190
- tmp = {}
191
- length.times do
192
- count.each do |k, v|
193
- tmp[k] = v * rand
194
- end
195
- max = tmp.max {|a, b| a[1] <=> b[1]}
196
- count[max.first] -= 1
197
-
198
- if block_given?
199
- yield max.first
200
- else
201
- seq += max.first
202
- end
203
- end
204
- return self.class.new(seq)
205
- end
206
-
207
- # Generate a new random sequence with the given frequency of bases
208
- # or residues. The sequence length is determined by the sum of each
209
- # base/residue occurences.
210
- def self.randomize(*arg, &block)
211
- self.new('').randomize(*arg, &block)
212
- end
213
-
214
- # Receive a GenBank style position string and convert it to the Locations
215
- # objects to splice the sequence itself. See also: bio/location.rb
216
- #
217
- # This method depends on Locations class, see bio/location.rb
218
- def splicing(position)
219
- unless position.is_a?(Locations) then
220
- position = Locations.new(position)
221
- end
222
- s = ''
223
- position.each do |location|
224
- if location.sequence
225
- s << location.sequence
226
- else
227
- exon = self.subseq(location.from, location.to)
228
- begin
229
- exon.complement! if location.strand < 0
230
- rescue NameError
231
- end
232
- s << exon
233
- end
234
- end
235
- return self.class.new(s)
236
- end
237
-
238
-
239
- # Nucleic Acid sequence
240
-
241
- class NA < Sequence
242
-
243
- # Generate a nucleic acid sequence object from a string.
244
- def initialize(str)
245
- super
246
- self.downcase!
247
- self.tr!(" \t\n\r",'')
248
- end
249
-
250
- # This method depends on Locations class, see bio/location.rb
251
- def splicing(position)
252
- mRNA = super
253
- if mRNA.rna?
254
- mRNA.tr!('t', 'u')
255
- else
256
- mRNA.tr!('u', 't')
257
- end
258
- mRNA
259
- end
260
-
261
- # Returns complement sequence without reversing ("atgc" -> "tacg")
262
- def forward_complement
263
- s = self.class.new(self)
264
- s.forward_complement!
265
- s
266
- end
267
-
268
- # Convert to complement sequence without reversing ("atgc" -> "tacg")
269
- def forward_complement!
270
- if self.rna?
271
- self.tr!('augcrymkdhvbswn', 'uacgyrkmhdbvswn')
272
- else
273
- self.tr!('atgcrymkdhvbswn', 'tacgyrkmhdbvswn')
274
- end
275
- self
276
- end
277
-
278
- # Returns reverse complement sequence ("atgc" -> "gcat")
279
- def reverse_complement
280
- s = self.class.new(self)
281
- s.reverse_complement!
282
- s
283
- end
284
-
285
- # Convert to reverse complement sequence ("atgc" -> "gcat")
286
- def reverse_complement!
287
- self.reverse!
288
- self.forward_complement!
289
- end
290
-
291
- # Aliases for short
292
- alias complement reverse_complement
293
- alias complement! reverse_complement!
294
-
295
-
296
- # Translate into the amino acid sequence from the given frame and the
297
- # selected codon table. The table also can be a Bio::CodonTable object.
298
- # The 'unknown' character is used for invalid/unknown codon (can be
299
- # used for 'nnn' and/or gap translation in practice).
300
- #
301
- # Frame can be 1, 2 or 3 for the forward strand and -1, -2 or -3
302
- # (4, 5 or 6 is also accepted) for the reverse strand.
303
- def translate(frame = 1, table = 1, unknown = 'X')
304
- if table.is_a?(Bio::CodonTable)
305
- ct = table
306
- else
307
- ct = Bio::CodonTable[table]
308
- end
309
- naseq = self.dna
310
- case frame
311
- when 1, 2, 3
312
- from = frame - 1
313
- when 4, 5, 6
314
- from = frame - 4
315
- naseq.complement!
316
- when -1, -2, -3
317
- from = -1 - frame
318
- naseq.complement!
319
- else
320
- from = 0
321
- end
322
- nalen = naseq.length - from
323
- nalen -= nalen % 3
324
- aaseq = naseq[from, nalen].gsub(/.{3}/) {|codon| ct[codon] or unknown}
325
- return Bio::Sequence::AA.new(aaseq)
326
- end
327
-
328
- # Returns counts of the each codon in the sequence by Hash.
329
- def codon_usage
330
- hash = Hash.new(0)
331
- self.window_search(3, 3) do |codon|
332
- hash[codon] += 1
333
- end
334
- return hash
335
- end
336
-
337
- # Calculate the ratio of GC / ATGC bases in percent.
338
- def gc_percent
339
- count = self.composition
340
- at = count['a'] + count['t'] + count['u']
341
- gc = count['g'] + count['c']
342
- gc = 100 * gc / (at + gc)
343
- return gc
344
- end
345
-
346
- # Show abnormal bases other than 'atgcu'.
347
- def illegal_bases
348
- self.scan(/[^atgcu]/).sort.uniq
349
- end
350
-
351
- # Estimate the weight of this biological string molecule.
352
- # NucleicAcid is defined in bio/data/na.rb
353
- def molecular_weight
354
- if self.rna?
355
- NucleicAcid.weight(self, true)
356
- else
357
- NucleicAcid.weight(self)
358
- end
359
- end
360
-
361
- # Convert the universal code string into the regular expression.
362
- def to_re
363
- if self.rna?
364
- NucleicAcid.to_re(self.dna, true)
365
- else
366
- NucleicAcid.to_re(self)
367
- end
368
- end
369
-
370
- # Convert the self string into the list of the names of the each base.
371
- def names
372
- array = []
373
- self.each_byte do |x|
374
- array.push(NucleicAcid.names[x.chr.upcase])
375
- end
376
- return array
377
- end
378
-
379
- # Output a DNA string by substituting 'u' to 't'.
380
- def dna
381
- self.tr('u', 't')
382
- end
383
-
384
- def dna!
385
- self.tr!('u', 't')
386
- end
387
-
388
- # Output a RNA string by substituting 't' to 'u'.
389
- def rna
390
- self.tr('t', 'u')
391
- end
392
-
393
- def rna!
394
- self.tr!('t', 'u')
395
- end
396
-
397
- def rna?
398
- self.index('u')
399
- end
400
- protected :rna?
401
-
402
- def pikachu
403
- self.dna.tr("atgc", "pika") # joke, of course :-)
404
- end
405
-
406
- end
407
-
408
-
409
- # Amino Acid sequence
410
-
411
- class AA < Sequence
412
-
413
- # Generate a amino acid sequence object from a string.
414
- def initialize(str)
415
- super
416
- self.upcase!
417
- self.tr!(" \t\n\r",'')
418
- end
419
-
420
- # Estimate the weight of this protein.
421
- # AminoAcid is defined in bio/data/aa.rb
422
- def molecular_weight
423
- AminoAcid.weight(self)
424
- end
425
-
426
- def to_re
427
- AminoAcid.to_re(self)
428
- end
429
-
430
- # Generate the list of the names of the each residue along with the
431
- # sequence (3 letters code).
432
- def codes
433
- array = []
434
- self.each_byte do |x|
435
- array.push(AminoAcid.names[x.chr])
436
- end
437
- return array
438
- end
439
-
440
- # Similar to codes but returns long names.
441
- def names
442
- self.codes.map do |x|
443
- AminoAcid.names[x]
444
- end
445
- end
446
89
 
90
+ def aa
91
+ @seq = AA.new(@seq)
92
+ @moltype = AA
447
93
  end
448
94
 
449
95
  end # Sequence
450
96
 
451
97
 
452
- class Seq < Sequence
453
- attr_accessor :entry_id, :definition, :features, :references, :comments,
454
- :date, :keywords, :dblinks, :taxonomy, :moltype
455
- end
456
-
457
-
458
98
  end # Bio
459
99
 
460
100