bio 0.7.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. data/bin/bioruby +71 -27
  2. data/bin/br_biofetch.rb +5 -17
  3. data/bin/br_bioflat.rb +14 -26
  4. data/bin/br_biogetseq.rb +6 -18
  5. data/bin/br_pmfetch.rb +6 -16
  6. data/doc/Changes-0.7.rd +35 -0
  7. data/doc/KEGG_API.rd +287 -172
  8. data/doc/KEGG_API.rd.ja +273 -160
  9. data/doc/Tutorial.rd +18 -9
  10. data/doc/Tutorial.rd.ja +656 -138
  11. data/lib/bio.rb +6 -24
  12. data/lib/bio/alignment.rb +5 -5
  13. data/lib/bio/appl/blast.rb +132 -98
  14. data/lib/bio/appl/blast/format0.rb +9 -19
  15. data/lib/bio/appl/blast/wublast.rb +5 -18
  16. data/lib/bio/appl/emboss.rb +40 -47
  17. data/lib/bio/appl/hmmer.rb +116 -82
  18. data/lib/bio/appl/hmmer/report.rb +509 -364
  19. data/lib/bio/appl/spidey/report.rb +7 -18
  20. data/lib/bio/data/na.rb +3 -21
  21. data/lib/bio/db.rb +3 -21
  22. data/lib/bio/db/aaindex.rb +147 -52
  23. data/lib/bio/db/embl/common.rb +27 -6
  24. data/lib/bio/db/embl/embl.rb +18 -10
  25. data/lib/bio/db/embl/sptr.rb +87 -67
  26. data/lib/bio/db/embl/swissprot.rb +32 -3
  27. data/lib/bio/db/embl/trembl.rb +32 -3
  28. data/lib/bio/db/embl/uniprot.rb +32 -3
  29. data/lib/bio/db/fasta.rb +327 -289
  30. data/lib/bio/db/medline.rb +25 -4
  31. data/lib/bio/db/nbrf.rb +12 -20
  32. data/lib/bio/db/pdb.rb +4 -1
  33. data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
  34. data/lib/bio/db/pdb/pdb.rb +13 -8
  35. data/lib/bio/db/rebase.rb +93 -97
  36. data/lib/bio/feature.rb +2 -31
  37. data/lib/bio/io/ddbjxml.rb +167 -139
  38. data/lib/bio/io/fastacmd.rb +89 -56
  39. data/lib/bio/io/flatfile.rb +994 -278
  40. data/lib/bio/io/flatfile/index.rb +257 -194
  41. data/lib/bio/io/flatfile/indexer.rb +37 -29
  42. data/lib/bio/reference.rb +147 -64
  43. data/lib/bio/sequence.rb +57 -417
  44. data/lib/bio/sequence/aa.rb +64 -0
  45. data/lib/bio/sequence/common.rb +175 -0
  46. data/lib/bio/sequence/compat.rb +68 -0
  47. data/lib/bio/sequence/format.rb +134 -0
  48. data/lib/bio/sequence/generic.rb +24 -0
  49. data/lib/bio/sequence/na.rb +189 -0
  50. data/lib/bio/shell.rb +9 -23
  51. data/lib/bio/shell/core.rb +130 -125
  52. data/lib/bio/shell/demo.rb +143 -0
  53. data/lib/bio/shell/{session.rb → interface.rb} +42 -40
  54. data/lib/bio/shell/object.rb +52 -0
  55. data/lib/bio/shell/plugin/codon.rb +4 -22
  56. data/lib/bio/shell/plugin/emboss.rb +23 -0
  57. data/lib/bio/shell/plugin/entry.rb +34 -25
  58. data/lib/bio/shell/plugin/flatfile.rb +5 -23
  59. data/lib/bio/shell/plugin/keggapi.rb +11 -24
  60. data/lib/bio/shell/plugin/midi.rb +5 -23
  61. data/lib/bio/shell/plugin/obda.rb +4 -22
  62. data/lib/bio/shell/plugin/seq.rb +6 -24
  63. data/lib/bio/shell/rails/Rakefile +10 -0
  64. data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
  65. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
  66. data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
  67. data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
  68. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
  69. data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
  70. data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
  71. data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
  72. data/lib/bio/shell/rails/config/boot.rb +19 -0
  73. data/lib/bio/shell/rails/config/database.yml +85 -0
  74. data/lib/bio/shell/rails/config/environment.rb +53 -0
  75. data/lib/bio/shell/rails/config/environments/development.rb +19 -0
  76. data/lib/bio/shell/rails/config/environments/production.rb +19 -0
  77. data/lib/bio/shell/rails/config/environments/test.rb +19 -0
  78. data/lib/bio/shell/rails/config/routes.rb +19 -0
  79. data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
  80. data/lib/bio/shell/rails/public/404.html +8 -0
  81. data/lib/bio/shell/rails/public/500.html +8 -0
  82. data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
  83. data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
  84. data/lib/bio/shell/rails/public/dispatch.rb +10 -0
  85. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  86. data/lib/bio/shell/rails/public/images/icon.png +0 -0
  87. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  88. data/lib/bio/shell/rails/public/index.html +277 -0
  89. data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
  90. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
  91. data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
  92. data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
  93. data/lib/bio/shell/rails/public/robots.txt +1 -0
  94. data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
  95. data/lib/bio/shell/rails/script/about +3 -0
  96. data/lib/bio/shell/rails/script/breakpointer +3 -0
  97. data/lib/bio/shell/rails/script/console +3 -0
  98. data/lib/bio/shell/rails/script/destroy +3 -0
  99. data/lib/bio/shell/rails/script/generate +3 -0
  100. data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
  101. data/lib/bio/shell/rails/script/performance/profiler +3 -0
  102. data/lib/bio/shell/rails/script/plugin +3 -0
  103. data/lib/bio/shell/rails/script/process/reaper +3 -0
  104. data/lib/bio/shell/rails/script/process/spawner +3 -0
  105. data/lib/bio/shell/rails/script/process/spinner +3 -0
  106. data/lib/bio/shell/rails/script/runner +3 -0
  107. data/lib/bio/shell/rails/script/server +42 -0
  108. data/lib/bio/shell/rails/test/test_helper.rb +28 -0
  109. data/lib/bio/shell/web.rb +90 -0
  110. data/lib/bio/util/contingency_table.rb +231 -225
  111. data/sample/any2fasta.rb +59 -0
  112. data/test/data/HMMER/hmmpfam.out +64 -0
  113. data/test/data/HMMER/hmmsearch.out +88 -0
  114. data/test/data/aaindex/DAYM780301 +30 -0
  115. data/test/data/aaindex/PRAM900102 +20 -0
  116. data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
  117. data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
  118. data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
  119. data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
  120. data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
  121. data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
  122. data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
  123. data/test/unit/bio/appl/blast/test_report.rb +15 -12
  124. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
  125. data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
  126. data/test/unit/bio/appl/test_blast.rb +5 -5
  127. data/test/unit/bio/data/test_na.rb +9 -18
  128. data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
  129. data/test/unit/bio/db/test_aaindex.rb +197 -0
  130. data/test/unit/bio/io/test_fastacmd.rb +55 -0
  131. data/test/unit/bio/sequence/test_aa.rb +102 -0
  132. data/test/unit/bio/sequence/test_common.rb +178 -0
  133. data/test/unit/bio/sequence/test_compat.rb +82 -0
  134. data/test/unit/bio/sequence/test_na.rb +242 -0
  135. data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
  136. data/test/unit/bio/test_alignment.rb +15 -7
  137. data/test/unit/bio/test_reference.rb +198 -0
  138. data/test/unit/bio/test_sequence.rb +4 -49
  139. data/test/unit/bio/test_shell.rb +2 -2
  140. metadata +118 -15
  141. data/lib/bio/io/brdb.rb +0 -103
  142. data/lib/bioruby.rb +0 -34
@@ -1,23 +1,10 @@
1
1
  #
2
- # bio/io/flatfile/indexer.rb - OBDA flatfile indexer
2
+ # = bio/io/flatfile/indexer.rb - OBDA flatfile indexer
3
3
  #
4
- # Copyright (C) 2002 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
4
+ # Copyright:: Copyright (C) 2002 GOTO Naohisa <ng@bioruby.org>
5
+ # License:: Ruby's
5
6
  #
6
- # This library is free software; you can redistribute it and/or
7
- # modify it under the terms of the GNU Lesser General Public
8
- # License as published by the Free Software Foundation; either
9
- # version 2 of the License, or (at your option) any later version.
10
- #
11
- # This library is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
- # Lesser General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU Lesser General Public
17
- # License along with this library; if not, write to the Free Software
18
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
- #
20
- # $Id: indexer.rb,v 1.21 2005/09/26 13:00:08 k Exp $
7
+ # $Id: indexer.rb,v 1.23 2006/02/22 08:41:03 ngoto Exp $
21
8
  #
22
9
 
23
10
  require 'bio/io/flatfile/index'
@@ -80,6 +67,8 @@ module Bio
80
67
  BlastDefaultParser.new(Bio::Blast::WU::Report, *arg)
81
68
  when 'Bio::Blast::WU::Report_TBlast'
82
69
  BlastDefaultParser.new(Bio::Blast::WU::Report_TBlast, *arg)
70
+ when 'Bio::PDB::ChemicalComponent'
71
+ PDBChemicalComponentParser.new(Bio::PDB::ChemicalComponent, *arg)
83
72
  else
84
73
  raise 'unknown or unsupported format'
85
74
  end #case dbclass.to_s
@@ -130,10 +119,10 @@ module Bio
130
119
  attr_reader :fileid
131
120
 
132
121
  def each
133
- pos = @flatfile.pos
134
122
  @flatfile.each do |x|
135
123
  @entry = x
136
- len = @flatfile.entry_raw.length
124
+ pos = @flatfile.entry_start_pos
125
+ len = @flatfile.entry_ended_pos - @flatfile.entry_start_pos
137
126
  begin
138
127
  yield pos, len
139
128
  rescue RuntimeError, NameError => evar
@@ -150,7 +139,6 @@ module Bio
150
139
  DEBUG.print "This entry shall be incorrectly indexed.\n"
151
140
  end
152
141
  end #rescue
153
- pos = @flatfile.pos
154
142
  end
155
143
  end
156
144
 
@@ -204,15 +192,6 @@ module Bio
204
192
  end
205
193
  self.add_secondary_namespaces(*sec_names)
206
194
  end
207
- def open_flatfile(fileid, file)
208
- super
209
- @flatfile.pos = 0
210
- begin
211
- pos = @flatfile.pos
212
- line = @flatfile.gets
213
- end until (!line or line =~ /^LOCUS /)
214
- @flatfile.pos = pos
215
- end
216
195
  end #class GenBankParser
217
196
 
218
197
  class GenPeptParser < GenBankParser
@@ -437,6 +416,35 @@ module Bio
437
416
  end
438
417
  end #class BlastDefaultReportParser
439
418
 
419
+ class PDBChemicalComponentParser < TemplateParser
420
+ NAMESTYLE = NameSpaces.new(
421
+ NameSpace.new( 'UNIQUE', Proc.new { |x| x.entry_id } )
422
+ )
423
+ PRIMARY = 'UNIQUE'
424
+ def initialize(klass, pri_name = nil, sec_names = nil)
425
+ super()
426
+ self.format = 'raw'
427
+ self.dbclass = Bio::PDB::ChemicalComponent
428
+ self.set_primary_namespace((pri_name or PRIMARY))
429
+ unless sec_names then
430
+ sec_names = []
431
+ @namestyle.each_value do |x|
432
+ sec_names << x.name if x.name != self.primary.name
433
+ end
434
+ end
435
+ self.add_secondary_namespaces(*sec_names)
436
+ end
437
+ def open_flatfile(fileid, file)
438
+ super
439
+ @flatfile.pos = 0
440
+ begin
441
+ pos = @flatfile.pos
442
+ line = @flatfile.gets
443
+ end until (!line or line =~ /^RESIDUE /)
444
+ @flatfile.pos = pos
445
+ end
446
+ end #class PDBChemicalComponentParser
447
+
440
448
  end #module Parser
441
449
 
442
450
  def self.makeindexBDB(name, parser, options, *files)
@@ -1,7 +1,23 @@
1
1
  #
2
- # bio/reference.rb - journal reference class
2
+ # = bio/reference.rb - Journal reference classes
3
3
  #
4
- # Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
4
+ # Copyright:: Copyright (C) 2001
5
+ # KATAYAMA Toshiaki <k@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: reference.rb,v 1.21 2006/02/08 15:06:26 nakao Exp $
9
+ #
10
+ # == Description
11
+ #
12
+ # Journal reference classes.
13
+ #
14
+ # == Examples
15
+ #
16
+ # == References
17
+ #
18
+ #
19
+ #
20
+ #--
5
21
  #
6
22
  # This library is free software; you can redistribute it and/or
7
23
  # modify it under the terms of the GNU Lesser General Public
@@ -17,13 +33,78 @@
17
33
  # License along with this library; if not, write to the Free Software
18
34
  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
35
  #
20
- # $Id: reference.rb,v 1.18 2005/12/18 16:58:58 nakao Exp $
36
+ #++
21
37
  #
22
38
 
23
39
  module Bio
24
40
 
41
+ # A class for journal reference information.
42
+ #
43
+ # === Examples
44
+ #
45
+ # hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ],
46
+ # 'title' => "Title of the study.",
47
+ # 'journal' => "Theor. J. Hoge",
48
+ # 'volume' => 12,
49
+ # 'issue' => 3,
50
+ # 'pages' => "123-145",
51
+ # 'year' => 2001,
52
+ # 'pubmed' => 12345678,
53
+ # 'medline' => 98765432,
54
+ # 'abstract' => "Hoge fuga. ...",
55
+ # 'url' => "http://example.com",
56
+ # 'mesh' => [],
57
+ # 'affiliations' => []}
58
+ # ref = Bio::Reference.new(hash)
59
+ #
60
+ # # Formats in the BibTeX style.
61
+ # ref.format("bibtex")
62
+ #
63
+ # # Short-cut for Bio::Reference#format("bibtex")
64
+ # ref.bibtex
65
+ #
25
66
  class Reference
26
67
 
68
+ # Author names in an Array, [ "Hoge, J.P.", "Fuga, F.B." ].
69
+ attr_reader :authors
70
+
71
+ # "Title of the study."
72
+ attr_reader :title
73
+
74
+ # "Theor. J. Hoge"
75
+ attr_reader :journal
76
+
77
+ # 12
78
+ attr_reader :volume
79
+
80
+ # 3
81
+ attr_reader :issue
82
+
83
+ # "123-145"
84
+ attr_reader :pages
85
+
86
+ # 2001
87
+ attr_reader :year
88
+
89
+ # 12345678
90
+ attr_reader :pubmed
91
+
92
+ # 98765432
93
+ attr_reader :medline
94
+
95
+ # Abstract text in String.
96
+ attr_reader :abstract
97
+
98
+ # A URL String.
99
+ attr_reader :url
100
+
101
+ # MeSH terms in an Array.
102
+ attr_reader :mesh
103
+
104
+ # Affiliations in an Array.
105
+ attr_reader :affiliations
106
+
107
+ #
27
108
  def initialize(hash)
28
109
  hash.default = ''
29
110
  @authors = hash['authors'] # [ "Hoge, J.P.", "Fuga, F.B." ]
@@ -43,9 +124,23 @@ module Bio
43
124
  @mesh = [] if @mesh.empty?
44
125
  @affiliations = [] if @affiliations.empty?
45
126
  end
46
- attr_reader :authors, :title, :journal, :volume, :issue, :pages, :year,
47
- :pubmed, :medline, :abstract, :url, :mesh, :affiliations
48
127
 
128
+ # Formats the reference in a given style.
129
+ #
130
+ # Styles:
131
+ # 0. nil - general
132
+ # 1. endnote - Endnote
133
+ # 2. bibitem - Bibitem (option acceptable)
134
+ # 3. bibtex - BibTeX (option acceptable)
135
+ # 4. rd - rd (option acceptable)
136
+ # 5. nature - Nature (option acceptable)
137
+ # 6. science - Science
138
+ # 7. genome_biol - Genome Biology
139
+ # 8. genome_res - Genome Research
140
+ # 9. nar - Nucleic Acids Research
141
+ # 10. current - Current Biology
142
+ # 11. trends - Trends in *
143
+ # 12. cell - Cell Press
49
144
  def format(style = nil, option = nil)
50
145
  case style
51
146
  when 'endnote'
@@ -77,19 +172,20 @@ module Bio
77
172
  end
78
173
  end
79
174
 
175
+ # Formats in the Endnote style.
80
176
  def endnote
81
177
  lines = []
82
178
  lines << "%0 Journal Article"
83
179
  @authors.each do |author|
84
180
  lines << "%A #{author}"
85
181
  end
86
- lines << "%D #{@year}" unless @year.empty?
182
+ lines << "%D #{@year}" unless @year.to_s.empty?
87
183
  lines << "%T #{@title}" unless @title.empty?
88
184
  lines << "%J #{@journal}" unless @journal.empty?
89
- lines << "%V #{@volume}" unless @volume.empty?
90
- lines << "%N #{@issue}" unless @issue.empty?
185
+ lines << "%V #{@volume}" unless @volume.to_s.empty?
186
+ lines << "%N #{@issue}" unless @issue.to_s.empty?
91
187
  lines << "%P #{@pages}" unless @pages.empty?
92
- lines << "%M #{@pubmed}" unless @pubmed.empty?
188
+ lines << "%M #{@pubmed}" unless @pubmed.to_s.empty?
93
189
  if @pubmed
94
190
  cgi = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi"
95
191
  opts = "cmd=Retrieve&db=PubMed&dopt=Citation&list_uids"
@@ -104,6 +200,7 @@ module Bio
104
200
  return lines.join("\n")
105
201
  end
106
202
 
203
+ # Formats in the bibitem.
107
204
  def bibitem(item = nil)
108
205
  item = "PMID:#{@pubmed}" unless item
109
206
  pages = @pages.sub('-', '--')
@@ -115,6 +212,7 @@ module Bio
115
212
  END
116
213
  end
117
214
 
215
+ # Formats in the BibTeX style.
118
216
  def bibtex(section = nil)
119
217
  section = "article" unless section
120
218
  authors = authors_join(' and ', ' and ')
@@ -132,11 +230,13 @@ module Bio
132
230
  END
133
231
  end
134
232
 
233
+ # Formats in a general style.
135
234
  def general
136
235
  authors = @authors.join(', ')
137
236
  "#{authors} (#{@year}). \"#{@title}\" #{@journal} #{@volume}:#{@pages}."
138
237
  end
139
238
 
239
+ # Formats in the RD style.
140
240
  def rd(str = nil)
141
241
  @abstract ||= str
142
242
  lines = []
@@ -147,6 +247,8 @@ module Bio
147
247
  return lines.join("\n\n")
148
248
  end
149
249
 
250
+ # Formats in the Nature Publishing Group style.
251
+ # * http://www.nature.com
150
252
  def nature(short = false)
151
253
  if short
152
254
  if @authors.size > 4
@@ -163,6 +265,8 @@ module Bio
163
265
  end
164
266
  end
165
267
 
268
+ # Formats in the Science style.
269
+ # * http://www.sciencemag.org/
166
270
  def science
167
271
  if @authors.size > 4
168
272
  authors = rev_name(@authors[0]) + " et al."
@@ -173,28 +277,40 @@ module Bio
173
277
  "#{authors}, #{@journal} #{@volume} #{page_from} (#{@year})."
174
278
  end
175
279
 
280
+ # Formats in the Genome Biology style.
281
+ # * http://genomebiology.com/
176
282
  def genome_biol
177
283
  authors = @authors.collect {|name| strip_dots(name)}.join(', ')
178
284
  journal = strip_dots(@journal)
179
285
  "#{authors}: #{@title} #{journal} #{@year}, #{@volume}:#{@pages}."
180
286
  end
287
+ # Formats in the Current Biology style.
288
+ # * http://www.current-biology.com/
181
289
  alias current genome_biol
182
290
 
291
+ # Formats in the Genome Research style.
292
+ # * http://genome.org/
183
293
  def genome_res
184
294
  authors = authors_join(' and ')
185
295
  "#{authors} #{@year}.\n #{@title} #{@journal} #{@volume}: #{@pages}."
186
296
  end
187
297
 
298
+ # Formats in the Nucleic Acids Research style.
299
+ # * http://nar.oxfordjournals.org/
188
300
  def nar
189
301
  authors = authors_join(' and ')
190
302
  "#{authors} (#{@year}) #{@title} #{@journal}, #{@volume}, #{@pages}."
191
303
  end
192
304
 
305
+ # Formats in the CELL Press style.
306
+ # * http://www.cell.com/
193
307
  def cell
194
308
  authors = authors_join(' and ')
195
309
  "#{authors} (#{@year}). #{@title} #{@journal} #{@volume}, #{pages}."
196
310
  end
197
-
311
+
312
+ # Formats in the TRENDS Journals.
313
+ # * http://www.trends.com/
198
314
  def trends
199
315
  if @authors.size > 2
200
316
  authors = "#{@authors[0]} et al."
@@ -235,22 +351,37 @@ module Bio
235
351
 
236
352
  end
237
353
 
238
-
354
+ # Set of Bio::Reference.
355
+ #
356
+ # === Examples
357
+ #
358
+ # refs = Bio::References.new
359
+ # refs.append(Bio::Reference.new(hash))
360
+ # refs.each do |reference|
361
+ # ...
362
+ # end
363
+ #
239
364
  class References
240
365
 
366
+ # Array of Bio::Reference.
367
+ attr_accessor :references
368
+
369
+ #
241
370
  def initialize(ary = [])
242
371
  @references = ary
243
372
  end
244
- attr_accessor :references
245
373
 
246
- def append(a)
247
- @references.push(a) if a.is_a? Reference
374
+
375
+ # Append a Bio::Reference object.
376
+ def append(reference)
377
+ @references.push(reference) if reference.is_a? Reference
248
378
  return self
249
379
  end
250
380
 
381
+ # Iterates each Bio::Reference object.
251
382
  def each
252
- @references.each do |x|
253
- yield x
383
+ @references.each do |reference|
384
+ yield reference
254
385
  end
255
386
  end
256
387
 
@@ -258,51 +389,3 @@ module Bio
258
389
 
259
390
  end
260
391
 
261
-
262
-
263
- =begin
264
-
265
- = Bio::Reference
266
-
267
- --- Bio::Reference.new(hash)
268
-
269
- --- Bio::Reference#authors -> Array
270
- --- Bio::Reference#title -> String
271
- --- Bio::Reference#journal -> String
272
- --- Bio::Reference#volume -> Fixnum
273
- --- Bio::Reference#issue -> Fixnum
274
- --- Bio::Reference#pages -> String
275
- --- Bio::Reference#year -> Fixnum
276
- --- Bio::Reference#pubmed -> Fixnum
277
- --- Bio::Reference#medline -> Fixnum
278
- --- Bio::Reference#abstract -> String
279
- --- Bio::Reference#url -> String
280
- --- Bio::Reference#mesh -> Array
281
- --- Bio::Reference#affiliations -> Array
282
-
283
- --- Bio::Reference#format(style = nil, option = nil) -> String
284
-
285
- --- Bio::Reference#endnote
286
- --- Bio::Reference#bibitem(item = nil) -> String
287
- --- Bio::Reference#bibtex(section = nil) -> String
288
- --- Bio::Reference#rd(str = nil) -> String
289
- --- Bio::Reference#nature(short = false) -> String
290
- --- Bio::Reference#science -> String
291
- --- Bio::Reference#genome_biol -> String
292
- --- Bio::Reference#genome_res -> String
293
- --- Bio::Reference#nar -> String
294
- --- Bio::Reference#cell -> String
295
- --- Bio::Reference#trends -> String
296
- --- Bio::Reference#general -> String
297
-
298
- = Bio::References
299
-
300
- --- Bio::References.new(ary = [])
301
-
302
- --- Bio::References#references -> Array
303
- --- Bio::References#append(a) -> Bio::References
304
- --- Bio::References#each -> Array
305
-
306
- =end
307
-
308
-
@@ -1,65 +1,75 @@
1
1
  #
2
2
  # = bio/sequence.rb - biological sequence class
3
3
  #
4
- # Copyright:: Copyright (C) 2000-2005
4
+ # Copyright:: Copyright (C) 2000-2006
5
5
  # Toshiaki Katayama <k@bioruby.org>,
6
- # Yoshinori K. Okuji <okuji@embug.org>,
6
+ # Yoshinori K. Okuji <okuji@enbug.org>,
7
7
  # Naohisa Goto <ng@bioruby.org>
8
- # License:: LGPL
8
+ # License:: Ruby's
9
9
  #
10
- # $Id: sequence.rb,v 0.50 2006/01/20 09:58:31 k Exp $
11
- #
12
- #--
13
- # *TODO* remove this functionality?
14
- # You can use Bio::Seq instead of Bio::Sequence for short.
15
- #++
16
- #
17
- #--
18
- #
19
- # This library is free software; you can redistribute it and/or
20
- # modify it under the terms of the GNU Lesser General Public
21
- # License as published by the Free Software Foundation; either
22
- # version 2 of the License, or (at your option) any later version.
23
- #
24
- # This library is distributed in the hope that it will be useful,
25
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
26
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27
- # Lesser General Public License for more details.
28
- #
29
- # You should have received a copy of the GNU Lesser General Public
30
- # License along with this library; if not, write to the Free Software
31
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
32
- #
33
- #++
10
+ # $Id: sequence.rb,v 0.56 2006/02/17 17:15:08 k Exp $
34
11
  #
35
12
 
36
- require 'bio/data/na'
37
- require 'bio/data/aa'
38
- require 'bio/data/codontable'
39
- require 'bio/location'
13
+ require 'bio/sequence/compat'
40
14
 
41
15
  module Bio
42
16
 
43
- # Nucleic/Amino Acid sequence
17
+ class Sequence
44
18
 
45
- class Sequence < String
19
+ autoload :Common, 'bio/sequence/common'
20
+ autoload :NA, 'bio/sequence/na'
21
+ autoload :AA, 'bio/sequence/aa'
22
+ autoload :Generic, 'bio/sequence/generic'
23
+ autoload :Format, 'bio/sequence/format'
46
24
 
47
- def self.auto(str)
48
- moltype = self.guess(str)
49
- if moltype == NA
50
- NA.new(str)
25
+ def initialize(str)
26
+ @seq = str
27
+ end
28
+
29
+ def method_missing(*arg)
30
+ @seq.send(*arg)
31
+ end
32
+
33
+ attr_accessor :entry_id, :definition, :features, :references, :comments,
34
+ :date, :keywords, :dblinks, :taxonomy, :moltype, :seq
35
+
36
+ def output(style)
37
+ extend Bio::Sequence::Format
38
+ case style
39
+ when :fasta
40
+ format_fasta
41
+ when :gff
42
+ format_gff
43
+ when :genbank
44
+ format_genbank
45
+ when :embl
46
+ format_embl
47
+ end
48
+ end
49
+
50
+ def auto
51
+ @moltype = guess
52
+ if @moltype == NA
53
+ @seq = NA.new(@seq)
51
54
  else
52
- AA.new(str)
55
+ @seq = AA.new(@seq)
53
56
  end
54
57
  end
55
58
 
56
- def guess(threshold = 0.9)
57
- cmp = self.composition
59
+ def self.auto(str)
60
+ seq = self.new(str)
61
+ seq.auto
62
+ return seq
63
+ end
64
+
65
+ def guess(threshold = 0.9, length = 10000, index = 0)
66
+ str = @seq.to_s[index,length].to_s.extend Bio::Sequence::Common
67
+ cmp = str.composition
58
68
 
59
69
  bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] +
60
70
  cmp['a'] + cmp['t'] + cmp['g'] + cmp['c']
61
71
 
62
- total = self.length - cmp['N'] - cmp['n']
72
+ total = @seq.length - cmp['N'] - cmp['n']
63
73
 
64
74
  if bases.to_f / total > threshold
65
75
  return NA
@@ -72,389 +82,19 @@ class Sequence < String
72
82
  self.new(str).guess(*args)
73
83
  end
74
84
 
75
- def to_s
76
- String.new(self)
77
- end
78
- alias to_str to_s
79
-
80
- # Force self to re-initialize for clean up (remove white spaces,
81
- # case unification).
82
- def seq
83
- self.class.new(self)
84
- end
85
-
86
- # Similar to the 'seq' method, but changes the self object destructively.
87
- def normalize!
88
- initialize(self)
89
- self
90
- end
91
- alias seq! normalize!
92
-
93
- def <<(*arg)
94
- super(self.class.new(*arg))
85
+ def na
86
+ @seq = NA.new(@seq)
87
+ @moltype = NA
95
88
  end
96
- alias concat <<
97
-
98
- def +(*arg)
99
- self.class.new(super(*arg))
100
- end
101
-
102
- # Returns the subsequence of the self string.
103
- def subseq(s = 1, e = self.length)
104
- return nil if s < 1 or e < 1
105
- s -= 1
106
- e -= 1
107
- self[s..e]
108
- end
109
-
110
- # Output the FASTA format string of the sequence. The 1st argument is
111
- # used as the comment string. If the 2nd option is given, the output
112
- # sequence will be folded.
113
- def to_fasta(header = '', width = nil)
114
- ">#{header}\n" +
115
- if width
116
- self.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
117
- else
118
- self.to_s + "\n"
119
- end
120
- end
121
-
122
- # This method iterates on sub string with specified length 'window_size'.
123
- # By specifing 'step_size', codon sized shifting or spliting genome
124
- # sequence with ovelapping each end can easily be yielded.
125
- #
126
- # The remainder sequence at the terminal end will be returned.
127
- #
128
- # Example:
129
- # # prints average GC% on each 100bp
130
- # seq.window_search(100) do |subseq|
131
- # puts subseq.gc
132
- # end
133
- # # prints every translated peptide (length 5aa) in the same frame
134
- # seq.window_search(15, 3) do |subseq|
135
- # puts subseq.translate
136
- # end
137
- # # split genome sequence by 10000bp with 1000bp overlap in fasta format
138
- # i = 1
139
- # remainder = seq.window_search(10000, 9000) do |subseq|
140
- # puts subseq.to_fasta("segment #{i}", 60)
141
- # i += 1
142
- # end
143
- # puts remainder.to_fasta("segment #{i}", 60)
144
- #
145
- def window_search(window_size, step_size = 1)
146
- i = 0
147
- 0.step(self.length - window_size, step_size) do |i|
148
- yield self[i, window_size]
149
- end
150
- return self[i + window_size .. -1]
151
- end
152
-
153
- # This method receive a hash of residues/bases to the particular values,
154
- # and sum up the value along with the self sequence. Especially useful
155
- # to use with the window_search method and amino acid indices etc.
156
- def total(hash)
157
- hash.default = 0.0 unless hash.default
158
- sum = 0.0
159
- self.each_byte do |x|
160
- begin
161
- sum += hash[x.chr]
162
- end
163
- end
164
- return sum
165
- end
166
-
167
- # Returns a hash of the occurrence counts for each residue or base.
168
- def composition
169
- count = Hash.new(0)
170
- self.scan(/./) do |x|
171
- count[x] += 1
172
- end
173
- return count
174
- end
175
-
176
- # Returns a randomized sequence keeping its composition by default.
177
- # The argument is required when generating a random sequence from the empty
178
- # sequence (used by the class methods NA.randomize, AA.randomize).
179
- # If the block is given, yields for each random residue/base.
180
- def randomize(hash = nil)
181
- length = self.length
182
- if hash
183
- count = hash.clone
184
- count.each_value {|x| length += x}
185
- else
186
- count = self.composition
187
- end
188
-
189
- seq = ''
190
- tmp = {}
191
- length.times do
192
- count.each do |k, v|
193
- tmp[k] = v * rand
194
- end
195
- max = tmp.max {|a, b| a[1] <=> b[1]}
196
- count[max.first] -= 1
197
-
198
- if block_given?
199
- yield max.first
200
- else
201
- seq += max.first
202
- end
203
- end
204
- return self.class.new(seq)
205
- end
206
-
207
- # Generate a new random sequence with the given frequency of bases
208
- # or residues. The sequence length is determined by the sum of each
209
- # base/residue occurences.
210
- def self.randomize(*arg, &block)
211
- self.new('').randomize(*arg, &block)
212
- end
213
-
214
- # Receive a GenBank style position string and convert it to the Locations
215
- # objects to splice the sequence itself. See also: bio/location.rb
216
- #
217
- # This method depends on Locations class, see bio/location.rb
218
- def splicing(position)
219
- unless position.is_a?(Locations) then
220
- position = Locations.new(position)
221
- end
222
- s = ''
223
- position.each do |location|
224
- if location.sequence
225
- s << location.sequence
226
- else
227
- exon = self.subseq(location.from, location.to)
228
- begin
229
- exon.complement! if location.strand < 0
230
- rescue NameError
231
- end
232
- s << exon
233
- end
234
- end
235
- return self.class.new(s)
236
- end
237
-
238
-
239
- # Nucleic Acid sequence
240
-
241
- class NA < Sequence
242
-
243
- # Generate a nucleic acid sequence object from a string.
244
- def initialize(str)
245
- super
246
- self.downcase!
247
- self.tr!(" \t\n\r",'')
248
- end
249
-
250
- # This method depends on Locations class, see bio/location.rb
251
- def splicing(position)
252
- mRNA = super
253
- if mRNA.rna?
254
- mRNA.tr!('t', 'u')
255
- else
256
- mRNA.tr!('u', 't')
257
- end
258
- mRNA
259
- end
260
-
261
- # Returns complement sequence without reversing ("atgc" -> "tacg")
262
- def forward_complement
263
- s = self.class.new(self)
264
- s.forward_complement!
265
- s
266
- end
267
-
268
- # Convert to complement sequence without reversing ("atgc" -> "tacg")
269
- def forward_complement!
270
- if self.rna?
271
- self.tr!('augcrymkdhvbswn', 'uacgyrkmhdbvswn')
272
- else
273
- self.tr!('atgcrymkdhvbswn', 'tacgyrkmhdbvswn')
274
- end
275
- self
276
- end
277
-
278
- # Returns reverse complement sequence ("atgc" -> "gcat")
279
- def reverse_complement
280
- s = self.class.new(self)
281
- s.reverse_complement!
282
- s
283
- end
284
-
285
- # Convert to reverse complement sequence ("atgc" -> "gcat")
286
- def reverse_complement!
287
- self.reverse!
288
- self.forward_complement!
289
- end
290
-
291
- # Aliases for short
292
- alias complement reverse_complement
293
- alias complement! reverse_complement!
294
-
295
-
296
- # Translate into the amino acid sequence from the given frame and the
297
- # selected codon table. The table also can be a Bio::CodonTable object.
298
- # The 'unknown' character is used for invalid/unknown codon (can be
299
- # used for 'nnn' and/or gap translation in practice).
300
- #
301
- # Frame can be 1, 2 or 3 for the forward strand and -1, -2 or -3
302
- # (4, 5 or 6 is also accepted) for the reverse strand.
303
- def translate(frame = 1, table = 1, unknown = 'X')
304
- if table.is_a?(Bio::CodonTable)
305
- ct = table
306
- else
307
- ct = Bio::CodonTable[table]
308
- end
309
- naseq = self.dna
310
- case frame
311
- when 1, 2, 3
312
- from = frame - 1
313
- when 4, 5, 6
314
- from = frame - 4
315
- naseq.complement!
316
- when -1, -2, -3
317
- from = -1 - frame
318
- naseq.complement!
319
- else
320
- from = 0
321
- end
322
- nalen = naseq.length - from
323
- nalen -= nalen % 3
324
- aaseq = naseq[from, nalen].gsub(/.{3}/) {|codon| ct[codon] or unknown}
325
- return Bio::Sequence::AA.new(aaseq)
326
- end
327
-
328
- # Returns counts of the each codon in the sequence by Hash.
329
- def codon_usage
330
- hash = Hash.new(0)
331
- self.window_search(3, 3) do |codon|
332
- hash[codon] += 1
333
- end
334
- return hash
335
- end
336
-
337
- # Calculate the ratio of GC / ATGC bases in percent.
338
- def gc_percent
339
- count = self.composition
340
- at = count['a'] + count['t'] + count['u']
341
- gc = count['g'] + count['c']
342
- gc = 100 * gc / (at + gc)
343
- return gc
344
- end
345
-
346
- # Show abnormal bases other than 'atgcu'.
347
- def illegal_bases
348
- self.scan(/[^atgcu]/).sort.uniq
349
- end
350
-
351
- # Estimate the weight of this biological string molecule.
352
- # NucleicAcid is defined in bio/data/na.rb
353
- def molecular_weight
354
- if self.rna?
355
- NucleicAcid.weight(self, true)
356
- else
357
- NucleicAcid.weight(self)
358
- end
359
- end
360
-
361
- # Convert the universal code string into the regular expression.
362
- def to_re
363
- if self.rna?
364
- NucleicAcid.to_re(self.dna, true)
365
- else
366
- NucleicAcid.to_re(self)
367
- end
368
- end
369
-
370
- # Convert the self string into the list of the names of the each base.
371
- def names
372
- array = []
373
- self.each_byte do |x|
374
- array.push(NucleicAcid.names[x.chr.upcase])
375
- end
376
- return array
377
- end
378
-
379
- # Output a DNA string by substituting 'u' to 't'.
380
- def dna
381
- self.tr('u', 't')
382
- end
383
-
384
- def dna!
385
- self.tr!('u', 't')
386
- end
387
-
388
- # Output a RNA string by substituting 't' to 'u'.
389
- def rna
390
- self.tr('t', 'u')
391
- end
392
-
393
- def rna!
394
- self.tr!('t', 'u')
395
- end
396
-
397
- def rna?
398
- self.index('u')
399
- end
400
- protected :rna?
401
-
402
- def pikachu
403
- self.dna.tr("atgc", "pika") # joke, of course :-)
404
- end
405
-
406
- end
407
-
408
-
409
- # Amino Acid sequence
410
-
411
- class AA < Sequence
412
-
413
- # Generate a amino acid sequence object from a string.
414
- def initialize(str)
415
- super
416
- self.upcase!
417
- self.tr!(" \t\n\r",'')
418
- end
419
-
420
- # Estimate the weight of this protein.
421
- # AminoAcid is defined in bio/data/aa.rb
422
- def molecular_weight
423
- AminoAcid.weight(self)
424
- end
425
-
426
- def to_re
427
- AminoAcid.to_re(self)
428
- end
429
-
430
- # Generate the list of the names of the each residue along with the
431
- # sequence (3 letters code).
432
- def codes
433
- array = []
434
- self.each_byte do |x|
435
- array.push(AminoAcid.names[x.chr])
436
- end
437
- return array
438
- end
439
-
440
- # Similar to codes but returns long names.
441
- def names
442
- self.codes.map do |x|
443
- AminoAcid.names[x]
444
- end
445
- end
446
89
 
90
+ def aa
91
+ @seq = AA.new(@seq)
92
+ @moltype = AA
447
93
  end
448
94
 
449
95
  end # Sequence
450
96
 
451
97
 
452
- class Seq < Sequence
453
- attr_accessor :entry_id, :definition, :features, :references, :comments,
454
- :date, :keywords, :dblinks, :taxonomy, :moltype
455
- end
456
-
457
-
458
98
  end # Bio
459
99
 
460
100