RubyGems - bio - Versions diffs - 0.7.1 → 1.0.0 - Mend

bio 0.7.1 → 1.0.0

Files changed (142) hide show

data/bin/bioruby +71 -27
data/bin/br_biofetch.rb +5 -17
data/bin/br_bioflat.rb +14 -26
data/bin/br_biogetseq.rb +6 -18
data/bin/br_pmfetch.rb +6 -16
data/doc/Changes-0.7.rd +35 -0
data/doc/KEGG_API.rd +287 -172
data/doc/KEGG_API.rd.ja +273 -160
data/doc/Tutorial.rd +18 -9
data/doc/Tutorial.rd.ja +656 -138
data/lib/bio.rb +6 -24
data/lib/bio/alignment.rb +5 -5
data/lib/bio/appl/blast.rb +132 -98
data/lib/bio/appl/blast/format0.rb +9 -19
data/lib/bio/appl/blast/wublast.rb +5 -18
data/lib/bio/appl/emboss.rb +40 -47
data/lib/bio/appl/hmmer.rb +116 -82
data/lib/bio/appl/hmmer/report.rb +509 -364
data/lib/bio/appl/spidey/report.rb +7 -18
data/lib/bio/data/na.rb +3 -21
data/lib/bio/db.rb +3 -21
data/lib/bio/db/aaindex.rb +147 -52
data/lib/bio/db/embl/common.rb +27 -6
data/lib/bio/db/embl/embl.rb +18 -10
data/lib/bio/db/embl/sptr.rb +87 -67
data/lib/bio/db/embl/swissprot.rb +32 -3
data/lib/bio/db/embl/trembl.rb +32 -3
data/lib/bio/db/embl/uniprot.rb +32 -3
data/lib/bio/db/fasta.rb +327 -289
data/lib/bio/db/medline.rb +25 -4
data/lib/bio/db/nbrf.rb +12 -20
data/lib/bio/db/pdb.rb +4 -1
data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
data/lib/bio/db/pdb/pdb.rb +13 -8
data/lib/bio/db/rebase.rb +93 -97
data/lib/bio/feature.rb +2 -31
data/lib/bio/io/ddbjxml.rb +167 -139
data/lib/bio/io/fastacmd.rb +89 -56
data/lib/bio/io/flatfile.rb +994 -278
data/lib/bio/io/flatfile/index.rb +257 -194
data/lib/bio/io/flatfile/indexer.rb +37 -29
data/lib/bio/reference.rb +147 -64
data/lib/bio/sequence.rb +57 -417
data/lib/bio/sequence/aa.rb +64 -0
data/lib/bio/sequence/common.rb +175 -0
data/lib/bio/sequence/compat.rb +68 -0
data/lib/bio/sequence/format.rb +134 -0
data/lib/bio/sequence/generic.rb +24 -0
data/lib/bio/sequence/na.rb +189 -0
data/lib/bio/shell.rb +9 -23
data/lib/bio/shell/core.rb +130 -125
data/lib/bio/shell/demo.rb +143 -0
data/lib/bio/shell/{session.rb → interface.rb} +42 -40
data/lib/bio/shell/object.rb +52 -0
data/lib/bio/shell/plugin/codon.rb +4 -22
data/lib/bio/shell/plugin/emboss.rb +23 -0
data/lib/bio/shell/plugin/entry.rb +34 -25
data/lib/bio/shell/plugin/flatfile.rb +5 -23
data/lib/bio/shell/plugin/keggapi.rb +11 -24
data/lib/bio/shell/plugin/midi.rb +5 -23
data/lib/bio/shell/plugin/obda.rb +4 -22
data/lib/bio/shell/plugin/seq.rb +6 -24
data/lib/bio/shell/rails/Rakefile +10 -0
data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
data/lib/bio/shell/rails/config/boot.rb +19 -0
data/lib/bio/shell/rails/config/database.yml +85 -0
data/lib/bio/shell/rails/config/environment.rb +53 -0
data/lib/bio/shell/rails/config/environments/development.rb +19 -0
data/lib/bio/shell/rails/config/environments/production.rb +19 -0
data/lib/bio/shell/rails/config/environments/test.rb +19 -0
data/lib/bio/shell/rails/config/routes.rb +19 -0
data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
data/lib/bio/shell/rails/public/404.html +8 -0
data/lib/bio/shell/rails/public/500.html +8 -0
data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
data/lib/bio/shell/rails/public/dispatch.rb +10 -0
data/lib/bio/shell/rails/public/favicon.ico +0 -0
data/lib/bio/shell/rails/public/images/icon.png +0 -0
data/lib/bio/shell/rails/public/images/rails.png +0 -0
data/lib/bio/shell/rails/public/index.html +277 -0
data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
data/lib/bio/shell/rails/public/robots.txt +1 -0
data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
data/lib/bio/shell/rails/script/about +3 -0
data/lib/bio/shell/rails/script/breakpointer +3 -0
data/lib/bio/shell/rails/script/console +3 -0
data/lib/bio/shell/rails/script/destroy +3 -0
data/lib/bio/shell/rails/script/generate +3 -0
data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
data/lib/bio/shell/rails/script/performance/profiler +3 -0
data/lib/bio/shell/rails/script/plugin +3 -0
data/lib/bio/shell/rails/script/process/reaper +3 -0
data/lib/bio/shell/rails/script/process/spawner +3 -0
data/lib/bio/shell/rails/script/process/spinner +3 -0
data/lib/bio/shell/rails/script/runner +3 -0
data/lib/bio/shell/rails/script/server +42 -0
data/lib/bio/shell/rails/test/test_helper.rb +28 -0
data/lib/bio/shell/web.rb +90 -0
data/lib/bio/util/contingency_table.rb +231 -225
data/sample/any2fasta.rb +59 -0
data/test/data/HMMER/hmmpfam.out +64 -0
data/test/data/HMMER/hmmsearch.out +88 -0
data/test/data/aaindex/DAYM780301 +30 -0
data/test/data/aaindex/PRAM900102 +20 -0
data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
data/test/unit/bio/appl/blast/test_report.rb +15 -12
data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
data/test/unit/bio/appl/test_blast.rb +5 -5
data/test/unit/bio/data/test_na.rb +9 -18
data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
data/test/unit/bio/db/test_aaindex.rb +197 -0
data/test/unit/bio/io/test_fastacmd.rb +55 -0
data/test/unit/bio/sequence/test_aa.rb +102 -0
data/test/unit/bio/sequence/test_common.rb +178 -0
data/test/unit/bio/sequence/test_compat.rb +82 -0
data/test/unit/bio/sequence/test_na.rb +242 -0
data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
data/test/unit/bio/test_alignment.rb +15 -7
data/test/unit/bio/test_reference.rb +198 -0
data/test/unit/bio/test_sequence.rb +4 -49
data/test/unit/bio/test_shell.rb +2 -2
metadata +118 -15
data/lib/bio/io/brdb.rb +0 -103
data/lib/bioruby.rb +0 -34

data/lib/bio/io/flatfile/indexer.rb CHANGED

@@ -1,23 +1,10 @@
 #
-# bio/io/flatfile/indexer.rb - OBDA flatfile indexer
+# = bio/io/flatfile/indexer.rb - OBDA flatfile indexer
 #
-#   Copyright (C) 2002 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
+# Copyright:: Copyright (C) 2002 GOTO Naohisa <ng@bioruby.org>
+# License::   Ruby's
 #
-#  This library is free software; you can redistribute it and/or
-#  modify it under the terms of the GNU Lesser General Public
-#  License as published by the Free Software Foundation; either
-#  version 2 of the License, or (at your option) any later version.
-#
-#  This library is distributed in the hope that it will be useful,
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-#  Lesser General Public License for more details.
-#
-#  You should have received a copy of the GNU Lesser General Public
-#  License along with this library; if not, write to the Free Software
-#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
-#
-#  $Id: indexer.rb,v 1.21 2005/09/26 13:00:08 k Exp $
+#  $Id: indexer.rb,v 1.23 2006/02/22 08:41:03 ngoto Exp $
 #
 require 'bio/io/flatfile/index'
@@ -80,6 +67,8 @@ module Bio
             BlastDefaultParser.new(Bio::Blast::WU::Report, *arg)
           when 'Bio::Blast::WU::Report_TBlast'
             BlastDefaultParser.new(Bio::Blast::WU::Report_TBlast, *arg)
+          when 'Bio::PDB::ChemicalComponent'
+            PDBChemicalComponentParser.new(Bio::PDB::ChemicalComponent, *arg)
           else
             raise 'unknown or unsupported format'
           end #case dbclass.to_s
@@ -130,10 +119,10 @@ module Bio
           attr_reader :fileid
           def each
-            pos = @flatfile.pos
             @flatfile.each do |x|
               @entry = x
-              len = @flatfile.entry_raw.length
+              pos = @flatfile.entry_start_pos
+              len = @flatfile.entry_ended_pos - @flatfile.entry_start_pos
               begin
                 yield pos, len
               rescue RuntimeError, NameError => evar
@@ -150,7 +139,6 @@ module Bio
                   DEBUG.print "This entry shall be incorrectly indexed.\n"
                 end
               end #rescue
-              pos = @flatfile.pos
             end
           end
@@ -204,15 +192,6 @@ module Bio
             end
             self.add_secondary_namespaces(*sec_names)
           end
-          def open_flatfile(fileid, file)
-            super
-            @flatfile.pos = 0
-            begin
-              pos = @flatfile.pos
-              line = @flatfile.gets
-            end until (!line or line =~ /^LOCUS /)
-            @flatfile.pos = pos
-          end
         end #class GenBankParser
         class GenPeptParser < GenBankParser
@@ -437,6 +416,35 @@ module Bio
           end
         end #class BlastDefaultReportParser
+        class PDBChemicalComponentParser < TemplateParser
+          NAMESTYLE = NameSpaces.new(
+             NameSpace.new( 'UNIQUE', Proc.new { |x| x.entry_id } )
+                                     )
+          PRIMARY = 'UNIQUE'
+          def initialize(klass, pri_name = nil, sec_names = nil)
+            super()
+            self.format = 'raw'
+            self.dbclass = Bio::PDB::ChemicalComponent
+            self.set_primary_namespace((pri_name or PRIMARY))
+            unless sec_names then
+              sec_names = []
+              @namestyle.each_value do |x|
+                sec_names << x.name if x.name != self.primary.name
+              end
+            end
+            self.add_secondary_namespaces(*sec_names)
+          end
+          def open_flatfile(fileid, file)
+            super
+            @flatfile.pos = 0
+            begin
+              pos = @flatfile.pos
+              line = @flatfile.gets
+            end until (!line or line =~ /^RESIDUE /)
+            @flatfile.pos = pos
+          end
+        end #class PDBChemicalComponentParser
       end #module Parser
       def self.makeindexBDB(name, parser, options, *files)

data/lib/bio/reference.rb CHANGED

@@ -1,7 +1,23 @@
 #
-# bio/reference.rb - journal reference class
+# = bio/reference.rb - Journal reference classes
 #
-#   Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
+# Copyright::   Copyright (C) 2001
+#               KATAYAMA Toshiaki <k@bioruby.org>
+# License::     LGPL
+#
+# $Id: reference.rb,v 1.21 2006/02/08 15:06:26 nakao Exp $
+#
+# == Description
+#
+# Journal reference classes.
+#
+# == Examples
+#
+# == References
+#
+#
+#
+#--
 #
 #  This library is free software; you can redistribute it and/or
 #  modify it under the terms of the GNU Lesser General Public
@@ -17,13 +33,78 @@
 #  License along with this library; if not, write to the Free Software
 #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
 #
-#  $Id: reference.rb,v 1.18 2005/12/18 16:58:58 nakao Exp $
+#++
 #
 module Bio
+  # A class for journal reference information.
+  #
+  # === Examples
+  #
+  #    hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ],
+  #            'title' => "Title of the study.",
+  #            'journal' => "Theor. J. Hoge",
+  #            'volume' => 12,
+  #            'issue' => 3,
+  #            'pages' => "123-145",
+  #            'year' => 2001,
+  #            'pubmed' => 12345678,
+  #            'medline' => 98765432,
+  #            'abstract' => "Hoge fuga. ...",
+  #            'url' => "http://example.com",
+  #            'mesh' => [],
+  #            'affiliations' => []}
+  #    ref = Bio::Reference.new(hash)
+  #
+  #    # Formats in the BibTeX style.
+  #    ref.format("bibtex")
+  #
+  #    # Short-cut for Bio::Reference#format("bibtex")
+  #    ref.bibtex
+  #
   class Reference
+    # Author names in an Array, [ "Hoge, J.P.", "Fuga, F.B." ].
+    attr_reader :authors
+    # "Title of the study."
+    attr_reader :title
+    # "Theor. J. Hoge"
+    attr_reader :journal
+    # 12
+    attr_reader :volume
+    # 3
+    attr_reader :issue
+    # "123-145"
+    attr_reader :pages
+    # 2001
+    attr_reader :year
+    # 12345678
+    attr_reader :pubmed
+    # 98765432
+    attr_reader :medline
+    # Abstract text in String.
+    attr_reader :abstract
+    # A URL String.
+    attr_reader :url
+    # MeSH terms in an Array.
+    attr_reader :mesh
+    # Affiliations in an Array.
+    attr_reader :affiliations
+    #
     def initialize(hash)
       hash.default = ''
       @authors  = hash['authors'] # [ "Hoge, J.P.", "Fuga, F.B." ]
@@ -43,9 +124,23 @@ module Bio
       @mesh    = [] if @mesh.empty?
       @affiliations = [] if @affiliations.empty?
     end
-    attr_reader :authors, :title, :journal, :volume, :issue, :pages, :year,
-      :pubmed, :medline, :abstract, :url, :mesh, :affiliations
+    # Formats the reference in a given style.
+    #
+    # Styles:
+    # 0. nil - general
+    # 1. endnote - Endnote
+    # 2. bibitem - Bibitem (option acceptable)
+    # 3. bibtex - BibTeX (option acceptable)
+    # 4. rd - rd (option acceptable)
+    # 5. nature - Nature (option acceptable)
+    # 6. science - Science
+    # 7. genome_biol - Genome Biology
+    # 8. genome_res - Genome Research
+    # 9. nar - Nucleic Acids Research
+    # 10. current - Current Biology
+    # 11. trends - Trends in *
+    # 12. cell - Cell Press
     def format(style = nil, option = nil)
       case style
       when 'endnote'
@@ -77,19 +172,20 @@ module Bio
       end
     end
+    # Formats in the Endnote style.
     def endnote
       lines = []
       lines << "%0 Journal Article"
       @authors.each do |author|
         lines << "%A #{author}"
       end
-      lines << "%D #{@year}" unless @year.empty?
+      lines << "%D #{@year}" unless @year.to_s.empty?
       lines << "%T #{@title}" unless @title.empty?
       lines << "%J #{@journal}" unless @journal.empty?
-      lines << "%V #{@volume}" unless @volume.empty?
-      lines << "%N #{@issue}" unless @issue.empty?
+      lines << "%V #{@volume}" unless @volume.to_s.empty?
+      lines << "%N #{@issue}" unless @issue.to_s.empty?
       lines << "%P #{@pages}" unless @pages.empty?
-      lines << "%M #{@pubmed}" unless @pubmed.empty?
+      lines << "%M #{@pubmed}" unless @pubmed.to_s.empty?
       if @pubmed
         cgi = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi"
         opts = "cmd=Retrieve&db=PubMed&dopt=Citation&list_uids"
@@ -104,6 +200,7 @@ module Bio
       return lines.join("\n")
     end
+    # Formats in the bibitem style.
     def bibitem(item = nil)
       item  = "PMID:#{@pubmed}" unless item
       pages = @pages.sub('-', '--')
@@ -115,6 +212,7 @@ module Bio
       END
     end
+    # Formats in the BibTeX style.
     def bibtex(section = nil)
       section = "article" unless section
       authors = authors_join(' and ', ' and ')
@@ -132,11 +230,13 @@ module Bio
       END
     end
+    # Formats in a general style.
     def general
       authors = @authors.join(', ')
       "#{authors} (#{@year}). \"#{@title}\" #{@journal} #{@volume}:#{@pages}."
     end
+    # Formats in the RD style.
     def rd(str = nil)
       @abstract ||= str
       lines = []
@@ -147,6 +247,8 @@ module Bio
       return lines.join("\n\n")
     end
+    # Formats in the Nature Publishing Group style.
+    # * http://www.nature.com
     def nature(short = false)
       if short
         if @authors.size > 4
@@ -163,6 +265,8 @@ module Bio
       end
     end
+    # Formats in the Science style.
+    # * http://www.sciencemag.org/
     def science
       if @authors.size > 4
         authors = rev_name(@authors[0]) + " et al."
@@ -173,28 +277,40 @@ module Bio
       "#{authors}, #{@journal} #{@volume} #{page_from} (#{@year})."
     end
+    # Formats in the Genome Biology style.
+    # * http://genomebiology.com/
     def genome_biol
       authors = @authors.collect {|name| strip_dots(name)}.join(', ')
       journal = strip_dots(@journal)
       "#{authors}: #{@title} #{journal} #{@year}, #{@volume}:#{@pages}."
     end
+    # Formats in the Current Biology style.
+    # * http://www.current-biology.com/
     alias current genome_biol
+    # Formats in the Genome Research style.
+    # * http://genome.org/
     def genome_res
       authors = authors_join(' and ')
       "#{authors} #{@year}.\n  #{@title} #{@journal} #{@volume}: #{@pages}."
     end
+    # Formats in the Nucleic Acids Research style.
+    # * http://nar.oxfordjournals.org/
     def nar
       authors = authors_join(' and ')
       "#{authors} (#{@year}) #{@title} #{@journal}, #{@volume}, #{@pages}."
     end
+    # Formats in the CELL Press style.
+    # * http://www.cell.com/
     def cell
       authors = authors_join(' and ')
       "#{authors} (#{@year}). #{@title} #{@journal} #{@volume}, #{pages}."
     end
+    # Formats in the TRENDS journals style.
+    # * http://www.trends.com/
     def trends
       if @authors.size > 2
         authors = "#{@authors[0]} et al."
@@ -235,22 +351,37 @@ module Bio
   end
+  # Set of Bio::Reference.
+  #
+  # === Examples
+  #
+  #   refs = Bio::References.new
+  #   refs.append(Bio::Reference.new(hash))
+  #   refs.each do |reference|
+  #     ...
+  #   end
+  #
   class References
+    # Array of Bio::Reference.
+    attr_accessor :references
+    #
     def initialize(ary = [])
       @references = ary
     end
-    attr_accessor :references
-    def append(a)
-      @references.push(a) if a.is_a? Reference
+    # Append a Bio::Reference object.
+    def append(reference)
+      @references.push(reference) if reference.is_a? Reference
       return self
     end
+    # Iterates over each Bio::Reference object.
     def each
-      @references.each do |x|
-        yield x
+      @references.each do |reference|
+        yield reference
       end
     end
@@ -258,51 +389,3 @@ module Bio
 end
-=begin
-= Bio::Reference
---- Bio::Reference.new(hash)
---- Bio::Reference#authors -> Array
---- Bio::Reference#title -> String
---- Bio::Reference#journal -> String
---- Bio::Reference#volume -> Fixnum
---- Bio::Reference#issue -> Fixnum
---- Bio::Reference#pages -> String
---- Bio::Reference#year -> Fixnum
---- Bio::Reference#pubmed -> Fixnum
---- Bio::Reference#medline -> Fixnum
---- Bio::Reference#abstract -> String
---- Bio::Reference#url -> String
---- Bio::Reference#mesh -> Array
---- Bio::Reference#affiliations -> Array
---- Bio::Reference#format(style = nil, option = nil) -> String
---- Bio::Reference#endnote
---- Bio::Reference#bibitem(item = nil) -> String
---- Bio::Reference#bibtex(section = nil) -> String
---- Bio::Reference#rd(str = nil) -> String
---- Bio::Reference#nature(short = false) -> String
---- Bio::Reference#science -> String
---- Bio::Reference#genome_biol -> String
---- Bio::Reference#genome_res -> String
---- Bio::Reference#nar -> String
---- Bio::Reference#cell -> String
---- Bio::Reference#trends -> String
---- Bio::Reference#general -> String
-= Bio::References
---- Bio::References.new(ary = [])
---- Bio::References#references -> Array
---- Bio::References#append(a) -> Bio::References
---- Bio::References#each -> Array
-=end

data/lib/bio/sequence.rb CHANGED

@@ -1,65 +1,75 @@
 #
 # = bio/sequence.rb - biological sequence class
 #
-# Copyright::   Copyright (C) 2000-2005
+# Copyright::   Copyright (C) 2000-2006
 #               Toshiaki Katayama <k@bioruby.org>,
-#               Yoshinori K. Okuji <okuji@embug.org>,
+#               Yoshinori K. Okuji <okuji@enbug.org>,
 #               Naohisa Goto <ng@bioruby.org>
-# License::     LGPL
+# License::     Ruby's
 #
-# $Id: sequence.rb,v 0.50 2006/01/20 09:58:31 k Exp $
-#
-#--
-# *TODO* remove this functionality?
-# You can use Bio::Seq instead of Bio::Sequence for short.
-#++
-#
-#--
-#
-#  This library is free software; you can redistribute it and/or
-#  modify it under the terms of the GNU Lesser General Public
-#  License as published by the Free Software Foundation; either
-#  version 2 of the License, or (at your option) any later version.
-#
-#  This library is distributed in the hope that it will be useful,
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-#  Lesser General Public License for more details.
-#
-#  You should have received a copy of the GNU Lesser General Public
-#  License along with this library; if not, write to the Free Software
-#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
-#
-#++
+# $Id: sequence.rb,v 0.56 2006/02/17 17:15:08 k Exp $
 #
-require 'bio/data/na'
-require 'bio/data/aa'
-require 'bio/data/codontable'
-require 'bio/location'
+require 'bio/sequence/compat'
 module Bio
-# Nucleic/Amino Acid sequence
+class Sequence
-class Sequence < String
+  autoload :Common,  'bio/sequence/common'
+  autoload :NA,      'bio/sequence/na'
+  autoload :AA,      'bio/sequence/aa'
+  autoload :Generic, 'bio/sequence/generic'
+  autoload :Format,  'bio/sequence/format'
-  def self.auto(str)
-    moltype = self.guess(str)
-    if moltype == NA
-      NA.new(str)
+  def initialize(str)
+    @seq = str
+  end
+  def method_missing(*arg)
+    @seq.send(*arg)
+  end
+  attr_accessor :entry_id, :definition, :features, :references, :comments,
+    :date, :keywords, :dblinks, :taxonomy, :moltype, :seq
+  def output(style)
+    extend Bio::Sequence::Format
+    case style
+    when :fasta
+      format_fasta
+    when :gff
+      format_gff
+    when :genbank
+      format_genbank
+    when :embl
+      format_embl
+    end
+  end
+  def auto
+    @moltype = guess
+    if @moltype == NA
+      @seq = NA.new(@seq)
     else
-      AA.new(str)
+      @seq = AA.new(@seq)
     end
   end
-  def guess(threshold = 0.9)
-    cmp = self.composition
+  def self.auto(str)
+    seq = self.new(str)
+    seq.auto
+    return seq
+  end
+  def guess(threshold = 0.9, length = 10000, index = 0)
+    str = @seq.to_s[index,length].to_s.extend Bio::Sequence::Common
+    cmp = str.composition
     bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] +
             cmp['a'] + cmp['t'] + cmp['g'] + cmp['c']
-    total = self.length - cmp['N'] - cmp['n']
+    total = @seq.length - cmp['N'] - cmp['n']
     if bases.to_f / total > threshold
       return NA
@@ -72,389 +82,19 @@ class Sequence < String
     self.new(str).guess(*args)
   end
-  def to_s
-    String.new(self)
-  end
-  alias to_str to_s
-  # Force self to re-initialize for clean up (remove white spaces,
-  # case unification).
-  def seq
-    self.class.new(self)
-  end
-  # Similar to the 'seq' method, but changes the self object destructively.
-  def normalize!
-    initialize(self)
-    self
-  end
-  alias seq! normalize!
-  def <<(*arg)
-    super(self.class.new(*arg))
+  def na
+    @seq = NA.new(@seq)
+    @moltype = NA
   end
-  alias concat <<
-  def +(*arg)
-    self.class.new(super(*arg))
-  end
-  # Returns the subsequence of the self string.
-  def subseq(s = 1, e = self.length)
-    return nil if s < 1 or e < 1
-    s -= 1
-    e -= 1
-    self[s..e]
-  end
-  # Output the FASTA format string of the sequence.  The 1st argument is
-  # used as the comment string.  If the 2nd option is given, the output
-  # sequence will be folded.
-  def to_fasta(header = '', width = nil)
-    ">#{header}\n" +
-    if width
-      self.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
-    else
-      self.to_s + "\n"
-    end
-  end
-  # This method iterates on sub string with specified length 'window_size'.
-  # By specifing 'step_size', codon sized shifting or spliting genome
-  # sequence with ovelapping each end can easily be yielded.
-  #
-  # The remainder sequence at the terminal end will be returned.
-  #
-  # Example:
-  #   # prints average GC% on each 100bp
-  #   seq.window_search(100) do |subseq|
-  #     puts subseq.gc
-  #   end
-  #   # prints every translated peptide (length 5aa) in the same frame
-  #   seq.window_search(15, 3) do |subseq|
-  #     puts subseq.translate
-  #   end
-  #   # split genome sequence by 10000bp with 1000bp overlap in fasta format
-  #   i = 1
-  #   remainder = seq.window_search(10000, 9000) do |subseq|
-  #     puts subseq.to_fasta("segment #{i}", 60)
-  #     i += 1
-  #   end
-  #   puts remainder.to_fasta("segment #{i}", 60)
-  #
-  def window_search(window_size, step_size = 1)
-    i = 0
-    0.step(self.length - window_size, step_size) do |i|
-      yield self[i, window_size]
-    end
-    return self[i + window_size .. -1]
-  end
-  # This method receive a hash of residues/bases to the particular values,
-  # and sum up the value along with the self sequence.  Especially useful
-  # to use with the window_search method and amino acid indices etc.
-  def total(hash)
-    hash.default = 0.0 unless hash.default
-    sum = 0.0
-    self.each_byte do |x|
-      begin
-        sum += hash[x.chr]
-      end
-    end
-    return sum
-  end
-  # Returns a hash of the occurrence counts for each residue or base.
-  def composition
-    count = Hash.new(0)
-    self.scan(/./) do |x|
-      count[x] += 1
-    end
-    return count
-  end
-  # Returns a randomized sequence keeping its composition by default.
-  # The argument is required when generating a random sequence from the empty
-  # sequence (used by the class methods NA.randomize, AA.randomize).
-  # If the block is given, yields for each random residue/base.
-  def randomize(hash = nil)
-    length = self.length
-    if hash
-      count = hash.clone
-      count.each_value {|x| length += x}
-    else
-      count = self.composition
-    end
-    seq = ''
-    tmp = {}
-    length.times do
-      count.each do |k, v|
-        tmp[k] = v * rand
-      end
-      max = tmp.max {|a, b| a[1] <=> b[1]}
-      count[max.first] -= 1
-      if block_given?
-        yield max.first
-      else
-        seq += max.first
-      end
-    end
-    return self.class.new(seq)
-  end
-  # Generate a new random sequence with the given frequency of bases
-  # or residues.  The sequence length is determined by the sum of each
-  # base/residue occurences.
-  def self.randomize(*arg, &block)
-    self.new('').randomize(*arg, &block)
-  end
-  # Receive a GenBank style position string and convert it to the Locations
-  # objects to splice the sequence itself.  See also: bio/location.rb
-  #
-  # This method depends on Locations class, see bio/location.rb
-  def splicing(position)
-    unless position.is_a?(Locations) then
-      position = Locations.new(position)
-    end
-    s = ''
-    position.each do |location|
-      if location.sequence
-        s << location.sequence
-      else
-        exon = self.subseq(location.from, location.to)
-        begin
-          exon.complement! if location.strand < 0
-        rescue NameError
-        end
-        s << exon
-      end
-    end
-    return self.class.new(s)
-  end
-  # Nucleic Acid sequence
-  class NA < Sequence
-    # Generate a nucleic acid sequence object from a string.
-    def initialize(str)
-      super
-      self.downcase!
-      self.tr!(" \t\n\r",'')
-    end
-    # This method depends on Locations class, see bio/location.rb
-    def splicing(position)
-      mRNA = super
-      if mRNA.rna?
-        mRNA.tr!('t', 'u')
-      else
-        mRNA.tr!('u', 't')
-      end
-      mRNA
-    end
-    # Returns complement sequence without reversing ("atgc" -> "tacg")
-    def forward_complement
-      s = self.class.new(self)
-      s.forward_complement!
-      s
-    end
-    # Convert to complement sequence without reversing ("atgc" -> "tacg")
-    def forward_complement!
-      if self.rna?
-        self.tr!('augcrymkdhvbswn', 'uacgyrkmhdbvswn')
-      else
-        self.tr!('atgcrymkdhvbswn', 'tacgyrkmhdbvswn')
-      end
-      self
-    end
-    # Returns reverse complement sequence ("atgc" -> "gcat")
-    def reverse_complement
-      s = self.class.new(self)
-      s.reverse_complement!
-      s
-    end
-    # Convert to reverse complement sequence ("atgc" -> "gcat")
-    def reverse_complement!
-      self.reverse!
-      self.forward_complement!
-    end
-    # Aliases for short
-    alias complement reverse_complement
-    alias complement! reverse_complement!
-    # Translate into the amino acid sequence from the given frame and the
-    # selected codon table.  The table also can be a Bio::CodonTable object.
-    # The 'unknown' character is used for invalid/unknown codon (can be
-    # used for 'nnn' and/or gap translation in practice).
-    #
-    # Frame can be 1, 2 or 3 for the forward strand and -1, -2 or -3
-    # (4, 5 or 6 is also accepted) for the reverse strand.
-    def translate(frame = 1, table = 1, unknown = 'X')
-      if table.is_a?(Bio::CodonTable)
-        ct = table
-      else
-        ct = Bio::CodonTable[table]
-      end
-      naseq = self.dna
-      case frame
-      when 1, 2, 3
-        from = frame - 1
-      when 4, 5, 6
-        from = frame - 4
-        naseq.complement!
-      when -1, -2, -3
-        from = -1 - frame
-        naseq.complement!
-      else
-        from = 0
-      end
-      nalen = naseq.length - from
-      nalen -= nalen % 3
-      aaseq = naseq[from, nalen].gsub(/.{3}/) {|codon| ct[codon] or unknown}
-      return Bio::Sequence::AA.new(aaseq)
-    end
-    # Returns counts of the each codon in the sequence by Hash.
-    def codon_usage
-      hash = Hash.new(0)
-      self.window_search(3, 3) do |codon|
-        hash[codon] += 1
-      end
-      return hash
-    end
-    # Calculate the ratio of GC / ATGC bases in percent.
-    def gc_percent
-      count = self.composition
-      at = count['a'] + count['t'] + count['u']
-      gc = count['g'] + count['c']
-      gc = 100 * gc / (at + gc)
-      return gc
-    end
-    # Show abnormal bases other than 'atgcu'.
-    def illegal_bases
-      self.scan(/[^atgcu]/).sort.uniq
-    end
-    # Estimate the weight of this biological string molecule.
-    # NucleicAcid is defined in bio/data/na.rb
-    def molecular_weight
-      if self.rna?
-        NucleicAcid.weight(self, true)
-      else
-        NucleicAcid.weight(self)
-      end
-    end
-    # Convert the universal code string into the regular expression.
-    def to_re
-      if self.rna?
-        NucleicAcid.to_re(self.dna, true)
-      else
-        NucleicAcid.to_re(self)
-      end
-    end
-    # Convert the self string into the list of the names of the each base.
-    def names
-      array = []
-      self.each_byte do |x|
-        array.push(NucleicAcid.names[x.chr.upcase])
-      end
-      return array
-    end
-    # Output a DNA string by substituting 'u' to 't'.
-    def dna
-      self.tr('u', 't')
-    end
-    def dna!
-      self.tr!('u', 't')
-    end
-    # Output a RNA string by substituting 't' to 'u'.
-    def rna
-      self.tr('t', 'u')
-    end
-    def rna!
-      self.tr!('t', 'u')
-    end
-    def rna?
-      self.index('u')
-    end
-    protected :rna?
-    def pikachu
-      self.dna.tr("atgc", "pika") # joke, of course :-)
-    end
-  end
-  # Amino Acid sequence
-  class AA < Sequence
-    # Generate a amino acid sequence object from a string.
-    def initialize(str)
-      super
-      self.upcase!
-      self.tr!(" \t\n\r",'')
-    end
-    # Estimate the weight of this protein.
-    # AminoAcid is defined in bio/data/aa.rb
-    def molecular_weight
-      AminoAcid.weight(self)
-    end
-    def to_re
-      AminoAcid.to_re(self)
-    end
-    # Generate the list of the names of the each residue along with the
-    # sequence (3 letters code).
-    def codes
-      array = []
-      self.each_byte do |x|
-        array.push(AminoAcid.names[x.chr])
-      end
-      return array
-    end
-    # Similar to codes but returns long names.
-    def names
-      self.codes.map do |x|
-        AminoAcid.names[x]
-      end
-    end
+  def aa
+    @seq = AA.new(@seq)
+    @moltype = AA
   end
 end # Sequence
-class Seq < Sequence
-  attr_accessor :entry_id, :definition, :features, :references, :comments,
-    :date, :keywords, :dblinks, :taxonomy, :moltype
-end
 end # Bio