RubyGems - bio - Versions diffs - 0.7.0 - Mend

bio 0.7.0

Files changed (201) hide show

data/bin/bioruby +107 -0
data/bin/br_biofetch.rb +59 -0
data/bin/br_bioflat.rb +294 -0
data/bin/br_biogetseq.rb +57 -0
data/bin/br_pmfetch.rb +431 -0
data/doc/BioRuby.rd.ja +225 -0
data/doc/Changes-0.7.rd +236 -0
data/doc/Design.rd.ja +341 -0
data/doc/KEGG_API.rd +1437 -0
data/doc/KEGG_API.rd.ja +1399 -0
data/doc/TODO.rd.ja +138 -0
data/doc/Tutorial.rd +1138 -0
data/doc/Tutorial.rd.ja +2110 -0
data/etc/bioinformatics/seqdatabase.ini +210 -0
data/lib/bio.rb +256 -0
data/lib/bio/alignment.rb +1906 -0
data/lib/bio/appl/bl2seq/report.rb +350 -0
data/lib/bio/appl/blast.rb +269 -0
data/lib/bio/appl/blast/format0.rb +1402 -0
data/lib/bio/appl/blast/format8.rb +95 -0
data/lib/bio/appl/blast/report.rb +652 -0
data/lib/bio/appl/blast/rexml.rb +151 -0
data/lib/bio/appl/blast/wublast.rb +553 -0
data/lib/bio/appl/blast/xmlparser.rb +222 -0
data/lib/bio/appl/blat/report.rb +392 -0
data/lib/bio/appl/clustalw.rb +191 -0
data/lib/bio/appl/clustalw/report.rb +154 -0
data/lib/bio/appl/emboss.rb +68 -0
data/lib/bio/appl/fasta.rb +262 -0
data/lib/bio/appl/fasta/format10.rb +428 -0
data/lib/bio/appl/fasta/format6.rb +37 -0
data/lib/bio/appl/genscan/report.rb +570 -0
data/lib/bio/appl/hmmer.rb +129 -0
data/lib/bio/appl/hmmer/report.rb +556 -0
data/lib/bio/appl/mafft.rb +222 -0
data/lib/bio/appl/mafft/report.rb +119 -0
data/lib/bio/appl/psort.rb +555 -0
data/lib/bio/appl/psort/report.rb +473 -0
data/lib/bio/appl/sim4.rb +134 -0
data/lib/bio/appl/sim4/report.rb +501 -0
data/lib/bio/appl/sosui/report.rb +166 -0
data/lib/bio/appl/spidey/report.rb +604 -0
data/lib/bio/appl/targetp/report.rb +283 -0
data/lib/bio/appl/tmhmm/report.rb +238 -0
data/lib/bio/command.rb +166 -0
data/lib/bio/data/aa.rb +354 -0
data/lib/bio/data/codontable.rb +740 -0
data/lib/bio/data/na.rb +226 -0
data/lib/bio/db.rb +340 -0
data/lib/bio/db/aaindex.rb +280 -0
data/lib/bio/db/embl/common.rb +332 -0
data/lib/bio/db/embl/embl.rb +446 -0
data/lib/bio/db/embl/sptr.rb +954 -0
data/lib/bio/db/embl/swissprot.rb +32 -0
data/lib/bio/db/embl/trembl.rb +31 -0
data/lib/bio/db/embl/uniprot.rb +32 -0
data/lib/bio/db/fantom.rb +604 -0
data/lib/bio/db/fasta.rb +869 -0
data/lib/bio/db/genbank/common.rb +299 -0
data/lib/bio/db/genbank/ddbj.rb +34 -0
data/lib/bio/db/genbank/genbank.rb +354 -0
data/lib/bio/db/genbank/genpept.rb +73 -0
data/lib/bio/db/genbank/refseq.rb +31 -0
data/lib/bio/db/gff.rb +106 -0
data/lib/bio/db/go.rb +497 -0
data/lib/bio/db/kegg/brite.rb +51 -0
data/lib/bio/db/kegg/cell.rb +88 -0
data/lib/bio/db/kegg/compound.rb +130 -0
data/lib/bio/db/kegg/enzyme.rb +125 -0
data/lib/bio/db/kegg/expression.rb +173 -0
data/lib/bio/db/kegg/genes.rb +293 -0
data/lib/bio/db/kegg/genome.rb +362 -0
data/lib/bio/db/kegg/glycan.rb +213 -0
data/lib/bio/db/kegg/keggtab.rb +418 -0
data/lib/bio/db/kegg/kgml.rb +299 -0
data/lib/bio/db/kegg/ko.rb +178 -0
data/lib/bio/db/kegg/reaction.rb +97 -0
data/lib/bio/db/litdb.rb +131 -0
data/lib/bio/db/medline.rb +317 -0
data/lib/bio/db/nbrf.rb +199 -0
data/lib/bio/db/pdb.rb +38 -0
data/lib/bio/db/pdb/atom.rb +60 -0
data/lib/bio/db/pdb/chain.rb +117 -0
data/lib/bio/db/pdb/model.rb +106 -0
data/lib/bio/db/pdb/pdb.rb +1682 -0
data/lib/bio/db/pdb/residue.rb +122 -0
data/lib/bio/db/pdb/utils.rb +234 -0
data/lib/bio/db/prosite.rb +616 -0
data/lib/bio/db/rebase.rb +417 -0
data/lib/bio/db/transfac.rb +387 -0
data/lib/bio/feature.rb +201 -0
data/lib/bio/io/brdb.rb +103 -0
data/lib/bio/io/das.rb +471 -0
data/lib/bio/io/dbget.rb +212 -0
data/lib/bio/io/ddbjxml.rb +614 -0
data/lib/bio/io/fastacmd.rb +123 -0
data/lib/bio/io/fetch.rb +114 -0
data/lib/bio/io/flatfile.rb +496 -0
data/lib/bio/io/flatfile/bdb.rb +266 -0
data/lib/bio/io/flatfile/index.rb +1308 -0
data/lib/bio/io/flatfile/indexer.rb +778 -0
data/lib/bio/io/higet.rb +92 -0
data/lib/bio/io/keggapi.rb +863 -0
data/lib/bio/io/pubmed.rb +189 -0
data/lib/bio/io/registry.rb +308 -0
data/lib/bio/io/soapwsdl.rb +114 -0
data/lib/bio/io/sql.rb +428 -0
data/lib/bio/location.rb +650 -0
data/lib/bio/pathway.rb +991 -0
data/lib/bio/reference.rb +308 -0
data/lib/bio/sequence.rb +593 -0
data/lib/bio/shell.rb +51 -0
data/lib/bio/shell/core.rb +512 -0
data/lib/bio/shell/plugin/codon.rb +228 -0
data/lib/bio/shell/plugin/entry.rb +85 -0
data/lib/bio/shell/plugin/flatfile.rb +119 -0
data/lib/bio/shell/plugin/keggapi.rb +187 -0
data/lib/bio/shell/plugin/midi.rb +448 -0
data/lib/bio/shell/plugin/obda.rb +63 -0
data/lib/bio/shell/plugin/seq.rb +238 -0
data/lib/bio/shell/session.rb +214 -0
data/lib/bio/util/color_scheme.rb +214 -0
data/lib/bio/util/color_scheme/buried.rb +78 -0
data/lib/bio/util/color_scheme/helix.rb +78 -0
data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
data/lib/bio/util/color_scheme/strand.rb +78 -0
data/lib/bio/util/color_scheme/taylor.rb +69 -0
data/lib/bio/util/color_scheme/turn.rb +78 -0
data/lib/bio/util/color_scheme/zappo.rb +69 -0
data/lib/bio/util/contingency_table.rb +337 -0
data/lib/bio/util/sirna.rb +306 -0
data/lib/bioruby.rb +34 -0
data/sample/biofetch.rb +475 -0
data/sample/color_scheme_na.rb +99 -0
data/sample/dbget +37 -0
data/sample/fasta2tab.rb +99 -0
data/sample/fsplit.rb +51 -0
data/sample/gb2fasta.rb +31 -0
data/sample/gb2tab.rb +325 -0
data/sample/gbtab2mysql.rb +161 -0
data/sample/genes2nuc.rb +33 -0
data/sample/genes2pep.rb +33 -0
data/sample/genes2tab.rb +81 -0
data/sample/genome2rb.rb +29 -0
data/sample/genome2tab.rb +76 -0
data/sample/goslim.rb +311 -0
data/sample/gt2fasta.rb +47 -0
data/sample/pmfetch.rb +42 -0
data/sample/pmsearch.rb +42 -0
data/sample/psortplot_html.rb +222 -0
data/sample/ssearch2tab.rb +96 -0
data/sample/tdiary.rb +158 -0
data/sample/tfastx2tab.rb +100 -0
data/sample/vs-genes.rb +212 -0
data/test/data/SOSUI/sample.report +11 -0
data/test/data/TMHMM/sample.report +21 -0
data/test/data/blast/eco:b0002.faa +15 -0
data/test/data/blast/eco:b0002.faa.m0 +128 -0
data/test/data/blast/eco:b0002.faa.m7 +65 -0
data/test/data/blast/eco:b0002.faa.m8 +1 -0
data/test/data/embl/AB090716.embl +65 -0
data/test/data/genscan/sample.report +63 -0
data/test/data/prosite/prosite.dat +2233 -0
data/test/data/refseq/nm_126355.entret +64 -0
data/test/data/uniprot/p53_human.uniprot +1456 -0
data/test/runner.rb +10 -0
data/test/unit/bio/appl/blast/test_report.rb +427 -0
data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
data/test/unit/bio/appl/genscan/test_report.rb +195 -0
data/test/unit/bio/appl/sosui/test_report.rb +94 -0
data/test/unit/bio/appl/targetp/test_report.rb +159 -0
data/test/unit/bio/appl/test_blast.rb +159 -0
data/test/unit/bio/appl/test_fasta.rb +142 -0
data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
data/test/unit/bio/data/test_aa.rb +103 -0
data/test/unit/bio/data/test_codontable.rb +120 -0
data/test/unit/bio/data/test_na.rb +89 -0
data/test/unit/bio/db/embl/test_common.rb +130 -0
data/test/unit/bio/db/embl/test_embl.rb +227 -0
data/test/unit/bio/db/embl/test_sptr.rb +268 -0
data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
data/test/unit/bio/db/kegg/test_genes.rb +58 -0
data/test/unit/bio/db/test_fasta.rb +263 -0
data/test/unit/bio/db/test_gff.rb +140 -0
data/test/unit/bio/db/test_prosite.rb +1450 -0
data/test/unit/bio/io/test_ddbjxml.rb +87 -0
data/test/unit/bio/io/test_soapwsdl.rb +45 -0
data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
data/test/unit/bio/test_alignment.rb +1028 -0
data/test/unit/bio/test_command.rb +71 -0
data/test/unit/bio/test_db.rb +109 -0
data/test/unit/bio/test_feature.rb +128 -0
data/test/unit/bio/test_location.rb +51 -0
data/test/unit/bio/test_pathway.rb +485 -0
data/test/unit/bio/test_sequence.rb +386 -0
data/test/unit/bio/test_shell.rb +31 -0
data/test/unit/bio/util/test_color_scheme.rb +45 -0
data/test/unit/bio/util/test_contingency_table.rb +106 -0
data/test/unit/bio/util/test_sirna.rb +258 -0
metadata +295 -0

data/lib/bio/reference.rb ADDED Viewed

@@ -0,0 +1,308 @@
+#
+# bio/reference.rb - journal reference class
+#
+#   Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
+#
+#  This library is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU Lesser General Public
+#  License as published by the Free Software Foundation; either
+#  version 2 of the License, or (at your option) any later version.
+#
+#  This library is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  Lesser General Public License for more details.
+#
+#  You should have received a copy of the GNU Lesser General Public
+#  License along with this library; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+#
+#  $Id: reference.rb,v 1.18 2005/12/18 16:58:58 nakao Exp $
+#
+module Bio
+  class Reference
+    def initialize(hash)
+      hash.default = ''
+      @authors  = hash['authors'] # [ "Hoge, J.P.", "Fuga, F.B." ]
+      @title    = hash['title']   # "Title of the study."
+      @journal  = hash['journal'] # "Theor. J. Hoge"
+      @volume   = hash['volume']  # 12
+      @issue    = hash['issue']   # 3
+      @pages    = hash['pages']   # 123-145
+      @year     = hash['year']    # 2001
+      @pubmed   = hash['pubmed']  # 12345678
+      @medline  = hash['medline'] # 98765432
+      @abstract = hash['abstract']
+      @url      = hash['url']
+      @mesh     = hash['mesh']
+      @affiliations = hash['affiliations']
+      @authors = [] if @authors.empty?
+      @mesh    = [] if @mesh.empty?
+      @affiliations = [] if @affiliations.empty?
+    end
+    attr_reader :authors, :title, :journal, :volume, :issue, :pages, :year,
+      :pubmed, :medline, :abstract, :url, :mesh, :affiliations
+    def format(style = nil, option = nil)
+      case style
+      when 'endnote'
+        return endnote
+      when 'bibitem'
+        return bibitem(option)
+      when 'bibtex'
+        return bibtex(option)
+      when 'rd'
+        return rd(option)
+      when /^nature$/i
+        return nature(option)
+      when /^science$/i
+        return science
+      when /^genome\s*_*biol/i
+        return genome_biol
+      when /^genome\s*_*res/i
+        return genome_res
+      when /^nar$/i
+        return nar
+      when /^current/i
+        return current
+      when /^trends/i
+        return trends
+      when /^cell$/i
+        return cell
+      else
+        return general
+      end
+    end
+    def endnote
+      lines = []
+      lines << "%0 Journal Article"
+      @authors.each do |author|
+        lines << "%A #{author}"
+      end
+      lines << "%D #{@year}" unless @year.empty?
+      lines << "%T #{@title}" unless @title.empty?
+      lines << "%J #{@journal}" unless @journal.empty?
+      lines << "%V #{@volume}" unless @volume.empty?
+      lines << "%N #{@issue}" unless @issue.empty?
+      lines << "%P #{@pages}" unless @pages.empty?
+      lines << "%M #{@pubmed}" unless @pubmed.empty?
+      if @pubmed
+        cgi = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi"
+        opts = "cmd=Retrieve&db=PubMed&dopt=Citation&list_uids"
+        @url = "#{cgi}?#{opts}=#{@pubmed}"
+      end
+      lines << "%U #{@url}" unless @url.empty?
+      lines << "%X #{@abstract}" unless @abstract.empty?
+      @mesh.each do |term|
+        lines << "%K #{term}"
+      end
+      lines << "%+ #{@affiliations.join(' ')}" unless @affiliations.empty?
+      return lines.join("\n")
+    end
+    def bibitem(item = nil)
+      item  = "PMID:#{@pubmed}" unless item
+      pages = @pages.sub('-', '--')
+      return <<-"END".collect {|line| line.strip}.join("\n")
+        \\bibitem{#{item}}
+        #{@authors.join(', ')}
+        #{@title},
+        {\\em #{@journal}}, #{@volume}(#{@issue}):#{pages}, #{@year}.
+      END
+    end
+    def bibtex(section = nil)
+      section = "article" unless section
+      authors = authors_join(' and ', ' and ')
+      pages   = @pages.sub('-', '--')
+      return <<-"END".gsub(/\t/, '')
+        @#{section}{PMID:#{@pubmed},
+          author  = {#{authors}},
+          title   = {#{@title}},
+          journal = {#{@journal}},
+          year    = {#{@year}},
+          volume  = {#{@volume}},
+          number  = {#{@issue}},
+          pages   = {#{pages}},
+        }
+      END
+    end
+    def general
+      authors = @authors.join(', ')
+      "#{authors} (#{@year}). \"#{@title}\" #{@journal} #{@volume}:#{@pages}."
+    end
+    def rd(str = nil)
+      @abstract ||= str
+      lines = []
+      lines << "== " + @title
+      lines << "* " + authors_join(' and ')
+      lines << "* #{@journal} #{@year} #{@volume}:#{@pages} [PMID:#{@pubmed}]"
+      lines << @abstract
+      return lines.join("\n\n")
+    end
+    def nature(short = false)
+      if short
+        if @authors.size > 4
+          authors = "#{@authors[0]} et al."
+        elsif @authors.size == 1
+          authors = "#{@authors[0]}"
+        else
+          authors = authors_join(' & ')
+        end
+        "#{authors} #{@journal} #{@volume}, #{@pages} (#{@year})."
+      else
+        authors = authors_join(' & ')
+        "#{authors} #{@title} #{@journal} #{@volume}, #{@pages} (#{@year})."
+      end
+    end
+    def science
+      if @authors.size > 4
+        authors = rev_name(@authors[0]) + " et al."
+      else
+        authors = @authors.collect {|name| rev_name(name)}.join(', ')
+      end
+      page_from, = @pages.split('-')
+      "#{authors}, #{@journal} #{@volume} #{page_from} (#{@year})."
+    end
+    def genome_biol
+      authors = @authors.collect {|name| strip_dots(name)}.join(', ')
+      journal = strip_dots(@journal)
+      "#{authors}: #{@title} #{journal} #{@year}, #{@volume}:#{@pages}."
+    end
+    alias current genome_biol
+    def genome_res
+      authors = authors_join(' and ')
+      "#{authors} #{@year}.\n  #{@title} #{@journal} #{@volume}: #{@pages}."
+    end
+    def nar
+      authors = authors_join(' and ')
+      "#{authors} (#{@year}) #{@title} #{@journal}, #{@volume}, #{@pages}."
+    end
+    def cell
+      authors = authors_join(' and ')
+      "#{authors} (#{@year}). #{@title} #{@journal} #{@volume}, #{pages}."
+    end
+    def trends
+      if @authors.size > 2
+        authors = "#{@authors[0]} et al."
+      elsif @authors.size == 1
+        authors = "#{@authors[0]}"
+      else
+        authors = authors_join(' and ')
+      end
+      "#{authors} (#{@year}) #{@title} #{@journal} #{@volume}, #{@pages}"
+    end
+    private
+    def strip_dots(data)
+      data.tr(',.', '') if data
+    end
+    def authors_join(amp, sep = ', ')
+      authors = @authors.clone
+      if authors.length > 1
+        last = authors.pop
+        authors = authors.join(sep) + "#{amp}" + last
+      elsif authors.length == 1
+        authors = authors.pop
+      else
+        authors = ""
+      end
+    end
+    def rev_name(name)
+      if name =~ /,/
+        name, initial = name.split(/,\s+/)
+        name = "#{initial} #{name}"
+      end
+      return name
+    end
+  end
+  class References
+    def initialize(ary = [])
+      @references = ary
+    end
+    attr_accessor :references
+    def append(a)
+      @references.push(a) if a.is_a? Reference
+      return self
+    end
+    def each
+      @references.each do |x|
+        yield x
+      end
+    end
+  end
+end
+=begin
+= Bio::Reference
+--- Bio::Reference.new(hash)
+--- Bio::Reference#authors -> Array
+--- Bio::Reference#title -> String
+--- Bio::Reference#journal -> String
+--- Bio::Reference#volume -> Fixnum
+--- Bio::Reference#issue -> Fixnum
+--- Bio::Reference#pages -> String
+--- Bio::Reference#year -> Fixnum
+--- Bio::Reference#pubmed -> Fixnum
+--- Bio::Reference#medline -> Fixnum
+--- Bio::Reference#abstract -> String
+--- Bio::Reference#url -> String
+--- Bio::Reference#mesh -> Array
+--- Bio::Reference#affiliations -> Array
+--- Bio::Reference#format(style = nil, option = nil) -> String
+--- Bio::Reference#endnote
+--- Bio::Reference#bibitem(item = nil) -> String
+--- Bio::Reference#bibtex(section = nil) -> String
+--- Bio::Reference#rd(str = nil) -> String
+--- Bio::Reference#nature(short = false) -> String
+--- Bio::Reference#science -> String
+--- Bio::Reference#genome_biol -> String
+--- Bio::Reference#genome_res -> String
+--- Bio::Reference#nar -> String
+--- Bio::Reference#cell -> String
+--- Bio::Reference#trends -> String
+--- Bio::Reference#general -> String
+= Bio::References
+--- Bio::References.new(ary = [])
+--- Bio::References#references -> Array
+--- Bio::References#append(a) -> Bio::References
+--- Bio::References#each -> Array
+=end

data/lib/bio/sequence.rb ADDED Viewed

@@ -0,0 +1,593 @@
+#
+# = bio/sequence.rb - biological sequence class
+#
+# Copyright::   Copyright (C) 2000-2005
+#               Toshiaki Katayama <k@bioruby.org>,
+#               Yoshinori K. Okuji <okuji@embug.org>,
+#               Naohisa Goto <ng@bioruby.org>
+# License::     LGPL
+#
+# $Id: sequence.rb,v 0.49 2005/11/27 15:46:01 k Exp $
+#
+#--
+# *TODO* remove this functionality?
+# You can use Bio::Seq instead of Bio::Sequence for short.
+#++
+#
+#--
+#
+#  This library is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU Lesser General Public
+#  License as published by the Free Software Foundation; either
+#  version 2 of the License, or (at your option) any later version.
+#
+#  This library is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  Lesser General Public License for more details.
+#
+#  You should have received a copy of the GNU Lesser General Public
+#  License along with this library; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+#
+#++
+#
+require 'bio/data/na'
+require 'bio/data/aa'
+require 'bio/data/codontable'
+require 'bio/location'
+module Bio
+# Nucleic/Amino Acid sequence
+class Sequence < String
+  def self.auto(str)
+    moltype = self.guess(str)
+    if moltype == NA
+      NA.new(str)
+    else
+      AA.new(str)
+    end
+  end
+  def guess(threshold = 0.9)
+    cmp = self.composition
+    bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] +
+            cmp['a'] + cmp['t'] + cmp['g'] + cmp['c']
+    total = self.length - cmp['N'] - cmp['n']
+    if bases.to_f / total > threshold
+      return NA
+    else
+      return AA
+    end
+  end
+  def self.guess(str, *args)
+    self.new(str).guess(*args)
+  end
+  def to_s
+    String.new(self)
+  end
+  alias to_str to_s
+  # Force self to re-initialize for clean up (remove white spaces,
+  # case unification).
+  def seq
+    self.class.new(self)
+  end
+  # Similar to the 'seq' method, but changes the self object destructively.
+  def normalize!
+    initialize(self)
+    self
+  end
+  alias seq! normalize!
+  def <<(*arg)
+    super(self.class.new(*arg))
+  end
+  alias concat <<
+  def +(*arg)
+    self.class.new(super(*arg))
+  end
+  # Returns the subsequence of the self string.
+  def subseq(s = 1, e = self.length)
+    return nil if s < 1 or e < 1
+    s -= 1
+    e -= 1
+    self[s..e]
+  end
+  # Output the FASTA format string of the sequence.  The 1st argument is
+  # used as the comment string.  If the 2nd option is given, the output
+  # sequence will be folded.
+  def to_fasta(header = '', width = nil)
+    ">#{header}\n" +
+    if width
+      self.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
+    else
+      self.to_s + "\n"
+    end
+  end
+  # This method iterates over substrings of the specified length 'window_size'.
+  # By specifying 'step_size', codon-sized shifting or splitting a genome
+  # sequence into segments with overlapping ends can easily be done.
+  #
+  # The remainder sequence at the terminal end will be returned.
+  #
+  # Example:
+  #   # prints average GC% on each 100bp
+  #   seq.window_search(100) do |subseq|
+  #     puts subseq.gc_percent
+  #   end
+  #   # prints every translated peptide (length 5aa) in the same frame
+  #   seq.window_search(15, 3) do |subseq|
+  #     puts subseq.translate
+  #   end
+  #   # split genome sequence by 10000bp with 1000bp overlap in fasta format
+  #   i = 1
+  #   remainder = seq.window_search(10000, 9000) do |subseq|
+  #     puts subseq.to_fasta("segment #{i}", 60)
+  #     i += 1
+  #   end
+  #   puts remainder.to_fasta("segment #{i}", 60)
+  #
+  def window_search(window_size, step_size = 1)
+    i = 0
+    0.step(self.length - window_size, step_size) do |i|
+      yield self[i, window_size]
+    end
+    return self[i + window_size .. -1]
+  end
+  # This method receives a hash mapping residues/bases to particular values,
+  # and sums up those values along the self sequence.  Especially useful
+  # in combination with the window_search method and amino acid indices etc.
+  def total(hash)
+    hash.default = 0.0 unless hash.default
+    sum = 0.0
+    self.each_byte do |x|
+      begin
+        sum += hash[x.chr]
+      end
+    end
+    return sum
+  end
+  # Returns a hash of the occurrence counts for each residue or base.
+  def composition
+    count = Hash.new(0)
+    self.scan(/./) do |x|
+      count[x] += 1
+    end
+    return count
+  end
+  # Returns a randomized sequence keeping its composition by default.
+  # The argument is required when generating a random sequence from the empty
+  # sequence (used by the class methods NA.randomize, AA.randomize).
+  # If the block is given, yields for each random residue/base.
+  def randomize(hash = nil)
+    length = self.length
+    if hash
+      count = hash.clone
+      count.each_value {|x| length += x}
+    else
+      count = self.composition
+    end
+    seq = ''
+    tmp = {}
+    length.times do
+      count.each do |k, v|
+        tmp[k] = v * rand
+      end
+      max = tmp.max {|a, b| a[1] <=> b[1]}
+      count[max.first] -= 1
+      if block_given?
+        yield max.first
+      else
+        seq += max.first
+      end
+    end
+    return self.class.new(seq)
+  end
+  # Generate a new random sequence with the given frequency of bases
+  # or residues.  The sequence length is determined by the sum of the
+  # base/residue occurrences.
+  def self.randomize(*arg, &block)
+    self.new('').randomize(*arg, &block)
+  end
+  # Receive a GenBank style position string and convert it to the Locations
+  # objects to splice the sequence itself.  See also: bio/location.rb
+  #
+  # This method depends on Locations class, see bio/location.rb
+  def splicing(position)
+    unless position.is_a?(Locations) then
+      position = Locations.new(position)
+    end
+    s = ''
+    position.each do |location|
+      if location.sequence
+        s << location.sequence
+      else
+        exon = self.subseq(location.from, location.to)
+        begin
+          exon.complement! if location.strand < 0
+        rescue NameError
+        end
+        s << exon
+      end
+    end
+    return self.class.new(s)
+  end
+  # Nucleic Acid sequence
+  class NA < Sequence
+    # Generate a nucleic acid sequence object from a string.
+    def initialize(str)
+      super
+      self.downcase!
+      self.tr!(" \t\n\r",'')
+    end
+    # This method depends on Locations class, see bio/location.rb
+    def splicing(position)
+      mRNA = super
+      if mRNA.rna?
+        mRNA.tr!('t', 'u')
+      else
+        mRNA.tr!('u', 't')
+      end
+      mRNA
+    end
+    # Returns complement sequence without reversing ("atgc" -> "tacg")
+    def forward_complement
+      s = self.class.new(self)
+      s.forward_complement!
+      s
+    end
+    # Convert to complement sequence without reversing ("atgc" -> "tacg")
+    def forward_complement!
+      if self.rna?
+        self.tr!('augcrymkdhvbswn', 'uacgyrkmhdbvswn')
+      else
+        self.tr!('atgcrymkdhvbswn', 'tacgyrkmhdbvswn')
+      end
+      self
+    end
+    # Returns reverse complement sequence ("atgc" -> "gcat")
+    def reverse_complement
+      s = self.class.new(self)
+      s.reverse_complement!
+      s
+    end
+    # Convert to reverse complement sequence ("atgc" -> "gcat")
+    def reverse_complement!
+      self.reverse!
+      self.forward_complement!
+    end
+    # Aliases for short
+    alias complement reverse_complement
+    alias complement! reverse_complement!
+    # Translate into the amino acid sequence from the given frame and the
+    # selected codon table.  The table also can be a Bio::CodonTable object.
+    # The 'unknown' character is used for invalid/unknown codon (can be
+    # used for 'nnn' and/or gap translation in practice).
+    #
+    # Frame can be 1, 2 or 3 for the forward strand and -1, -2 or -3
+    # (4, 5 or 6 is also accepted) for the reverse strand.
+    def translate(frame = 1, table = 1, unknown = 'X')
+      if table.is_a?(Bio::CodonTable)
+        ct = table
+      else
+        ct = Bio::CodonTable[table]
+      end
+      naseq = self.dna
+      case frame
+      when 1, 2, 3
+        from = frame - 1
+      when 4, 5, 6
+        from = frame - 4
+        naseq.complement!
+      when -1, -2, -3
+        from = -1 - frame
+        naseq.complement!
+      else
+        from = 0
+      end
+      nalen = naseq.length - from
+      nalen -= nalen % 3
+      aaseq = naseq[from, nalen].gsub(/.{3}/) {|codon| ct[codon] or unknown}
+      return Bio::Sequence::AA.new(aaseq)
+    end
+    # Returns the count of each codon in the sequence as a Hash.
+    def codon_usage
+      hash = Hash.new(0)
+      self.window_search(3, 3) do |codon|
+        hash[codon] += 1
+      end
+      return hash
+    end
+    # Calculate the ratio of GC / ATGC bases in percent.
+    def gc_percent
+      count = self.composition
+      at = count['a'] + count['t'] + count['u']
+      gc = count['g'] + count['c']
+      gc = 100 * gc / (at + gc)
+      return gc
+    end
+    # Show abnormal bases other than 'atgcu'.
+    def illegal_bases
+      self.scan(/[^atgcu]/).sort.uniq
+    end
+    # Estimate the weight of this biological string molecule.
+    # NucleicAcid is defined in bio/data/na.rb
+    def molecular_weight
+      if self.rna?
+        NucleicAcid.weight(self, true)
+      else
+        NucleicAcid.weight(self)
+      end
+    end
+    # Convert the universal code string into the regular expression.
+    def to_re
+      if self.rna?
+        NucleicAcid.to_re(self.dna, true)
+      else
+        NucleicAcid.to_re(self)
+      end
+    end
+    # Convert the self string into the list of the names of each base.
+    def names
+      array = []
+      self.each_byte do |x|
+        array.push(NucleicAcid.names[x.chr.upcase])
+      end
+      return array
+    end
+    # Output a DNA string by substituting 'u' to 't'.
+    def dna
+      self.tr('u', 't')
+    end
+    def dna!
+      self.tr!('u', 't')
+    end
+    # Output a RNA string by substituting 't' to 'u'.
+    def rna
+      self.tr('t', 'u')
+    end
+    def rna!
+      self.tr!('t', 'u')
+    end
+    def rna?
+      self.index('u')
+    end
+    protected :rna?
+    def pikachu
+      self.dna.tr("atgc", "pika") # joke, of course :-)
+    end
+  end
+  # Amino Acid sequence
+  class AA < Sequence
+    # Generate an amino acid sequence object from a string.
+    def initialize(str)
+      super
+      self.upcase!
+      self.tr!(" \t\n\r",'')
+    end
+    # Estimate the weight of this protein.
+    # AminoAcid is defined in bio/data/aa.rb
+    def molecular_weight
+      AminoAcid.weight(self)
+    end
+    def to_re
+      AminoAcid.to_re(self)
+    end
+    # Generate the list of the names of each residue along the
+    # sequence (3-letter codes).
+    def codes
+      array = []
+      self.each_byte do |x|
+        array.push(AminoAcid.names[x.chr])
+      end
+      return array
+    end
+    # Similar to codes but returns long names.
+    def names
+      self.codes.map do |x|
+        AminoAcid.names[x]
+      end
+    end
+  end
+end # Sequence
+class Seq < Sequence
+  attr_accessor :entry_id, :definition, :features, :references, :comments,
+    :date, :keywords, :dblinks, :taxonomy, :moltype
+end
+end # Bio
+if __FILE__ == $0
+  puts "== Test Bio::Sequence::NA.new"
+  p Bio::Sequence::NA.new('')
+  p na = Bio::Sequence::NA.new('atgcatgcATGCATGCAAAA')
+  p rna = Bio::Sequence::NA.new('augcaugcaugcaugcaaaa')
+  puts "\n== Test Bio::Sequence::AA.new"
+  p Bio::Sequence::AA.new('')
+  p aa = Bio::Sequence::AA.new('ACDEFGHIKLMNPQRSTVWYU')
+  puts "\n== Test Bio::Sequence#to_s"
+  p na.to_s
+  p aa.to_s
+  puts "\n== Test Bio::Sequence#subseq(2,6)"
+  p na
+  p na.subseq(2,6)
+  puts "\n== Test Bio::Sequence#[2,6]"
+  p na
+  p na[2,6]
+  puts "\n== Test Bio::Sequence#to_fasta('hoge', 8)"
+  puts na.to_fasta('hoge', 8)
+  puts "\n== Test Bio::Sequence#window_search(15)"
+  p na
+  na.window_search(15) {|x| p x}
+  puts "\n== Test Bio::Sequence#total({'a'=>0.1,'t'=>0.2,'g'=>0.3,'c'=>0.4})"
+  p na.total({'a'=>0.1,'t'=>0.2,'g'=>0.3,'c'=>0.4})
+  puts "\n== Test Bio::Sequence#composition"
+  p na
+  p na.composition
+  p rna
+  p rna.composition
+  puts "\n== Test Bio::Sequence::NA#splicing('complement(join(1..5,16..20))')"
+  p na
+  p na.splicing("complement(join(1..5,16..20))")
+  p rna
+  p rna.splicing("complement(join(1..5,16..20))")
+  puts "\n== Test Bio::Sequence::NA#complement"
+  p na.complement
+  p rna.complement
+  p Bio::Sequence::NA.new('tacgyrkmhdbvswn').complement
+  p Bio::Sequence::NA.new('uacgyrkmhdbvswn').complement
+  puts "\n== Test Bio::Sequence::NA#translate"
+  p na
+  p na.translate
+  p rna
+  p rna.translate
+  puts "\n== Test Bio::Sequence::NA#gc_percent"
+  p na.gc
+  p rna.gc
+  puts "\n== Test Bio::Sequence::NA#illegal_bases"
+  p na.illegal_bases
+  p Bio::Sequence::NA.new('tacgyrkmhdbvswn').illegal_bases
+  p Bio::Sequence::NA.new('abcdefghijklmnopqrstuvwxyz-!%#$@').illegal_bases
+  puts "\n== Test Bio::Sequence::NA#molecular_weight"
+  p na
+  p na.molecular_weight
+  p rna
+  p rna.molecular_weight
+  puts "\n== Test Bio::Sequence::NA#to_re"
+  p Bio::Sequence::NA.new('atgcrymkdhvbswn')
+  p Bio::Sequence::NA.new('atgcrymkdhvbswn').to_re
+  p Bio::Sequence::NA.new('augcrymkdhvbswn')
+  p Bio::Sequence::NA.new('augcrymkdhvbswn').to_re
+  puts "\n== Test Bio::Sequence::NA#names"
+  p na.names
+  puts "\n== Test Bio::Sequence::NA#pikachu"
+  p na.pikachu
+  puts "\n== Test Bio::Sequence::NA#randomize"
+  print "Orig  : "; p na
+  print "Rand  : "; p na.randomize
+  print "Rand  : "; p na.randomize
+  print "Rand  : "; p na.randomize.randomize
+  print "Block : "; na.randomize do |x| print x end; puts
+  print "Orig  : "; p rna
+  print "Rand  : "; p rna.randomize
+  print "Rand  : "; p rna.randomize
+  print "Rand  : "; p rna.randomize.randomize
+  print "Block : "; rna.randomize do |x| print x end; puts
+  puts "\n== Test Bio::Sequence::NA.randomize(counts)"
+  print "Count : "; p counts = {'a'=>10,'c'=>20,'g'=>30,'t'=>40}
+  print "Rand  : "; p Bio::Sequence::NA.randomize(counts)
+  print "Count : "; p counts = {'a'=>10,'c'=>20,'g'=>30,'u'=>40}
+  print "Rand  : "; p Bio::Sequence::NA.randomize(counts)
+  print "Block : "; Bio::Sequence::NA.randomize(counts) {|x| print x}; puts
+  puts "\n== Test Bio::Sequence::AA#codes"
+  p aa
+  p aa.codes
+  puts "\n== Test Bio::Sequence::AA#names"
+  p aa
+  p aa.names
+  puts "\n== Test Bio::Sequence::AA#molecular_weight"
+  p aa.subseq(1,20)
+  p aa.subseq(1,20).molecular_weight
+  puts "\n== Test Bio::Sequence::AA#randomize"
+  aaseq = 'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA'
+  s = Bio::Sequence::AA.new(aaseq)
+  print "Orig  : "; p s
+  print "Rand  : "; p s.randomize
+  print "Rand  : "; p s.randomize
+  print "Rand  : "; p s.randomize.randomize
+  print "Block : "; s.randomize {|x| print x}; puts
+  puts "\n== Test Bio::Sequence::AA.randomize(counts)"
+  print "Count : "; p counts = s.composition
+  print "Rand  : "; puts Bio::Sequence::AA.randomize(counts)
+  print "Block : "; Bio::Sequence::AA.randomize(counts) {|x| print x}; puts
+end