RubyGems - bio - Versions diffs - 0.7.0 - Mend

bio 0.7.0

Files changed (201) hide show

data/bin/bioruby +107 -0
data/bin/br_biofetch.rb +59 -0
data/bin/br_bioflat.rb +294 -0
data/bin/br_biogetseq.rb +57 -0
data/bin/br_pmfetch.rb +431 -0
data/doc/BioRuby.rd.ja +225 -0
data/doc/Changes-0.7.rd +236 -0
data/doc/Design.rd.ja +341 -0
data/doc/KEGG_API.rd +1437 -0
data/doc/KEGG_API.rd.ja +1399 -0
data/doc/TODO.rd.ja +138 -0
data/doc/Tutorial.rd +1138 -0
data/doc/Tutorial.rd.ja +2110 -0
data/etc/bioinformatics/seqdatabase.ini +210 -0
data/lib/bio.rb +256 -0
data/lib/bio/alignment.rb +1906 -0
data/lib/bio/appl/bl2seq/report.rb +350 -0
data/lib/bio/appl/blast.rb +269 -0
data/lib/bio/appl/blast/format0.rb +1402 -0
data/lib/bio/appl/blast/format8.rb +95 -0
data/lib/bio/appl/blast/report.rb +652 -0
data/lib/bio/appl/blast/rexml.rb +151 -0
data/lib/bio/appl/blast/wublast.rb +553 -0
data/lib/bio/appl/blast/xmlparser.rb +222 -0
data/lib/bio/appl/blat/report.rb +392 -0
data/lib/bio/appl/clustalw.rb +191 -0
data/lib/bio/appl/clustalw/report.rb +154 -0
data/lib/bio/appl/emboss.rb +68 -0
data/lib/bio/appl/fasta.rb +262 -0
data/lib/bio/appl/fasta/format10.rb +428 -0
data/lib/bio/appl/fasta/format6.rb +37 -0
data/lib/bio/appl/genscan/report.rb +570 -0
data/lib/bio/appl/hmmer.rb +129 -0
data/lib/bio/appl/hmmer/report.rb +556 -0
data/lib/bio/appl/mafft.rb +222 -0
data/lib/bio/appl/mafft/report.rb +119 -0
data/lib/bio/appl/psort.rb +555 -0
data/lib/bio/appl/psort/report.rb +473 -0
data/lib/bio/appl/sim4.rb +134 -0
data/lib/bio/appl/sim4/report.rb +501 -0
data/lib/bio/appl/sosui/report.rb +166 -0
data/lib/bio/appl/spidey/report.rb +604 -0
data/lib/bio/appl/targetp/report.rb +283 -0
data/lib/bio/appl/tmhmm/report.rb +238 -0
data/lib/bio/command.rb +166 -0
data/lib/bio/data/aa.rb +354 -0
data/lib/bio/data/codontable.rb +740 -0
data/lib/bio/data/na.rb +226 -0
data/lib/bio/db.rb +340 -0
data/lib/bio/db/aaindex.rb +280 -0
data/lib/bio/db/embl/common.rb +332 -0
data/lib/bio/db/embl/embl.rb +446 -0
data/lib/bio/db/embl/sptr.rb +954 -0
data/lib/bio/db/embl/swissprot.rb +32 -0
data/lib/bio/db/embl/trembl.rb +31 -0
data/lib/bio/db/embl/uniprot.rb +32 -0
data/lib/bio/db/fantom.rb +604 -0
data/lib/bio/db/fasta.rb +869 -0
data/lib/bio/db/genbank/common.rb +299 -0
data/lib/bio/db/genbank/ddbj.rb +34 -0
data/lib/bio/db/genbank/genbank.rb +354 -0
data/lib/bio/db/genbank/genpept.rb +73 -0
data/lib/bio/db/genbank/refseq.rb +31 -0
data/lib/bio/db/gff.rb +106 -0
data/lib/bio/db/go.rb +497 -0
data/lib/bio/db/kegg/brite.rb +51 -0
data/lib/bio/db/kegg/cell.rb +88 -0
data/lib/bio/db/kegg/compound.rb +130 -0
data/lib/bio/db/kegg/enzyme.rb +125 -0
data/lib/bio/db/kegg/expression.rb +173 -0
data/lib/bio/db/kegg/genes.rb +293 -0
data/lib/bio/db/kegg/genome.rb +362 -0
data/lib/bio/db/kegg/glycan.rb +213 -0
data/lib/bio/db/kegg/keggtab.rb +418 -0
data/lib/bio/db/kegg/kgml.rb +299 -0
data/lib/bio/db/kegg/ko.rb +178 -0
data/lib/bio/db/kegg/reaction.rb +97 -0
data/lib/bio/db/litdb.rb +131 -0
data/lib/bio/db/medline.rb +317 -0
data/lib/bio/db/nbrf.rb +199 -0
data/lib/bio/db/pdb.rb +38 -0
data/lib/bio/db/pdb/atom.rb +60 -0
data/lib/bio/db/pdb/chain.rb +117 -0
data/lib/bio/db/pdb/model.rb +106 -0
data/lib/bio/db/pdb/pdb.rb +1682 -0
data/lib/bio/db/pdb/residue.rb +122 -0
data/lib/bio/db/pdb/utils.rb +234 -0
data/lib/bio/db/prosite.rb +616 -0
data/lib/bio/db/rebase.rb +417 -0
data/lib/bio/db/transfac.rb +387 -0
data/lib/bio/feature.rb +201 -0
data/lib/bio/io/brdb.rb +103 -0
data/lib/bio/io/das.rb +471 -0
data/lib/bio/io/dbget.rb +212 -0
data/lib/bio/io/ddbjxml.rb +614 -0
data/lib/bio/io/fastacmd.rb +123 -0
data/lib/bio/io/fetch.rb +114 -0
data/lib/bio/io/flatfile.rb +496 -0
data/lib/bio/io/flatfile/bdb.rb +266 -0
data/lib/bio/io/flatfile/index.rb +1308 -0
data/lib/bio/io/flatfile/indexer.rb +778 -0
data/lib/bio/io/higet.rb +92 -0
data/lib/bio/io/keggapi.rb +863 -0
data/lib/bio/io/pubmed.rb +189 -0
data/lib/bio/io/registry.rb +308 -0
data/lib/bio/io/soapwsdl.rb +114 -0
data/lib/bio/io/sql.rb +428 -0
data/lib/bio/location.rb +650 -0
data/lib/bio/pathway.rb +991 -0
data/lib/bio/reference.rb +308 -0
data/lib/bio/sequence.rb +593 -0
data/lib/bio/shell.rb +51 -0
data/lib/bio/shell/core.rb +512 -0
data/lib/bio/shell/plugin/codon.rb +228 -0
data/lib/bio/shell/plugin/entry.rb +85 -0
data/lib/bio/shell/plugin/flatfile.rb +119 -0
data/lib/bio/shell/plugin/keggapi.rb +187 -0
data/lib/bio/shell/plugin/midi.rb +448 -0
data/lib/bio/shell/plugin/obda.rb +63 -0
data/lib/bio/shell/plugin/seq.rb +238 -0
data/lib/bio/shell/session.rb +214 -0
data/lib/bio/util/color_scheme.rb +214 -0
data/lib/bio/util/color_scheme/buried.rb +78 -0
data/lib/bio/util/color_scheme/helix.rb +78 -0
data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
data/lib/bio/util/color_scheme/strand.rb +78 -0
data/lib/bio/util/color_scheme/taylor.rb +69 -0
data/lib/bio/util/color_scheme/turn.rb +78 -0
data/lib/bio/util/color_scheme/zappo.rb +69 -0
data/lib/bio/util/contingency_table.rb +337 -0
data/lib/bio/util/sirna.rb +306 -0
data/lib/bioruby.rb +34 -0
data/sample/biofetch.rb +475 -0
data/sample/color_scheme_na.rb +99 -0
data/sample/dbget +37 -0
data/sample/fasta2tab.rb +99 -0
data/sample/fsplit.rb +51 -0
data/sample/gb2fasta.rb +31 -0
data/sample/gb2tab.rb +325 -0
data/sample/gbtab2mysql.rb +161 -0
data/sample/genes2nuc.rb +33 -0
data/sample/genes2pep.rb +33 -0
data/sample/genes2tab.rb +81 -0
data/sample/genome2rb.rb +29 -0
data/sample/genome2tab.rb +76 -0
data/sample/goslim.rb +311 -0
data/sample/gt2fasta.rb +47 -0
data/sample/pmfetch.rb +42 -0
data/sample/pmsearch.rb +42 -0
data/sample/psortplot_html.rb +222 -0
data/sample/ssearch2tab.rb +96 -0
data/sample/tdiary.rb +158 -0
data/sample/tfastx2tab.rb +100 -0
data/sample/vs-genes.rb +212 -0
data/test/data/SOSUI/sample.report +11 -0
data/test/data/TMHMM/sample.report +21 -0
data/test/data/blast/eco:b0002.faa +15 -0
data/test/data/blast/eco:b0002.faa.m0 +128 -0
data/test/data/blast/eco:b0002.faa.m7 +65 -0
data/test/data/blast/eco:b0002.faa.m8 +1 -0
data/test/data/embl/AB090716.embl +65 -0
data/test/data/genscan/sample.report +63 -0
data/test/data/prosite/prosite.dat +2233 -0
data/test/data/refseq/nm_126355.entret +64 -0
data/test/data/uniprot/p53_human.uniprot +1456 -0
data/test/runner.rb +10 -0
data/test/unit/bio/appl/blast/test_report.rb +427 -0
data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
data/test/unit/bio/appl/genscan/test_report.rb +195 -0
data/test/unit/bio/appl/sosui/test_report.rb +94 -0
data/test/unit/bio/appl/targetp/test_report.rb +159 -0
data/test/unit/bio/appl/test_blast.rb +159 -0
data/test/unit/bio/appl/test_fasta.rb +142 -0
data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
data/test/unit/bio/data/test_aa.rb +103 -0
data/test/unit/bio/data/test_codontable.rb +120 -0
data/test/unit/bio/data/test_na.rb +89 -0
data/test/unit/bio/db/embl/test_common.rb +130 -0
data/test/unit/bio/db/embl/test_embl.rb +227 -0
data/test/unit/bio/db/embl/test_sptr.rb +268 -0
data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
data/test/unit/bio/db/kegg/test_genes.rb +58 -0
data/test/unit/bio/db/test_fasta.rb +263 -0
data/test/unit/bio/db/test_gff.rb +140 -0
data/test/unit/bio/db/test_prosite.rb +1450 -0
data/test/unit/bio/io/test_ddbjxml.rb +87 -0
data/test/unit/bio/io/test_soapwsdl.rb +45 -0
data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
data/test/unit/bio/test_alignment.rb +1028 -0
data/test/unit/bio/test_command.rb +71 -0
data/test/unit/bio/test_db.rb +109 -0
data/test/unit/bio/test_feature.rb +128 -0
data/test/unit/bio/test_location.rb +51 -0
data/test/unit/bio/test_pathway.rb +485 -0
data/test/unit/bio/test_sequence.rb +386 -0
data/test/unit/bio/test_shell.rb +31 -0
data/test/unit/bio/util/test_color_scheme.rb +45 -0
data/test/unit/bio/util/test_contingency_table.rb +106 -0
data/test/unit/bio/util/test_sirna.rb +258 -0
metadata +295 -0

data/lib/bio/db/pdb/residue.rb ADDED Viewed

@@ -0,0 +1,122 @@
+#
+# bio/db/pdb/residue.rb - residue class for PDB
+#
+#   Copyright (C) 2004 Alex Gutteridge <alexg@ebi.ac.uk>
+#
+#  This library is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU Lesser General Public
+#  License as published by the Free Software Foundation; either
+#  version 2 of the License, or (at your option) any later version.
+#
+#  This library is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  Lesser General Public License for more details.
+#
+#  You should have received a copy of the GNU Lesser General Public
+#  License along with this library; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+#
+#  $Id: residue.rb,v 1.4 2005/12/18 17:34:47 ngoto Exp $
+require 'bio/db/pdb'
+module Bio
+  class PDB
+    #Residue class - id is a composite of resSeq and iCode
+    class Residue
+      include Utils
+      include AtomFinder
+      include Enumerable
+      include Comparable
+      attr_reader :resName, :resSeq, :iCode, :id, :chain, :hetatm
+      attr_writer :resName, :chain, :hetatm
+      def initialize(resName = nil, resSeq = nil, iCode = nil,
+                     chain = nil, hetatm = false)
+        @resName = resName
+        @resSeq  = resSeq
+        @iCode   = iCode
+        @hetatm  = hetatm
+        #Residue id is required because resSeq doesn't uniquely identify
+        #a residue. ID is constructed from resSeq and iCode and is appended
+        #to 'LIGAND' if the residue is a HETATM
+        if (!@resSeq and !@iCode)
+          @id = nil
+        else
+          @id = "#{@resSeq}#{@iCode.strip}"
+          if @hetatm
+            @id = 'LIGAND' + @id
+          end
+        end
+        @chain   = chain
+        @atoms   = Array.new
+      end
+      #Keyed access to atoms based on element e.g. ["CA"]
+      def [](key)
+        atom = @atoms.find{ |atom| key == atom.element }
+      end
+      #Need to define these to make sure id is correctly updated
+      def resSeq=(resSeq)
+        @resSeq = resSeq.to_i
+        @id      = "#{@resSeq}#{@iCode.strip}"
+        if @hetatm
+          @id = 'LIGAND' + @id
+        end
+      end
+      def iCode=(iCode)
+        @iCode = iCode
+        @id    = "#{@resSeq}#{@iCode.strip}"
+        if @hetatm
+          @id = 'LIGAND' + @id
+        end
+      end
+      #Adds an atom to this residue
+      def addAtom(atom)
+        raise "Expecting ATOM or HETATM" unless atom.is_a? Bio::PDB::Record::ATOM
+        @atoms.push(atom)
+        self
+      end
+      #Iterator over the atoms
+      def each
+        @atoms.each{ |atom| yield atom }
+      end
+      #Alias to override AtomFinder#each_atom
+      alias each_atom each
+      #Sorts based on resSeq and iCode if need be
+      def <=>(other)
+        if @resSeq != other.resSeq
+          return @resSeq <=> other.resSeq
+        else
+          return @iCode <=> other.iCode
+        end
+      end
+      #Stringifies each atom
+      def to_s
+        string = ""
+        @atoms.each{ |atom| string << atom.to_s << "\n" }
+        return string
+      end
+    end
+  end
+end

data/lib/bio/db/pdb/utils.rb ADDED Viewed

@@ -0,0 +1,234 @@
+#
+# bio/db/pdb/utils.rb - Utility modules for PDB
+#
+#   Copyright (C) 2004 Alex Gutteridge <alexg@ebi.ac.uk>
+#   Copyright (C) 2004 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
+#
+#  This library is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU Lesser General Public
+#  License as published by the Free Software Foundation; either
+#  version 2 of the License, or (at your option) any later version.
+#
+#  This library is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  Lesser General Public License for more details.
+#
+#  You should have received a copy of the GNU Lesser General Public
+#  License along with this library; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+#
+#  $Id: utils.rb,v 1.2 2005/09/08 01:22:11 k Exp $
+require 'matrix'
+require 'bio/db/pdb'
+module Bio; class PDB
+  module Utils
+    #The methods in this mixin should be applicalbe to all PDB objects
+    #Returns the coordinates of the geometric centre (average co-ord)
+    #of any AtomFinder (or .atoms) implementing object
+    def geometricCentre()
+      x = y = z = count = 0
+      self.each_atom{ |atom|
+        x += atom.x
+        y += atom.y
+        z += atom.z
+        count += 1
+      }
+      x = x / count
+      y = y / count
+      z = z / count
+      Coordinate[x,y,z]
+    end
+    #Returns the coords of the centre of gravity for any
+    #AtomFinder implementing object
+    #Blleurgh! - working out what element it is from the atom name is
+    #tricky - this'll work in most cases but not metals etc...
+    #a proper element field is included in some PDB files but not all.
+    ElementMass = {
+      'H' => 1,
+      'C' => 12,
+      'N' => 14,
+      'O' => 16,
+      'S' => 32,
+      'P' => 31
+    }
+    def centreOfGravity()
+      x = y = z = total = 0
+      self.each_atom{ |atom|
+        element = atom.element[0,1]
+        mass    = ElementMass[element]
+        total += mass
+        x += atom.x * mass
+        y += atom.y * mass
+        z += atom.z * mass
+      }
+      x = x / total
+      y = y / total
+      z = z / total
+      Coordinate[x,y,z]
+    end
+    #Perhaps distance and dihedral would be better off as class methods?
+    #(rather) than instance methods
+    def self.distance(coord1,coord2)
+      coord1 = to_xyz(coord1)
+      coord2 = to_xyz(coord2)
+      (coord1 - coord2).r
+    end
+    def self.dihedral_angle(coord1,coord2,coord3,coord4)
+      (a1,b1,c1,d) = calculatePlane(coord1,coord2,coord3)
+      (a2,b2,c2)   = calculatePlane(coord2,coord3,coord4)
+      torsion = acos((a1*a2 + b1*b2 + c1*c2)/(Math.sqrt(a1**2 + b1**2 + c1**2) * Math.sqrt(a2**2 + b2**2 + c2**2)))
+      if ((a1*coord4.x + b1*coord4.y + c1*coord4.z + d) < 0)
+        -torsion
+      else
+        torsion
+      end
+    end
+    #Implicit conversion into Vector or Bio::PDB::Coordinate
+    def self.to_xyz(obj)
+      unless obj.is_a?(Vector)
+        begin
+          obj = obj.xyz
+        rescue NameError
+          obj = Vector.elements(obj.to_a)
+        end
+      end
+      obj
+    end
+    #Methods required for the dihedral angle calculations
+    #perhaps these should go in some separate Math module
+    def self.rad2deg(r)
+      (r/Math::PI)*180
+    end
+    def self.acos(x)
+      Math.atan2(Math.sqrt(1 - x**2),x)
+    end
+    def self.calculatePlane(coord1,coord2,coord3)
+      a = coord1.y * (coord2.z - coord3.z) +
+          coord2.y * (coord3.z - coord1.z) +
+          coord3.y * (coord1.z - coord2.z)
+      b = coord1.z * (coord2.x - coord3.x) +
+          coord2.z * (coord3.x - coord1.x) +
+          coord3.z * (coord1.x - coord2.x)
+      c = coord1.x * (coord2.y - coord3.y) +
+          coord2.x * (coord3.y - coord1.y) +
+          coord3.x * (coord1.y - coord2.y)
+      d = -1 *
+          (
+           (coord1.x * (coord2.y * coord3.z - coord3.y * coord2.z)) +
+           (coord2.x * (coord3.y * coord1.z - coord1.y * coord3.z)) +
+           (coord3.x * (coord1.y * coord2.z - coord2.y * coord1.z))
+           )
+      return [a,b,c,d]
+    end
+    #Every class in the heirarchy implements finder, this takes
+    #a class which determines which type of object to find, the associated
+    #block is then run in classic .find style
+    def finder(findtype,&block)
+      if findtype == Bio::PDB::Atom
+        return self.find_atom(&block)
+      elsif findtype == Bio::PDB::Residue
+        return self.find_residue(&block)
+      elsif findtype == Bio::PDB::Chain
+        return self.find_chain(&block)
+      elsif findtype == Bio::PDB::Model
+        return self.find_model(&block)
+      else
+        raise TypeError, "You can't find a #{findtype}"
+      end
+    end
+  end #module Utils
+  #The *Finder modules implement a find_* method which returns
+  #an array of everything for which the block evaluates to true
+  #(compare the Enumerable#find_all method).
+  #The each_* style methods act as classic iterators.
+  module ModelFinder
+    def find_model()
+      array = []
+      self.each_model{ |model|
+        array.push(model) if yield(model)
+      }
+      return array
+    end
+  end
+  #The hierarchical nature of the objects allows us to re-use the
+  #methods from the previous level - e.g. a PDB object can use its
+  #each_model iterator to walk through the models and find the
+  #chains
+  module ChainFinder
+    def find_chain()
+      array = []
+      self.each_chain{ |chain|
+        array.push(chain) if yield(chain)
+      }
+      return array
+    end
+    def each_chain()
+      self.each_model{ |model|
+        model.each{ |chain| yield chain }
+      }
+    end
+  end
+  module ResidueFinder
+    def find_residue()
+      array = []
+      self.each_residue{ |residue|
+        array.push(residue) if yield(residue)
+      }
+      return array
+    end
+    def each_residue()
+      self.each_chain{ |chain|
+        chain.each{ |residue| yield residue }
+      }
+    end
+  end
+  module AtomFinder
+    def find_atom()
+      array = []
+      self.each_atom{ |atom|
+        array.push(atom) if yield(atom)
+      }
+      return array
+    end
+    def each_atom()
+      self.each_residue{ |residue|
+        residue.each{ |atom| yield atom }
+      }
+    end
+  end
+end; end #module Bio; class PDB

data/lib/bio/db/prosite.rb ADDED Viewed

@@ -0,0 +1,616 @@
+#
+# = bio/db/prosite.rb - PROSITE database class
+#
+# Copyright::  Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
+# Licence::    LGPL
+#
+# $Id: prosite.rb,v 0.13 2005/12/18 18:24:08 k Exp $
+#
+# == Description
+#
+#
+# == Example
+# == References
+#--
+#
+#  This library is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU Lesser General Public
+#  License as published by the Free Software Foundation; either
+#  version 2 of the License, or (at your option) any later version.
+#
+#  This library is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  Lesser General Public License for more details.
+#
+#  You should have received a copy of the GNU Lesser General Public
+#  License along with this library; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+#
+#++
+#
+require 'bio/db'
+module Bio
+  class PROSITE < EMBLDB
+    # Entry delimiter: a line consisting only of '//'
+    DELIMITER = "\n//\n"
+    # Record separator; same value as DELIMITER
+    RS = DELIMITER
+    # Tag size handed to the superclass initializer (Bio::DB API)
+    TAGSIZE = 5
+    def initialize(entry)
+      super(entry, TAGSIZE)
+    end
+    # ID  Identification                     (Begins each entry; 1 per entry)
+    #
+    #  ID   ENTRY_NAME; ENTRY_TYPE.  (ENTRY_TYPE : PATTERN, MATRIX, RULE)
+    #
+    # Returns the entry name: the ID line minus its trailing '.', up to '; ' (the rest is cached as TYPE)
+    def name
+      unless @data['ID']
+        @data['ID'], @data['TYPE'] = fetch('ID').chomp('.').split('; ')
+      end
+      @data['ID']
+    end
+    # Returns the entry type (second item of the ID line); calls #name to parse the line when not cached
+    def division
+      unless @data['TYPE']
+        name
+      end
+      @data['TYPE']
+    end
+    # AC  Accession number                   (1 per entry)
+    #
+    #  AC   PSnnnnn;
+    #
+    # Returns the AC line content with the trailing ';' removed (cached); also available as entry_id
+    def ac
+      unless @data['AC']
+        @data['AC'] = fetch('AC').chomp(';')
+      end
+      @data['AC']
+    end
+    alias entry_id ac
+    # DT  Date                               (1 per entry)
+    #
+    #  DT   MMM-YYYY (CREATED); MMM-YYYY (DATA UPDATE); MMM-YYYY (INFO UPDATE).
+    #
+    # Returns the result of field_fetch('DT'); also available as date
+    def dt
+      field_fetch('DT')
+    end
+    alias date dt
+    # DE  Short description                  (1 per entry)
+    #
+    #  DE   Description.
+    #
+    # Returns the result of field_fetch('DE'); also available as definition
+    def de
+      field_fetch('DE')
+    end
+    alias definition de
+    # PA  Pattern                            (>=0 per entry)
+    #
+    #  see - pa2re method
+    #
+    # Returns the PA field with all whitespace removed, or nil when there is none; also available as pattern
+    def pa
+      field_fetch('PA')
+      @data['PA'] = fetch('PA') unless @data['PA']
+      @data['PA'].gsub!(/\s+/, '') if @data['PA']
+      @data['PA']
+    end
+    alias pattern pa
+    # MA  Matrix/profile                     (>=0 per entry)
+    #
+    #  see - ma2re method
+    #
+    # Returns the result of field_fetch('MA'); also available as profile
+    def ma
+      field_fetch('MA')
+    end
+    alias profile ma
+    # RU  Rule                               (>=0 per entry)
+    #
+    #  RU   Rule_Description.
+    #
+    #  The rule is described in ordinary English and is free-format.
+    #
+    # Returns the result of field_fetch('RU'); also available as rule
+    def ru
+      field_fetch('RU')
+    end
+    alias rule ru
+    # NR  Numerical results                  (>=0 per entry)
+    #
+    #   - SWISS-PROT scan statistics of true and false positives/negatives
+    #
+    # /RELEASE     SWISS-PROT release  number and  total  number  of  sequence
+    #              entries in that release.
+    # /TOTAL       Total number of hits in SWISS-PROT.
+    # /POSITIVE    Number of  hits on proteins that are known to belong to the
+    #              set in consideration.
+    # /UNKNOWN     Number of  hits on  proteins that  could possibly belong to
+    #              the set in consideration.
+    # /FALSE_POS   Number of false hits (on unrelated proteins).
+    # /FALSE_NEG   Number of known missed hits.
+    # /PARTIAL     Number of  partial sequences  which belong  to the  set  in
+    #              consideration, but  which  are  not  hit  by the pattern or
+    #              profile because they are partial (fragment) sequences.
+    #
+    # Returns a hash of qualifier name => parsed value for the NR lines (cached); also available as statistics
+    def nr
+      unless @data['NR']
+        hash = {}			# temporary hash, stored in @data['NR'] once filled
+        fetch('NR').scan(%r{/(\S+)=([^;]+);}).each do |k, v|
+          if v =~ /^(\d+)\((\d+)\)$/
+            hits = $1.to_i		# the number of hits
+            seqs = $2.to_i		# the number of sequences
+            v = [hits, seqs]
+          elsif v =~ /([\d\.]+),(\d+)/
+            sprel = $1			# the SWISS-PROT release number (kept as a String)
+            spseq = $2.to_i		# the number of SWISS-PROT sequences
+            v = [sprel, spseq]
+          else
+            v = v.to_i
+          end
+          hash[k] = v
+        end
+        @data['NR'] = hash
+      end
+      @data['NR']
+    end
+    alias statistics nr
+    # Returns statistics['RELEASE'] (a "number,count" value is parsed by #nr into [String, Integer])
+    def release
+      statistics['RELEASE']
+    end
+    # Returns the first element of #release
+    def swissprot_release_number
+      release.first
+    end
+    # Returns the last element of #release
+    def swissprot_release_sequences
+      release.last
+    end
+    # Returns statistics['TOTAL'] (a "hits(seqs)" value is parsed by #nr into [hits, seqs] Integers)
+    def total
+      statistics['TOTAL']
+    end
+    # Returns the first element of #total
+    def total_hits
+      total.first
+    end
+    # Returns the last element of #total
+    def total_sequences
+      total.last
+    end
+    # Returns statistics['POSITIVE']
+    def positive
+      statistics['POSITIVE']
+    end
+    # Returns the first element of #positive
+    def positive_hits
+      positive.first
+    end
+    # Returns the last element of #positive
+    def positive_sequences
+      positive.last
+    end
+    # Returns statistics['UNKNOWN']
+    def unknown
+      statistics['UNKNOWN']
+    end
+    # Returns the first element of #unknown
+    def unknown_hits
+      unknown.first
+    end
+    # Returns the last element of #unknown
+    def unknown_sequences
+      unknown.last
+    end
+    # Returns statistics['FALSE_POS']
+    def false_pos
+      statistics['FALSE_POS']
+    end
+    # Returns the first element of #false_pos
+    def false_positive_hits
+      false_pos.first
+    end
+    # Returns the last element of #false_pos
+    def false_positive_sequences
+      false_pos.last
+    end
+    # Returns statistics['FALSE_NEG']; also available as false_negative_hits
+    def false_neg
+      statistics['FALSE_NEG']
+    end
+    alias false_negative_hits false_neg
+    # Returns statistics['PARTIAL']
+    def partial
+      statistics['PARTIAL']
+    end
+    # CC  Comments                           (>=0 per entry)
+    #
+    #  CC   /QUALIFIER=data; /QUALIFIER=data; .......
+    #
+    # /TAXO-RANGE  Taxonomic range.
+    # /MAX-REPEAT  Maximum known  number of  repetitions of  the pattern  in a
+    #              single protein.
+    # /SITE        Indication of an `interesting' site in the pattern.
+    # /SKIP-FLAG   Indication of  an entry that can be, in some cases, ignored
+    #              by a program (because it is too unspecific).
+    #
+    # Returns a hash of qualifier name => value (String) for each /QUALIFIER=value; item of the CC lines (cached); also available as comment
+    def cc
+      unless @data['CC']
+        hash = {}			# temporary hash, stored in @data['CC'] once filled
+        fetch('CC').scan(%r{/(\S+)=([^;]+);}).each do |k, v|
+          hash[k] = v
+        end
+        @data['CC'] = hash
+      end
+      @data['CC']
+    end
+    alias comment cc
+    # Returns
+    def taxon_range(expand = nil)
+      range = comment['TAXO-RANGE']
+      if range and expand
+        expand = []
+        range.scan(/./) do |x|
+          case x
+          when 'A'; expand.push('archaebacteria')
+          when 'B'; expand.push('bacteriophages')
+          when 'E'; expand.push('eukaryotes')
+          when 'P'; expand.push('prokaryotes')
+          when 'V'; expand.push('eukaryotic viruses')
+          end
+        end
+        range = expand
+      end
+      return range
+    end
+    # Returns the /MAX-REPEAT qualifier converted with to_i (0 when the qualifier is absent)
+    def max_repeat
+      comment['MAX-REPEAT'].to_i
+    end
+    # Returns [number (Integer), description] from the /SITE qualifier split on ','; [0, nil] when it is absent
+    def site
+      if comment['SITE']
+        num, desc = comment['SITE'].split(',')
+      end
+      return [num.to_i, desc]
+    end
+    # Returns true when the /SKIP-FLAG qualifier equals 'TRUE', otherwise nil
+    def skip_flag
+      if comment['SKIP-FLAG'] == 'TRUE'
+        return true
+      end
+    end
+    # DR  Cross-references to SWISS-PROT     (>=0 per entry)
+    #
+    #  DR   AC_NB, ENTRY_NAME, C; AC_NB, ENTRY_NAME, C; AC_NB, ENTRY_NAME, C;
+    #
+    # - `AC_NB' is the SWISS-PROT primary accession number of the entry to
+    #   which reference is being made.
+    # - `ENTRY_NAME' is the SWISS-PROT entry name.
+    # - `C' is a one character flag that can be one of the following:
+    #
+    # T For a true positive.
+    # N For a false negative; a sequence which belongs to the set under
+    #   consideration, but which has not been picked up by the pattern or
+    #   profile.
+    # P For a `potential' hit; a sequence that belongs to the set under
+    #   consideration, but which was not picked up because the region(s) that
+    #   are used as a 'fingerprint' (pattern or profile) is not yet available
+    #   in the data bank (partial sequence).
+    # ? For an unknown; a sequence which possibly could belong to the set under
+    #   consideration.
+    # F For a false positive; a sequence which does not belong to the set in
+    #   consideration.
+    #
+    # Returns a hash of accession => [entry name, flag] parsed from the DR lines (empty when fetch('DR') is nil/false); also available as sp_xref
+    def dr
+      unless @data['DR']
+        hash = {}			# temporary hash, stored in @data['DR'] once filled
+        if fetch('DR')
+          fetch('DR').scan(/(\w+)\s*, (\w+)\s*, (.);/).each do |a, e, c|
+            hash[a] = [e, c]	# SWISS-PROT : accession => [entry name, one-character flag]
+          end
+        end
+        @data['DR'] = hash
+      end
+      @data['DR']
+    end
+    alias sp_xref dr
+    # Returns
+    def list_xref(flag, by_name = nil)
+      ary = []
+      sp_xref.each do |sp_acc, value|
+        if value[1] == flag
+          if by_name
+            sp_name = value[0]
+            ary.push(sp_name)
+          else
+            ary.push(sp_acc)
+          end
+        end
+      end
+      return ary
+    end
+    # Returns
+    def list_truepositive(by_name = nil)
+      list_xref('T', by_name)
+    end
+    # Returns
+    def list_falsenegative(by_name = nil)
+      list_xref('F', by_name)
+    end
+    # Returns
+    def list_falsepositive(by_name = nil)
+      list_xref('P', by_name)
+    end
+    # Returns
+    def list_potentialhit(by_name = nil)
+      list_xref('P', by_name)
+    end
+    # Returns
+    def list_unknown(by_name = nil)
+      list_xref('?', by_name)
+    end
+    # 3D  Cross-references to PDB            (>=0 per entry)
+    #
+    #  3D   name; [name2;...]
+    #
+    # Returns the 3D line split on ';' (and any spaces after it) into an array of names (cached)
+    def pdb_xref
+      unless @data['3D']
+        @data['3D'] = fetch('3D').split(/; */)
+      end
+      @data['3D']
+    end
+    # DO  Pointer to the documentation file  (1 per entry)
+    #
+    #  DO   PDOCnnnnn;
+    #
+    # Returns the DO line content with the trailing ';' removed (recomputed and stored on every call)
+    def pdoc_xref
+      @data['DO'] = fetch('DO').chomp(';')
+    end
+    ### prosite pattern to regular expression
+    #
+    # prosite/prosuser.txt:
+    #
+    # The PA (PAttern) lines contains the definition of a PROSITE pattern. The
+    # patterns are described using the following conventions:
+    #
+    # 0) The standard IUPAC one-letter codes for the amino acids are used.
+    # 0) Ambiguities are indicated by listing the acceptable amino acids for a
+    #   given position, between square parentheses `[ ]'. For example: [ALT]
+    #   stands for Ala or Leu or Thr.
+    # 1) A period ends the pattern.
+    # 2) When a pattern is restricted to either the N- or C-terminal of a
+    #   sequence, that pattern either starts with a `<' symbol or respectively
+    #   ends with a `>' symbol.
+    # 3) Ambiguities are also indicated by listing between a pair of curly
+    #   brackets `{ }' the amino acids that are not accepted at a given
+    #   position. For example: {AM} stands for any amino acid except Ala and
+    #   Met.
+    # 4) Repetition of an element of the pattern can be indicated by following
+    #   that element with a numerical value or a numerical range between
+    #   parenthesis. Examples: x(3) corresponds to x-x-x, x(2,4) corresponds to
+    #   x-x or x-x-x or x-x-x-x.
+    # 5) The symbol `x' is used for a position where any amino acid is accepted.
+    # 6) Each element in a pattern is separated from its neighbor by a `-'.
+    #
+    # Examples:
+    #
+    # PA  [AC]-x-V-x(4)-{ED}.
+    #
+    # This pattern is translated as: [Ala or Cys]-any-Val-any-any-any-any-{any
+    # but Glu or Asp}
+    #
+    # PA  <A-x-[ST](2)-x(0,1)-V.
+    #
+    # This pattern, which must be in the N-terminal of the sequence (`<'), is
+    # translated as: Ala-any-[Ser or Thr]-[Ser or Thr]-(any or none)-Val
+    #
+    def self.pa2re(pattern)
+      pattern.gsub!(/\s/, '')	# remove white spaces
+      pattern.sub!(/\.$/, '')	# (1) remove trailing '.'
+      pattern.sub!(/^</, '^')	# (2) restricted to the N-terminal : `<'
+      pattern.sub!(/>$/, '$')	# (2) restricted to the C-terminal : `>'
+      pattern.gsub!(/\{(\w+)\}/) { |m|
+        '[^' + $1 + ']'		# (3) not accepted at a given position : '{}'
+      }
+      pattern.gsub!(/\(([\d,]+)\)/) { |m|
+        '{' + $1 + '}'		# (4) repetition of an element : (n), (n,m)
+      }
+      pattern.tr!('x', '.')	# (5) any amino acid is accepted : 'x'
+      pattern.tr!('-', '')	# (6) each element is separated by a '-'
+      Regexp.new(pattern)
+    end
+    def pa2re(pattern)
+      self.class.pa2re(pattern)
+    end
+    ### prosite profile to regular expression
+    #
+    # prosite/profile.txt:
+    #
+    # Not implemented: always raises NotImplementedError, whatever matrix is given
+    def ma2re(matrix)
+      raise NotImplementedError
+    end
+  end
+end
+if __FILE__ == $0
+  begin
+    require 'pp'
+    alias p pp
+  rescue LoadError
+  end
+  ps = Bio::PROSITE.new(ARGF.read)
+  list = %w(
+    name
+    division
+    ac
+    entry_id
+    dt
+    date
+    de
+    definition
+    pa
+    pattern
+    ma
+    profile
+    ru
+    rule
+    nr
+    statistics
+    release
+    swissprot_release_number
+    swissprot_release_sequences
+    total
+    total_hits
+    total_sequences
+    positive
+    positive_hits
+    positive_sequences
+    unknown
+    unknown_hits
+    unknown_sequences
+    false_pos
+    false_positive_hits
+    false_positive_sequences
+    false_neg
+    false_negative_hits
+    partial
+    cc
+    comment
+    max_repeat
+    site
+    skip_flag
+    dr
+    sp_xref
+    pdb_xref
+    pdoc_xref
+  )
+  list.each do |method|
+    puts ">>> #{method}"
+    p ps.send(method)
+  end
+  puts ">>> taxon_range"
+  p ps.taxon_range
+  puts ">>> taxon_range(expand)"
+  p ps.taxon_range(true)
+  puts ">>> list_truepositive"
+  p ps.list_truepositive
+  puts ">>> list_truepositive(by_name)"
+  p ps.list_truepositive(true)
+  puts ">>> list_falsenegative"
+  p ps.list_falsenegative
+  puts ">>> list_falsenegative(by_name)"
+  p ps.list_falsenegative(true)
+  puts ">>> list_falsepositive"
+  p ps.list_falsepositive
+  puts ">>> list_falsepositive(by_name)"
+  p ps.list_falsepositive(true)
+  puts ">>> list_potentialhit"
+  p ps.list_potentialhit
+  puts ">>> list_potentialhit(by_name)"
+  p ps.list_potentialhit(true)
+  puts ">>> list_unknown"
+  p ps.list_unknown
+  puts ">>> list_unknown(by_name)"
+  p ps.list_unknown(true)
+end