RubyGems - bio - Versions diffs - 1.2.1 → 1.3.0 - Mend

bio 1.2.1 → 1.3.0

Files changed (259) hide show

data/ChangeLog +3421 -0
data/KNOWN_ISSUES.rdoc +88 -0
data/README.rdoc +252 -0
data/README_DEV.rdoc +285 -0
data/Rakefile +143 -0
data/bin/bioruby +0 -0
data/bin/br_biofetch.rb +0 -0
data/bin/br_bioflat.rb +12 -1
data/bin/br_biogetseq.rb +0 -0
data/bin/br_pmfetch.rb +4 -3
data/bioruby.gemspec +477 -0
data/bioruby.gemspec.erb +117 -0
data/doc/Changes-0.7.rd +7 -0
data/doc/Changes-1.3.rdoc +239 -0
data/doc/Tutorial.rd +296 -184
data/doc/Tutorial.rd.html +1031 -0
data/doc/Tutorial.rd.ja +111 -45
data/doc/Tutorial.rd.ja.html +2225 -0
data/doc/bioruby.css +281 -0
data/extconf.rb +2 -0
data/lib/bio.rb +29 -4
data/lib/bio/appl/blast.rb +306 -121
data/lib/bio/appl/blast/ddbj.rb +142 -0
data/lib/bio/appl/blast/format0.rb +35 -25
data/lib/bio/appl/blast/format8.rb +2 -2
data/lib/bio/appl/blast/genomenet.rb +263 -0
data/lib/bio/appl/blast/ncbioptions.rb +220 -0
data/lib/bio/appl/blast/remote.rb +106 -0
data/lib/bio/appl/blast/report.rb +260 -9
data/lib/bio/appl/blast/rexml.rb +12 -5
data/lib/bio/appl/blast/rpsblast.rb +277 -0
data/lib/bio/appl/blast/wublast.rb +133 -12
data/lib/bio/appl/blast/xmlparser.rb +35 -18
data/lib/bio/appl/blat/report.rb +46 -5
data/lib/bio/appl/emboss.rb +62 -13
data/lib/bio/appl/fasta.rb +9 -11
data/lib/bio/appl/genscan/report.rb +3 -3
data/lib/bio/appl/hmmer.rb +1 -1
data/lib/bio/appl/hmmer/report.rb +10 -10
data/lib/bio/appl/paml/baseml.rb +95 -0
data/lib/bio/appl/paml/baseml/report.rb +32 -0
data/lib/bio/appl/paml/codeml.rb +242 -0
data/lib/bio/appl/paml/codeml/rates.rb +67 -0
data/lib/bio/appl/paml/codeml/report.rb +67 -0
data/lib/bio/appl/paml/common.rb +348 -0
data/lib/bio/appl/paml/common_report.rb +38 -0
data/lib/bio/appl/paml/yn00.rb +103 -0
data/lib/bio/appl/paml/yn00/report.rb +32 -0
data/lib/bio/appl/psort.rb +2 -2
data/lib/bio/appl/pts1.rb +5 -5
data/lib/bio/appl/tmhmm/report.rb +10 -1
data/lib/bio/command.rb +297 -41
data/lib/bio/compat/features.rb +157 -0
data/lib/bio/compat/references.rb +128 -0
data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
data/lib/bio/db/biosql/sequence.rb +508 -0
data/lib/bio/db/embl/common.rb +28 -12
data/lib/bio/db/embl/embl.rb +107 -9
data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
data/lib/bio/db/embl/format_embl.rb +190 -0
data/lib/bio/db/embl/sptr.rb +15 -16
data/lib/bio/db/fantom.rb +6 -8
data/lib/bio/db/fasta.rb +10 -507
data/lib/bio/db/fasta/defline.rb +532 -0
data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
data/lib/bio/db/fasta/format_fasta.rb +97 -0
data/lib/bio/db/genbank/common.rb +25 -8
data/lib/bio/db/genbank/format_genbank.rb +187 -0
data/lib/bio/db/genbank/genbank.rb +36 -1
data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
data/lib/bio/db/gff.rb +1791 -119
data/lib/bio/db/kegg/glycan.rb +2 -6
data/lib/bio/db/lasergene.rb +3 -3
data/lib/bio/db/medline.rb +4 -1
data/lib/bio/db/newick.rb +10 -10
data/lib/bio/db/pdb/chain.rb +6 -2
data/lib/bio/db/pdb/pdb.rb +12 -3
data/lib/bio/db/rebase.rb +7 -8
data/lib/bio/db/soft.rb +3 -3
data/lib/bio/feature.rb +1 -88
data/lib/bio/io/biosql/biodatabase.rb +64 -0
data/lib/bio/io/biosql/bioentry.rb +29 -0
data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
data/lib/bio/io/biosql/bioentry_path.rb +12 -0
data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
data/lib/bio/io/biosql/biosequence.rb +11 -0
data/lib/bio/io/biosql/comment.rb +7 -0
data/lib/bio/io/biosql/config/database.yml +20 -0
data/lib/bio/io/biosql/dbxref.rb +13 -0
data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
data/lib/bio/io/biosql/location.rb +32 -0
data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
data/lib/bio/io/biosql/ontology.rb +10 -0
data/lib/bio/io/biosql/reference.rb +9 -0
data/lib/bio/io/biosql/seqfeature.rb +32 -0
data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
data/lib/bio/io/biosql/taxon.rb +12 -0
data/lib/bio/io/biosql/taxon_name.rb +9 -0
data/lib/bio/io/biosql/term.rb +27 -0
data/lib/bio/io/biosql/term_dbxref.rb +11 -0
data/lib/bio/io/biosql/term_path.rb +12 -0
data/lib/bio/io/biosql/term_relationship.rb +13 -0
data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
data/lib/bio/io/biosql/term_synonym.rb +10 -0
data/lib/bio/io/das.rb +7 -7
data/lib/bio/io/ddbjxml.rb +57 -0
data/lib/bio/io/ensembl.rb +2 -2
data/lib/bio/io/fetch.rb +28 -14
data/lib/bio/io/flatfile.rb +17 -853
data/lib/bio/io/flatfile/autodetection.rb +545 -0
data/lib/bio/io/flatfile/buffer.rb +237 -0
data/lib/bio/io/flatfile/index.rb +17 -7
data/lib/bio/io/flatfile/indexer.rb +30 -12
data/lib/bio/io/flatfile/splitter.rb +297 -0
data/lib/bio/io/hinv.rb +442 -0
data/lib/bio/io/keggapi.rb +2 -2
data/lib/bio/io/ncbirest.rb +733 -0
data/lib/bio/io/pubmed.rb +34 -80
data/lib/bio/io/registry.rb +2 -2
data/lib/bio/io/sql.rb +178 -357
data/lib/bio/io/togows.rb +458 -0
data/lib/bio/location.rb +106 -11
data/lib/bio/pathway.rb +120 -14
data/lib/bio/reference.rb +115 -101
data/lib/bio/sequence.rb +164 -183
data/lib/bio/sequence/adapter.rb +108 -0
data/lib/bio/sequence/common.rb +22 -45
data/lib/bio/sequence/compat.rb +2 -2
data/lib/bio/sequence/dblink.rb +54 -0
data/lib/bio/sequence/format.rb +254 -77
data/lib/bio/sequence/format_raw.rb +23 -0
data/lib/bio/shell.rb +3 -1
data/lib/bio/shell/core.rb +2 -2
data/lib/bio/shell/plugin/entry.rb +33 -4
data/lib/bio/shell/plugin/ncbirest.rb +64 -0
data/lib/bio/shell/plugin/togows.rb +40 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
data/lib/bio/tree.rb +4 -2
data/lib/bio/util/color_scheme.rb +2 -2
data/lib/bio/util/contingency_table.rb +2 -2
data/lib/bio/util/restriction_enzyme.rb +2 -2
data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
data/lib/bio/version.rb +25 -0
data/rdoc.zsh +8 -0
data/sample/any2fasta.rb +0 -0
data/sample/biofetch.rb +0 -0
data/sample/dbget +0 -0
data/sample/demo_sequence.rb +158 -0
data/sample/enzymes.rb +0 -0
data/sample/fasta2tab.rb +0 -0
data/sample/fastagrep.rb +72 -0
data/sample/fastasort.rb +54 -0
data/sample/fsplit.rb +0 -0
data/sample/gb2fasta.rb +2 -3
data/sample/gb2tab.rb +0 -0
data/sample/gbtab2mysql.rb +0 -0
data/sample/genes2nuc.rb +0 -0
data/sample/genes2pep.rb +0 -0
data/sample/genes2tab.rb +0 -0
data/sample/genome2rb.rb +0 -0
data/sample/genome2tab.rb +0 -0
data/sample/goslim.rb +0 -0
data/sample/gt2fasta.rb +0 -0
data/sample/na2aa.rb +34 -0
data/sample/pmfetch.rb +0 -0
data/sample/pmsearch.rb +0 -0
data/sample/ssearch2tab.rb +0 -0
data/sample/tfastx2tab.rb +0 -0
data/sample/vs-genes.rb +0 -0
data/setup.rb +1596 -0
data/test/data/blast/blastp-multi.m7 +188 -0
data/test/data/command/echoarg2.bat +1 -0
data/test/data/paml/codeml/control_file.txt +30 -0
data/test/data/paml/codeml/output.txt +78 -0
data/test/data/paml/codeml/rates +217 -0
data/test/data/rpsblast/misc.rpsblast +193 -0
data/test/data/soft/GDS100_partial.soft +0 -0
data/test/data/soft/GSE3457_family_partial.soft +0 -0
data/test/functional/bio/appl/test_pts1.rb +115 -0
data/test/functional/bio/io/test_ensembl.rb +123 -80
data/test/functional/bio/io/test_togows.rb +267 -0
data/test/functional/bio/sequence/test_output_embl.rb +51 -0
data/test/functional/bio/test_command.rb +301 -0
data/test/runner.rb +17 -1
data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
data/test/unit/bio/appl/blast/test_report.rb +753 -35
data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
data/test/unit/bio/appl/test_blast.rb +135 -4
data/test/unit/bio/appl/test_fasta.rb +2 -2
data/test/unit/bio/appl/test_pts1.rb +1 -64
data/test/unit/bio/db/embl/test_common.rb +15 -15
data/test/unit/bio/db/embl/test_embl.rb +4 -4
data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
data/test/unit/bio/db/embl/test_sptr.rb +38 -1
data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
data/test/unit/bio/db/test_gff.rb +1151 -25
data/test/unit/bio/db/test_medline.rb +127 -0
data/test/unit/bio/db/test_nexus.rb +5 -1
data/test/unit/bio/db/test_prosite.rb +4 -4
data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
data/test/unit/bio/io/test_ddbjxml.rb +8 -3
data/test/unit/bio/io/test_fastacmd.rb +5 -5
data/test/unit/bio/io/test_flatfile.rb +357 -106
data/test/unit/bio/io/test_soapwsdl.rb +2 -2
data/test/unit/bio/io/test_togows.rb +161 -0
data/test/unit/bio/sequence/test_common.rb +210 -11
data/test/unit/bio/sequence/test_compat.rb +3 -3
data/test/unit/bio/sequence/test_dblink.rb +58 -0
data/test/unit/bio/sequence/test_na.rb +2 -2
data/test/unit/bio/test_command.rb +111 -50
data/test/unit/bio/test_feature.rb +29 -1
data/test/unit/bio/test_location.rb +566 -6
data/test/unit/bio/test_pathway.rb +91 -65
data/test/unit/bio/test_reference.rb +67 -13
data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
metadata +202 -167
data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388

@@ -1,15 +1,16 @@
 #
 # = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
 #
-# Copyright::  Copyright (C) 2001, 2007 Toshiaki Katayama <k@bioruby.org>
+# Copyright::  Copyright (C) 2001, 2007, 2008 Toshiaki Katayama <k@bioruby.org>
 # Copyright::  Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
 # License::    The Ruby License
 #
-# $Id: pubmed.rb,v 1.23 2007/12/12 13:53:26 k Exp $
+# $Id:$
 #
+require 'bio/io/ncbirest'
 require 'bio/command'
-require 'cgi' unless defined?(CGI)
+require 'cgi'
 module Bio
@@ -68,29 +69,7 @@ module Bio
 #   manuscript = Bio::PubMed.query("10592173")
 #   medline = Bio::MEDLINE.new(manuscript)
 #
-class PubMed
-  # Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
-  # weekdays for any series of more than 100 requests.
-  # -> Not implemented yet in BioRuby
-  # Make no more than one request every 3 seconds.
-  NCBI_INTERVAL = 3
-  @@last_access = nil
-  private
-  def ncbi_access_wait(wait = NCBI_INTERVAL)
-    if @@last_access
-      duration = Time.now - @@last_access
-      if wait > duration
-        sleep wait - duration
-      end
-    end
-    @@last_access = Time.now
-  end
-  public
+class PubMed < Bio::NCBI::REST
   # Search the PubMed database by given keywords using E-Utils and returns
   # an array of PubMed IDs.
@@ -99,39 +78,22 @@ class PubMed
   # http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html#PubMed
   # ---
   # *Arguments*:
-  # * _id_: query string (required)
-  # * _field_
-  # * _reldate_
-  # * _mindate_
-  # * _maxdate_
-  # * _datetype_
-  # * _retstart_
-  # * _retmax_ (default 100)
-  # * _retmode_
-  # * _rettype_
+  # * _str_: query string (required)
+  # * _hash_: hash of E-Utils options
+  #   * _retmode_: "xml", "html", ...
+  #   * _rettype_: "medline", ...
+  #   * _retmax_: integer (default 100)
+  #   * _retstart_: integer
+  #   * _field_
+  #   * _reldate_
+  #   * _mindate_
+  #   * _maxdate_
+  #   * _datetype_
   # *Returns*:: array of PubMed IDs or a number of results
   def esearch(str, hash = {})
-    return nil if str.empty?
-    serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
-    opts = {
-      "retmax" => 100,
-      "tool"   => "bioruby",
-      "db"     => "pubmed",
-      "term"   => str
-    }
+    opts = { "db" => "pubmed" }
     opts.update(hash)
-    ncbi_access_wait
-    response, = Bio::Command.post_form(serv, opts)
-    result = response.body
-    if opts['rettype'] == 'count'
-      result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
-    else
-      result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
-    end
-    return result
+    super(str, opts)
   end
   # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
@@ -141,29 +103,21 @@ class PubMed
   # ---
   # *Arguments*:
   # * _ids_: list of PubMed IDs (required)
+  # * _hash_: hash of E-Utils options
+  #   * _retmode_: "xml", "html", ...
+  #   * _rettype_: "medline", ...
+  #   * _retmax_: integer (default 100)
+  #   * _retstart_: integer
+  #   * _field_
+  #   * _reldate_
+  #   * _mindate_
+  #   * _maxdate_
+  #   * _datetype_
   # *Returns*:: Array of MEDLINE formatted String
   def efetch(ids, hash = {})
-    return nil if ids.to_s.empty?
-    ids = ids.join(",") if ids === Array
-    serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
-    opts = {
-      "tool"     => "bioruby",
-      "db"       => "pubmed",
-      "retmode"  => "text",
-      "rettype"  => "medline",
-      "id"       => ids,
-    }
+    opts = { "db" => "pubmed", "rettype"  => "medline" }
     opts.update(hash)
-    ncbi_access_wait
-    response, = Bio::Command.post_form(serv, opts)
-    result = response.body
-    if opts["retmode"] == "text"
-      result = result.split(/\n\n+/)
-    end
-    return result
+    super(ids, opts)
   end
   # Search the PubMed database by given keywords using entrez query and returns
@@ -180,7 +134,7 @@ class PubMed
     ncbi_access_wait
     http = Bio::Command.new_http(host)
-    response, = http.get(path + CGI.escape(str))
+    response = http.get(path + CGI.escape(str))
     result = response.body
     result = result.scan(/value="(\d+)" id="UidCheckBox"/m).flatten
     return result
@@ -195,12 +149,12 @@ class PubMed
   def query(*ids)
     host = "www.ncbi.nlm.nih.gov"
     path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
-    list = ids.join(",")
+    list = ids.collect { |x| CGI.escape(x.to_s) }.join(",")
     ncbi_access_wait
     http = Bio::Command.new_http(host)
-    response, = http.get(path + list)
+    response = http.get(path + list)
     result = response.body
     result = result.scan(/<pre>\s*(.*?)<\/pre>/m).flatten
@@ -229,7 +183,7 @@ class PubMed
     ncbi_access_wait
     http = Bio::Command.new_http(host)
-    response, = http.get(path + id.to_s)
+    response = http.get(path + CGI.escape(id.to_s))
     result = response.body
     if result =~ /#{id}\s+Error/
       raise( result )

data/lib/bio/io/registry.rb CHANGED

@@ -5,7 +5,7 @@
 #               Toshiaki Katayama <k@bioruby.org>
 # License::     The Ruby License
 #
-# $Id: registry.rb,v 1.19 2007/04/05 23:35:41 trevor Exp $
+# $Id:$
 #
 # == Description
 #
@@ -172,7 +172,7 @@ class Registry
   def read_remote(url)
     schema, user, host, port, reg, path, = URI.split(url)
     Bio::Command.start_http(host, port) do |http|
-      response, = http.get(path)
+      response = http.get(path)
       parse_stanza(response.body)
     end
   end

data/lib/bio/io/sql.rb CHANGED

@@ -1,365 +1,186 @@
-#
-# = bio/io/sql.rb - BioSQL access module
-#
-# Copyright::  Copyright (C) 2002 Toshiaki Katayama <k@bioruby.org>
-# Copyright::  Copyright (C) 2006 Raoul Jean Pierre Bonnal <raoul.bonnal@itb.cnr.it>
-# License::    The Ruby License
-#
-# $Id: sql.rb,v 1.8 2007/04/05 23:35:41 trevor Exp $
-#
-begin
-  require 'dbi'
-rescue LoadError
-end
-require 'bio/sequence'
-require 'bio/feature'
-module Bio
-class SQL
-  def initialize(db = 'dbi:Mysql:biosql', user = nil, pass = nil)
-    @dbh = DBI.connect(db, user, pass)
-  end
-  def close
-    @dbh.disconnect
-  end
-  # Returns Bio::SQL::Sequence object.
-  def fetch(accession)	# or display_id for fall back
-    query = "select * from bioentry where accession = ?"
-    entry = @dbh.execute(query, accession).fetch
-    return Sequence.new(@dbh, entry) if entry
-    query = "select * from bioentry where display_id = ?"
-    entry = @dbh.execute(query, accession).fetch
-    return Sequence.new(@dbh, entry) if entry
-  end
-  alias get_by_id fetch
-  # for lazy fetching
-  class Sequence
-    def initialize(dbh, entry)
-      @dbh = dbh
-      @bioentry_id = entry['bioentry_id']
-      @database_id = entry['biodatabase_id']
-      @entry_id = entry['display_id']
-      @accession = entry['accession']
-      @version = entry['entry_version']
-      @division = entry['division']
-    end
-    attr_reader :accession, :division, :entry_id, :version
-    def to_fasta
-      if seq = seq
-        return seq.to_fasta(@accession)
-      end
-    end
-    # Returns Bio::Sequence::NA or AA object.
-    def seq
-      query = "select * from biosequence where bioentry_id = ?"
-      row = @dbh.execute(query, @bioentry_id).fetch
-      return unless row
-      mol = row['alphabet']
-      seq = row['seq']
-      case mol
-      when /.na/i			# 'dna' or 'rna'
-        Bio::Sequence::NA.new(seq)
-      else				# 'protein'
-        Bio::Sequence::AA.new(seq)
-      end
-    end
-    # Returns Bio::Sequence::NA or AA object (by lazy fetching).
-    def subseq(from, to)
-      length = to - from + 1
-      query = "select alphabet, substring(seq, ?, ?) as subseq" +
-              " from biosequence where bioentry_id = ?"
-      row = @dbh.execute(query, from, length, @bioentry_id).fetch
-      return unless row
-      mol = row['alphabet']
-      seq = row['subseq']
-      case mol
-      when /.na/i			# 'dna' or 'rna'
-        Bio::Sequence::NA.new(seq)
-      else				# 'protein'
-        Bio::Sequence::AA.new(seq)
-      end
-    end
-    # Returns Bio::Features object.
-    def features
-      array = []
-      query = "select * from seqfeature where bioentry_id = ?"
-      @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
-        next unless row
-        f_id = row['seqfeature_id']
-        k_id = row['type_term_id']
-        s_id = row['source_term_id']
-        rank = row['rank'].to_i - 1
-        # key : type (gene, CDS, ...)
-        type = feature_key(k_id)
-        # source : database (EMBL/GenBank/SwissProt)
-        database = feature_source(s_id)
-        # location : position
-        locations = feature_locations(f_id)
-        # qualifier
-        qualifiers = feature_qualifiers(f_id)
-        # rank
-        array[rank] = Bio::Feature.new(type, locations, qualifiers)
-      end
-      return Bio::Features.new(array)
-    end
-    # Returns reference informations in Array of Hash (not Bio::Reference).
-    def references
-      array = []
-      query = <<-END
-        select * from bioentry_reference, reference
-        where bioentry_id = ? and
-        bioentry_reference.reference_id = reference.reference_id
-      END
-      @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
-        next unless row
-        hash = {
-          'start'	=> row['start_pos'],
-          'end'		=> row['end_pos'],
-          'journal'	=> row['location'],
-          'title'	=> row['title'],
-          'authors'	=> row['authors'],
-          'medline'	=> row['crc']
-        }
-        hash.default = ''
-        rank = row['rank'].to_i - 1
-        array[rank] = hash
-      end
-      return array
-    end
-    # Returns the first comment.  For complete comments, use comments method.
-    def comment
-      query = "select * from comment where bioentry_id = ?"
-      row = @dbh.execute(query, @bioentry_id).fetch
-      row ? row['comment_text'] : ''
-    end
-    # Returns comments in an Array of Strings.
-    def comments
-      array = []
-      query = "select * from comment where bioentry_id = ?"
-      @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
-        next unless row
-        rank = row['rank'].to_i - 1
-        array[rank] = row['comment_text']
-      end
-      return array
-    end
-    def database
-      query = "select * from biodatabase where biodatabase_id = ?"
-      row = @dbh.execute(query, @database_id).fetch
-      row ? row['name'] : ''
-    end
-    def date
-      query = "select * from bioentry_date where bioentry_id = ?"
-      row = @dbh.execute(query, @bioentry_id).fetch
-      row ? row['date'] : ''
-    end
-    def dblink
-      query = "select * from bioentry_direct_links where source_bioentry_id = ?"
-      row = @dbh.execute(query, @bioentry_id).fetch
-      row ? [row['dbname'], row['accession']] : []
-    end
-    def definition
-      query = "select * from bioentry_description where bioentry_id = ?"
-      row = @dbh.execute(query, @bioentry_id).fetch
-      row ? row['description'] : ''
-    end
-    def keyword
-      query = "select * from bioentry_keywords where bioentry_id = ?"
-      row = @dbh.execute(query, @bioentry_id).fetch
-      row ? row['keywords'] : ''
-    end
-    # Use lineage, common_name, ncbi_taxa_id methods to extract in detail.
-    def taxonomy
-      query = <<-END
-        select taxon_name.name, taxon.ncbi_taxon_id from bioentry
-        join taxon_name using(taxon_id) join taxon using (taxon_id)
-        where bioentry_id = ?
-      END
-      row = @dbh.execute(query, @bioentry_id).fetch
-#     @lineage = row ? row['full_lineage'] : ''
-      @common_name = row ? row['name'] : ''
-      @ncbi_taxa_id = row ? row['ncbi_taxon_id'] : ''
-      row ? [@lineage, @common_name, @ncbi_taxa_id] : []
-    end
-    def lineage
-      taxonomy unless @lineage
-      return @lineage
-    end
-    def common_name
-      taxonomy unless @common_name
-      return @common_name
-    end
-    def ncbi_taxa_id
-      taxonomy unless @ncbi_taxa_id
-      return @ncbi_taxa_id
-    end
-    private
-    def feature_key(k_id)
-      query = "select * from term where term_id= ?"
-      row = @dbh.execute(query, k_id).fetch
-      row ? row['name'] : ''
-    end
-    def feature_source(s_id)
-      query = "select * from term where term_id = ?"
-      row = @dbh.execute(query, s_id).fetch
-      row ? row['name'] : ''
-    end
-    def feature_locations(f_id)
-      locations = []
-      query = "select * from location where seqfeature_id = ?"
-      @dbh.execute(query, f_id).fetch_all.each do |row|
-        next unless row
+require 'rubygems'
+require 'erb'
+require 'composite_primary_keys'
+# BiosqlPlug
-        location = Bio::Location.new
-        location.strand = row['strand']
-        location.from = row['start_pos']
-        location.to = row['end_pos']
+=begin
+Hilmar gave me some clarification:
+1) The "EMBL/GenBank/SwissProt" name in the term table is only a convention, assuming the data were loaded from GenBank, EMBL and SwissProt formats.
+   If your features come from other sources (for example BLAST or an alignment), the user has to take care of the source.
-        xref = feature_locations_remote(row['dbxref_if'])
-        location.xref_id = xref.shift unless xref.empty?
-        # just omit fuzzy location for now...
-        #feature_locations_qv(row['seqfeature_location_id'])
-        rank = row['rank'].to_i - 1
-        locations[rank] = location
-      end
-      return Bio::Locations.new(locations)
-    end
-    def feature_locations_remote(l_id)
-      query = "select * from  dbxref where dbxref_id = ?"
-      row = @dbh.execute(query, l_id).fetch
-      row ? [row['accession'], row['version']] : []
-    end
-    def feature_locations_qv(l_id)
-      query = "select * from location_qualifier_value where location_id = ?"
-      row = @dbh.execute(query, l_id).fetch
-      row ? [row['value'], row['int_value']] : []
-    end
-    def feature_qualifiers(f_id)
-      qualifiers = []
-      query = "select * from seqfeature_qualifier_value where seqfeature_id = ?"
-      @dbh.execute(query, f_id).fetch_all.each do |row|
-        next unless row
-        key = feature_qualifiers_key(row['seqfeature_id'])
-        value = row['value']
-        qualifier = Bio::Feature::Qualifier.new(key, value)
-        rank = row['rank'].to_i - 1
-        qualifiers[rank] = qualifier
-      end
-      return qualifiers.compact	# .compact is nasty hack for a while
-    end
-    def feature_qualifiers_key(q_id)
-      query = <<-END
-        select * from seqfeature_qualifier_value
-        join term using(term_id) where seqfeature_id = ?
-      END
-      row = @dbh.execute(query, q_id).fetch
-      row ? row['name'] : ''
-    end
-  end
-end # SQL
-end # Bio
+=end
+=begin
+TODO:
+1) source_term_id => source_term; check beforehand whether the source term is present and at which level. The root should always be something like "EMBL/GenBank/SwissProt" or be contextualized.
+2) In the DummyBase class, delete the connection there and use Bio::ArSQL.establish_connection, which reads its info from a YAML file.
+3) Check Locations in Biofeatures ArSQL
+=end
+module Bio
+  class SQL
+    #no check is made
+    def self.establish_connection(configurations, env)
+      #configurations is a hash similar to what YAML returns.
+      #{:database=>"biorails_development", :adapter=>"postgresql", :username=>"rails", :password=>nil}
+      configurations.assert_valid_keys('development', 'production','test')
+      configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
+      DummyBase.configurations = configurations
+      DummyBase.establish_connection "#{env}"
+    end
+    def self.fetch_id(id)
+      Bio::SQL::Bioentry.find(id)
+    end
+    def self.fetch_accession(accession)
+      accession = accession.upcase
+      Bio::SQL::Bioentry.exists?(:accession => accession) ? Bio::SQL::Sequence.new(:entry=>Bio::SQL::Bioentry.find_by_accession(accession)) : nil
+    end
+    def self.exists_accession(accession)
+      Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil? ? false : true
+    end
+    def self.exists_database(name)
+      Bio::SQL::Biodatabase.find_by_name(name).nil? ? false : true
+    end
+    def self.list_entries
+      Bio::SQL::Bioentry.find(:all).collect{|entry|
+        {:id=>entry.bioentry_id, :accession=>entry.accession}
+      }
+    end
+    def self.list_databases
+      Bio::SQL::Biodatabase.find(:all).collect{|entry|
+        {:id=>entry.biodatabase_id, :name => entry.name}
+      }
+    end
+    def self.delete_entry_id(id)
+      Bioentry.delete(id)
+    end
+    def self.delete_entry_accession(accession)
+      Bioentry.delete(Bioentry.find_by_accession(accession))
+    end
+    class DummyBase <  ActiveRecord::Base
+      #NOTE: Using postgresql, not setting sequence name, system will discover the name by default.
+      #NOTE: this class will not establish the connection automatically
+      self.abstract_class = true
+      self.pluralize_table_names = false
+      #prepend table name to the usual id, avoid to specify primary id for every table
+      self.primary_key_prefix_type = :table_name_with_underscore
+      #biosql_configurations=YAML::load(ERB.new(IO.read(File.join(File.dirname(__FILE__),'../config', 'database.yml'))).result)
+      #self.configurations=biosql_configurations
+      #self.establish_connection "development"
+    end #DummyBase
+    autoload :Biodatabase, 'bio/io/biosql/biodatabase'
+    autoload :Bioentry, 'bio/io/biosql/bioentry'
+    autoload :BioentryDbxref, 'bio/io/biosql/bioentry_dbxref'
+    autoload :BioentryPath, 'bio/io/biosql/bioentry_path'
+    autoload :BioentryQualifierValue, 'bio/io/biosql/bioentry_qualifier_value'
+    autoload :BioentryReference, 'bio/io/biosql/bioentry_reference'
+    autoload :BioentryRelationship, 'bio/io/biosql/bioentry_relationship'
+    autoload :Biosequence, 'bio/io/biosql/biosequence'
+    autoload :Comment, 'bio/io/biosql/comment'
+    autoload :Dbxref, 'bio/io/biosql/dbxref'
+    autoload :DbxrefQualifierValue, 'bio/io/biosql/dbxref_qualifier_value'
+    autoload :Location, 'bio/io/biosql/location'
+    autoload :LocationQualifierValue, 'bio/io/biosql/location_qualifier_value'
+    autoload :Ontology, 'bio/io/biosql/ontology'
+    autoload :Reference, 'bio/io/biosql/reference'
+    autoload :Seqfeature, 'bio/io/biosql/seqfeature'
+    autoload :SeqfeatureDbxref, 'bio/io/biosql/seqfeature_dbxref'
+    autoload :SeqfeaturePath, 'bio/io/biosql/seqfeature_path'
+    autoload :SeqfeatureQualifierValue, 'bio/io/biosql/seqfeature_qualifier_value'
+    autoload :SeqfeatureRelationship, 'bio/io/biosql/seqfeature_relationship'
+    autoload :Taxon, 'bio/io/biosql/taxon'
+    autoload :TaxonName, 'bio/io/biosql/taxon_name'
+    autoload :Term, 'bio/io/biosql/term'
+    autoload :TermDbxref, 'bio/io/biosql/term_dbxref'
+    autoload :TermPath, 'bio/io/biosql/term_path'
+    autoload :TermRelationship, 'bio/io/biosql/term_relationship'
+    autoload :TermRelationshipTerm, 'bio/io/biosql/term_relationship_term'
+    autoload :Sequence, 'bio/db/biosql/sequence'
+  end #biosql
+end #Bio
 if __FILE__ == $0
-  begin
-    require 'pp'
-    alias p pp
-  rescue LoadError
+  require 'rubygems'
+  require 'composite_primary_keys'
+  require 'bio'
+  require 'pp'
+  #  pp connection = Bio::SQL.establish_connection('bio/io/biosql/config/database.yml','development')
+  connection = Bio::SQL.establish_connection({'development'=>{'database'=>"bio_test", 'adapter'=>"postgresql", 'username'=>"rails", 'password'=>nil}},'development')
+  #pp YAML::load(ERB.new(IO.read('bio/io/biosql/config/database.yml')).result)
+  if true
+    #Bio::SQL.list_entries
+#  	biosequence = data.to_biosequence
+#	puts biosequence.output(:genbank)
+	db=Bio::SQL::Biodatabase.new(:name=>'JEFF', :authority=>'ME', :description=>'YOU')
+	db.save!
+    puts "### FileFile.auto"
+    if ARGV.size > 0
+	#embl = Bio::FlatFile.auto(ARGF.read)
+	Bio::FlatFile.auto(ARGF) do |ff|
+		ff.each do |data|
+			biosequence=data.to_biosequence
+			puts biosequence.output(:fasta)
+			sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
+			sqlseq.save
+			sqlseq.to_biosequence.output(:fasta)
+		end
+	end
+    else
+	require 'bio/io/fetch'
+	server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
+	data = Bio::EMBL.new(server.fetch('embl','AJ224123'))
+    end
+#	sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
+#	sqlseq.save
+#	sqlseq_bioseq=sqlseq.to_biosequence
+#	puts sqlseq_bioseq.output(:genbank)
+	#    bioseq = Bio::SQL.fetch_accession('AJ224122')
+	#    pp bioseq
+	#    pp bioseq.entry_id
+    #TODO create a test only for tables not sequence here
+#    pp bioseq.molecule_type
+    #pp  bioseq.molecule_type.class
+    #bioseq.molecule_type_update('dna', 1)
+##    pp Bio::SQL::Taxon.find(8121).taxon_names
+	    #sqlseq.to_biosequence
+#	sqlseq.delete
+#	db.destroy
   end
-  db = ARGV.empty? ? 'dbi:Mysql:database=biosql;host=localhost' : ARGV.shift
-  serv = Bio::SQL.new(db, 'root')
-  ent0 = serv.fetch('X76706')
-  ent0 = serv.fetch('A15H9FIB')
-  ent1 = serv.fetch('J01902')
-  ent2 = serv.fetch('X04311')
-  pp ent0.features
-  pp ent0.references
-  pp ent1.seq
-  pp ent1.seq.translate
-  pp ent1.seq.gc
-  pp ent1.subseq(1,20)
-  pp ent2.accession
-  pp ent2.comment
-  pp ent2.comments
-  pp ent2.common_name
-  pp ent2.database
-  pp ent2.date
-  pp ent2.dblink
-  pp ent2.definition
-  pp ent2.division
-  pp ent2.entry_id
-  pp ent2.features
-  pp ent2.keyword
-  pp ent2.lineage
-  pp ent2.ncbi_taxa_id
-  pp ent2.references
-  pp ent2.seq
-  pp ent2.subseq(1,10)
-  pp ent2.taxonomy
-  pp ent2.version
+  #pp  bioseq.molecule_type
+  #term = Bio::SQL::Term.find_by_name('mol_type')
+  #pp term
+  #pp bioseq.entry.bioentry_qualifier_values.create(:term=>term, :rank=>2, :value=>'pippo')
+  #pp bioseq.entry.bioentry_qualifier_values.inspect
+  #pp bioseq.entry.bioentry_qualifier_values.find_all_by_term_id(26)
+  #pp primo.class
+  #  pp primo.value='dna'
+  #  pp primo.save
+  #pp bioseq.molecule_type= 'prova'
+  #Bio::SQL::BioentryQualifierValue.delete(delete.bioentry_id,delete.term_id,delete.rank)
 end