RubyGems - bio - Versions diffs - 1.1.0 → 1.2.0 - Mend

bio 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

data/bin/bioruby +4 -3
data/lib/bio.rb +3 -3
data/lib/bio/appl/blast/format0.rb +3 -2
data/lib/bio/appl/blast/format8.rb +5 -3
data/lib/bio/db/kegg/compound.rb +6 -1
data/lib/bio/db/kegg/enzyme.rb +3 -3
data/lib/bio/db/kegg/genes.rb +2 -2
data/lib/bio/db/kegg/glycan.rb +5 -5
data/lib/bio/db/kegg/orthology.rb +27 -3
data/lib/bio/db/newick.rb +203 -55
data/lib/bio/io/flatfile.rb +2 -2
data/lib/bio/io/flatfile/indexer.rb +2 -2
data/lib/bio/io/keggapi.rb +2 -1
data/lib/bio/io/pubmed.rb +223 -81
data/lib/bio/sequence/common.rb +6 -3
data/lib/bio/shell/interface.rb +2 -2
data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +5 -5
data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +7 -8
data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +1 -1
data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +21 -17
data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/spinner.gif +0 -0
data/test/functional/bio/io/test_ensembl.rb +87 -4
data/test/unit/bio/db/test_newick.rb +238 -1
data/test/unit/bio/sequence/test_aa.rb +3 -2
data/test/unit/bio/sequence/test_common.rb +11 -2
data/test/unit/bio/sequence/test_na.rb +63 -1
metadata +4 -4
data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0

data/lib/bio/io/flatfile.rb CHANGED

@@ -5,7 +5,7 @@
 #
 # License:: The Ruby License
 #
-#  $Id: flatfile.rb,v 1.60 2007/07/09 14:08:34 ngoto Exp $
+#  $Id: flatfile.rb,v 1.61 2007/11/15 07:07:16 k Exp $
 #
 #
 # Bio::FlatFile is a helper and wrapper class to read a biological data file.
@@ -1130,7 +1130,7 @@ module Bio
           genpept  = RuleRegexp[ 'Bio::GenPept',
             /^LOCUS       .+ aa .+/ ],
           medline  = RuleRegexp[ 'Bio::MEDLINE',
-            /^UI  \- [0-9]+$/ ],
+            /^PMID\- [0-9]+$/ ],
           embl     = RuleRegexp[ 'Bio::EMBL',
             /^ID   .+\; .*(DNA|RNA|XXX)\;/ ],
           sptr     = RuleRegexp2[ 'Bio::SPTR',

data/lib/bio/io/flatfile/indexer.rb CHANGED

@@ -4,7 +4,7 @@
 # Copyright:: Copyright (C) 2002 GOTO Naohisa <ng@bioruby.org>
 # License::   The Ruby License
 #
-#  $Id: indexer.rb,v 1.25 2007/04/05 23:35:41 trevor Exp $
+#  $Id: indexer.rb,v 1.26 2007/12/11 15:13:32 ngoto Exp $
 #
 require 'bio/io/flatfile/index'
@@ -714,7 +714,7 @@ module Bio
     ##############################################################
     def self.formatstring2class(format_string)
-      case format
+      case format_string
       when /genbank/i
         dbclass = Bio::GenBank
       when /genpept/i

data/lib/bio/io/keggapi.rb CHANGED

@@ -4,7 +4,7 @@
 # Copyright::  Copyright (C) 2003, 2004 Toshiaki Katayama <k@bioruby.org>
 # License::    The Ruby License
 #
-# $Id: keggapi.rb,v 1.14 2007/04/05 23:35:41 trevor Exp $
+# $Id: keggapi.rb,v 1.15 2007/07/20 21:56:45 k Exp $
 #
 require 'bio/io/soapwsdl'
@@ -331,6 +331,7 @@ class API < Bio::SOAPWSDL
   def add_filter(results)
     if results.is_a?(Array)
       results.each do |result|
+	next if result.is_a?(Fixnum)
         def result.filter(fields)
           fields.collect { |field| self.send(field) }
         end

data/lib/bio/io/pubmed.rb CHANGED

@@ -1,16 +1,15 @@
 #
 # = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
 #
-# Copyright::  Copyright (C) 2001 Toshiaki Katayama <k@bioruby.org>
+# Copyright::  Copyright (C) 2001, 2007 Toshiaki Katayama <k@bioruby.org>
 # Copyright::  Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
 # License::    The Ruby License
 #
-# $Id: pubmed.rb,v 1.16 2007/04/05 23:35:41 trevor Exp $
+# $Id: pubmed.rb,v 1.23 2007/12/12 13:53:26 k Exp $
 #
-require 'net/http'
-require 'cgi' unless defined?(CGI)
 require 'bio/command'
+require 'cgi' unless defined?(CGI)
 module Bio
@@ -18,18 +17,19 @@ module Bio
 #
 # The Bio::PubMed class provides several ways to retrieve bibliographic
 # information from the PubMed database at
-# http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed. Basically, two
-# types of queries are possible:
+#   http://www.ncbi.nlm.nih.gov/sites/entrez?db=PubMed
+#
+# Basically, two types of queries are possible:
 #
 # * searching for PubMed IDs given a query string:
-#   * Bio::PubMed#search
-#   * Bio::PubMed#esearch
+#   * Bio::PubMed#esearch  (recommended)
+#   * Bio::PubMed#search   (only retrieves top 20 hits)
 #
 # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...)
 #   given a PubMed ID
-#   * Bio::PubMed#query
-#   * Bio::PubMed#pmfetch
-#   * Bio::PubMed#efetch
+#   * Bio::PubMed#efetch   (recommended)
+#   * Bio::PubMed#query    (unstable: breaks when the HTML design changes)
+#   * Bio::PubMed#pmfetch  (still working but could be obsoleted by NCBI)
 #
 # The different methods within the same group are interchangeable and should
 # return the same result.
@@ -37,54 +37,61 @@ module Bio
 # Additional information about the MEDLINE format and PubMed programmable
 # APIs can be found on the following websites:
 #
-# * Overview: http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html
-# * How to link: http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html
-# * MEDLINE format: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat
-# * Search field descriptions and tags: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags
-# * Entrez utilities index: http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
-# * PmFetch CGI help: http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html
-# * E-Utilities CGI help: http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
+# * PubMed Overview:
+#     http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html
+# * PubMed help:
+#     http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
+# * Entrez utilities index:
+#      http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
+# * How to link:
+#     http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helplinks.chapter.linkshelp
 #
 # == Usage
 #
 #   require 'bio'
 #
 #   # If you don't know the pubmed ID:
-#   Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
+#   Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
 #     p x
 #   end
-#   Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
+#
+#   Bio::PubMed.search("(genome AND analysis) OR bioinformatics").each do |x|
 #     p x
 #   end
 #
 #   # To retrieve the MEDLINE entry for a given PubMed ID:
+#   puts Bio::PubMed.efetch("10592173", "14693808")
 #   puts Bio::PubMed.query("10592173")
 #   puts Bio::PubMed.pmfetch("10592173")
-#   puts Bio::PubMed.efetch("10592173", "14693808")
+#
 #   # This can be converted into a Bio::MEDLINE object:
 #   manuscript = Bio::PubMed.query("10592173")
-#   medline = Bio::MEDLINE(manuscript)
+#   medline = Bio::MEDLINE.new(manuscript)
 #
 class PubMed
-  # Search the PubMed database by given keywords using entrez query and returns
-  # an array of PubMed IDs.
-  # ---
-  # *Arguments*:
-  # * _id_: query string (required)
-  # *Returns*:: array of PubMed IDs
-  def self.search(str)
-    host = "www.ncbi.nlm.nih.gov"
-    path = "/entrez/query.fcgi?tool=bioruby&cmd=Search&doptcmdl=MEDLINE&db=PubMed&term="
+  # Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
+  # weekdays for any series of more than 100 requests.
+  # -> Not implemented yet in BioRuby
-    http = Bio::Command.new_http(host)
-    response, = http.get(path + CGI.escape(str))
-    result = response.body
-    result = result.gsub("\r", "\n").squeeze("\n")
-    result = result.scan(/<pre>(.*?)<\/pre>/m).flatten
-    return result
+  # Make no more than one request every 3 seconds.
+  NCBI_INTERVAL = 3
+  @@last_access = nil
+  private
+  def ncbi_access_wait(wait = NCBI_INTERVAL)
+    if @@last_access
+      duration = Time.now - @@last_access
+      if wait > duration
+        sleep wait - duration
+      end
+    end
+    @@last_access = Time.now
   end
+  public
   # Search the PubMed database by given keywords using E-Utils and returns
   # an array of PubMed IDs.
   #
@@ -102,22 +109,80 @@ class PubMed
   # * _retmax_ (default 100)
   # * _retmode_
   # * _rettype_
-  # *Returns*:: array of PubMed IDs
-  def self.esearch(str, hash = {})
-    hash['retmax'] = 100 unless hash['retmax']
+  # *Returns*:: array of PubMed IDs or a number of results
+  def esearch(str, hash = {})
+    return nil if str.empty?
-    opts = []
-    hash.each do |k, v|
-      opts << "#{k}=#{v}"
+    serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
+    opts = {
+      "retmax" => 100,
+      "tool"   => "bioruby",
+      "db"     => "pubmed",
+      "term"   => str
+    }
+    opts.update(hash)
+    ncbi_access_wait
+    response, = Bio::Command.post_form(serv, opts)
+    result = response.body
+    if opts['rettype'] == 'count'
+      result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
+    else
+      result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
     end
+    return result
+  end
+  # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
+  # entrez efetch. Multiple PubMed IDs can be provided:
+  #   Bio::PubMed.efetch(123)
+  #   Bio::PubMed.efetch([123,456,789])
+  # ---
+  # *Arguments*:
+  # * _ids_: list of PubMed IDs (required)
+  # *Returns*:: Array of MEDLINE formatted String
+  def efetch(ids, hash = {})
+    return nil if ids.to_s.empty?
+    ids = ids.join(",") if ids === Array
+    serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
+    opts = {
+      "tool"     => "bioruby",
+      "db"       => "pubmed",
+      "retmode"  => "text",
+      "rettype"  => "medline",
+      "id"       => ids,
+    }
+    opts.update(hash)
+    ncbi_access_wait
+    response, = Bio::Command.post_form(serv, opts)
+    result = response.body
+    if opts["retmode"] == "text"
+      result = result.split(/\n\n+/)
+    end
+    return result
+  end
+  # Search the PubMed database by given keywords using entrez query and returns
+  # an array of PubMed IDs. Caution: this method returns the first 20 hits only.
+  # Instead, use of the 'esearch' method is strongly recommended.
+  # ---
+  # *Arguments*:
+  # * _id_: query string (required)
+  # *Returns*:: array of PubMed IDs
+  def search(str)
+    host = "www.ncbi.nlm.nih.gov"
+    path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term="
-    host = "eutils.ncbi.nlm.nih.gov"
-    path = "/entrez/eutils/esearch.fcgi?tool=bioruby&db=pubmed&#{opts.join('&')}&term="
+    ncbi_access_wait
     http = Bio::Command.new_http(host)
     response, = http.get(path + CGI.escape(str))
     result = response.body
-    result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
+    result = result.scan(/value="(\d+)" id="UidCheckBox"/m).flatten
     return result
   end
@@ -127,18 +192,27 @@ class PubMed
   # *Arguments*:
   # * _id_: PubMed ID (required)
   # *Returns*:: MEDLINE formatted String
-  def self.query(id)
+  def query(*ids)
     host = "www.ncbi.nlm.nih.gov"
-    path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
+    path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
+    list = ids.join(",")
+    ncbi_access_wait
     http = Bio::Command.new_http(host)
-    response, = http.get(path + id.to_s)
+    response, = http.get(path + list)
     result = response.body
-    if result =~ /#{id}\s+Error/
+    result = result.scan(/<pre>\s*(.*?)<\/pre>/m).flatten
+    if result =~ /id:.*Error occurred/
+      # id: xxxxx Error occurred: Article does not exist
       raise( result )
     else
-      result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
-      return result
+      if ids.size > 1
+        return result
+      else
+        return result.first
+      end
     end
   end
@@ -148,10 +222,12 @@ class PubMed
   # *Arguments*:
   # * _id_: PubMed ID (required)
   # *Returns*:: MEDLINE formatted String
-  def self.pmfetch(id)
+  def pmfetch(id)
     host = "www.ncbi.nlm.nih.gov"
     path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
+    ncbi_access_wait
     http = Bio::Command.new_http(host)
     response, = http.get(path + id.to_s)
     result = response.body
@@ -163,28 +239,24 @@ class PubMed
     end
   end
-  # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
-  # entrez efetch. Multiple PubMed IDs can be provided:
-  #   Bio::PubMed.efetch(123)
-  #   Bio::PubMed.efetch(123,456,789)
-  #   Bio::PubMed.efetch([123,456,789])
-  # ---
-  # *Arguments*:
-  # * _ids_: list of PubMed IDs (required)
-  # *Returns*:: MEDLINE formatted String
-  def self.efetch(*ids)
-    return [] if ids.empty?
+  def self.esearch(*args)
+    self.new.esearch(*args)
+  end
+  def self.efetch(*args)
+    self.new.efetch(*args)
+  end
-    host = "eutils.ncbi.nlm.nih.gov"
-    path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
+  def self.search(*args)
+    self.new.search(*args)
+  end
-    ids = ids.join(",")
+  def self.query(*args)
+    self.new.query(*args)
+  end
-    http = Bio::Command.new_http(host)
-    response, = http.get(path + ids)
-    result = response.body
-    result = result.split(/\n\n+/)
-    return result
+  def self.pmfetch(*args)
+    self.new.pmfetch(*args)
   end
 end # PubMed
@@ -194,18 +266,88 @@ end # Bio
 if __FILE__ == $0
-  puts Bio::PubMed.query("10592173")
-  puts "--- ---"
-  puts Bio::PubMed.pmfetch("10592173")
-  puts "--- ---"
-  Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
+  puts "=== instance methods ==="
+  pubmed = Bio::PubMed.new
+  puts "--- Search PubMed by E-Utils ---"
+  opts = {"rettype" => "count"}
+  puts Time.now
+  puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
+  puts Time.now
+  puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
+  puts Time.now
+  puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
+  puts Time.now
+  pubmed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
+    puts x
+  end
+  puts "--- Retrieve PubMed entry by E-Utils ---"
+  puts Time.now
+  puts pubmed.efetch(16381885)
+  puts Time.now
+  puts pubmed.efetch("16381885")
+  puts Time.now
+  puts pubmed.efetch("16381885")
+  puts Time.now
+  opts = {"retmode" => "xml"}
+  puts pubmed.efetch([10592173, 14693808], opts)
+  puts Time.now
+  puts pubmed.efetch(["10592173", "14693808"], opts)
+  puts "--- Search PubMed by Entrez CGI ---"
+  pubmed.search("(genome AND analysis) OR bioinformatics").each do |x|
     p x
   end
-  puts "--- ---"
-  Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
+  puts "--- Retrieve PubMed entry by Entrez CGI ---"
+  puts pubmed.query("16381885")
+  puts "--- Retrieve PubMed entry by PMfetch ---"
+  puts pubmed.pmfetch("16381885")
+  puts "=== class methods ==="
+  puts "--- Search PubMed by E-Utils ---"
+  opts = {"rettype" => "count"}
+  puts Time.now
+  puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
+  puts Time.now
+  puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
+  puts Time.now
+  puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
+  puts Time.now
+  Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
+    puts x
+  end
+  puts "--- Retrieve PubMed entry by E-Utils ---"
+  puts Time.now
+  puts Bio::PubMed.efetch(16381885)
+  puts Time.now
+  puts Bio::PubMed.efetch("16381885")
+  puts Time.now
+  puts Bio::PubMed.efetch("16381885")
+  puts Time.now
+  opts = {"retmode" => "xml"}
+  puts Bio::PubMed.efetch([10592173, 14693808], opts)
+  puts Time.now
+  puts Bio::PubMed.efetch(["10592173", "14693808"], opts)
+  puts "--- Search PubMed by Entrez CGI ---"
+  Bio::PubMed.search("(genome AND analysis) OR bioinformatics").each do |x|
     p x
   end
-  puts "--- ---"
-  puts Bio::PubMed.efetch("10592173", "14693808")
+  puts "--- Retrieve PubMed entry by Entrez CGI ---"
+  puts Bio::PubMed.query("16381885")
+  puts "--- Retrieve PubMed entry by PMfetch ---"
+  puts Bio::PubMed.pmfetch("16381885")
 end