RubyGems - bio - Versions diffs - 1.4.1 → 1.4.2 - Mend

bio 1.4.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61)

data/ChangeLog +954 -0
data/KNOWN_ISSUES.rdoc +40 -5
data/README.rdoc +36 -35
data/RELEASE_NOTES.rdoc +87 -59
data/bioruby.gemspec +24 -2
data/doc/RELEASE_NOTES-1.4.1.rdoc +104 -0
data/doc/Tutorial.rd +162 -200
data/doc/Tutorial.rd.html +149 -146
data/lib/bio.rb +1 -0
data/lib/bio/appl/blast.rb +1 -1
data/lib/bio/appl/blast/ddbj.rb +26 -34
data/lib/bio/appl/blast/genomenet.rb +21 -11
data/lib/bio/db/embl/sptr.rb +193 -21
data/lib/bio/db/fasta.rb +1 -1
data/lib/bio/db/fastq.rb +14 -0
data/lib/bio/db/fastq/format_fastq.rb +2 -2
data/lib/bio/db/genbank/ddbj.rb +1 -2
data/lib/bio/db/genbank/format_genbank.rb +1 -1
data/lib/bio/db/medline.rb +1 -0
data/lib/bio/db/newick.rb +3 -1
data/lib/bio/db/pdb/pdb.rb +9 -9
data/lib/bio/db/pdb/residue.rb +2 -2
data/lib/bio/io/ddbjrest.rb +344 -0
data/lib/bio/io/ncbirest.rb +121 -1
data/lib/bio/location.rb +2 -2
data/lib/bio/reference.rb +3 -4
data/lib/bio/shell/plugin/entry.rb +7 -3
data/lib/bio/shell/plugin/ncbirest.rb +5 -1
data/lib/bio/util/restriction_enzyme.rb +3 -0
data/lib/bio/util/restriction_enzyme/dense_int_array.rb +195 -0
data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +7 -7
data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +57 -18
data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +2 -2
data/lib/bio/util/restriction_enzyme/sorted_num_array.rb +219 -0
data/lib/bio/version.rb +1 -1
data/sample/test_restriction_enzyme_long.rb +4403 -0
data/test/data/fasta/EFTU_BACSU.fasta +8 -0
data/test/data/genbank/CAA35997.gp +48 -0
data/test/data/genbank/SCU49845.gb +167 -0
data/test/data/litdb/1717226.litdb +13 -0
data/test/data/pir/CRAB_ANAPL.pir +6 -0
data/test/functional/bio/appl/blast/test_remote.rb +93 -0
data/test/functional/bio/appl/test_blast.rb +61 -0
data/test/functional/bio/io/test_ddbjrest.rb +47 -0
data/test/functional/bio/test_command.rb +3 -3
data/test/unit/bio/db/embl/test_sptr.rb +6 -6
data/test/unit/bio/db/embl/test_uniprot_new_part.rb +208 -0
data/test/unit/bio/db/genbank/test_common.rb +274 -0
data/test/unit/bio/db/genbank/test_genbank.rb +401 -0
data/test/unit/bio/db/genbank/test_genpept.rb +81 -0
data/test/unit/bio/db/pdb/test_pdb.rb +3287 -11
data/test/unit/bio/db/test_fasta.rb +34 -12
data/test/unit/bio/db/test_fastq.rb +26 -0
data/test/unit/bio/db/test_litdb.rb +95 -0
data/test/unit/bio/db/test_medline.rb +1 -0
data/test/unit/bio/db/test_nbrf.rb +82 -0
data/test/unit/bio/db/test_newick.rb +22 -4
data/test/unit/bio/test_reference.rb +35 -0
data/test/unit/bio/util/restriction_enzyme/test_dense_int_array.rb +201 -0
data/test/unit/bio/util/restriction_enzyme/test_sorted_num_array.rb +281 -0
metadata +44 -38

data/lib/bio/io/ncbirest.rb CHANGED

@@ -127,6 +127,7 @@ class REST
   def ncbi_post_form(serv, opts) # posts opts to serv and returns the response object
     ncbi_check_parameters(opts)
     ncbi_access_wait # NOTE(review): presumably throttles access to NCBI -- confirm in its definition
+    #$stderr.puts opts.inspect
     response = Bio::Command.post_form(serv, opts)
     response
   end
@@ -485,7 +486,7 @@ class REST
       #  nucleotide = nuccore + nucest + nucgss
       #
       # format (rettype):
-      # * native       all but Gene    Default format for viewing sequences
+      # * native       all but Gene    ASN Default format for viewing sequences
       # * fasta        all sequence    FASTA view of a sequence
       # * gb           NA sequence     GenBank view for sequences
       # * gbc          NA sequence     INSDSeq structured flat file
@@ -540,6 +541,125 @@ class REST
         Bio::NCBI::REST.efetch(ids, opts)
       end
+      # Retrieve nucleotide sequence entries by given IDs using E-Utils
+      # (efetch).
+      #
+      # * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchseq_help.html
+      #  nucleotide = nuccore + nucest + nucgss
+      #
+      # format (rettype):
+      # * native       all but Gene    ASN Default format for viewing sequences
+      # * fasta        all sequence    FASTA view of a sequence
+      # * gb           NA sequence     GenBank view for sequences
+      # * gbc          NA sequence     INSDSeq structured flat file
+      # * gbwithparts  NA sequence     GenBank CON division with sequences
+      # * est          dbEST sequence  EST Report
+      # * gss          dbGSS sequence  GSS Report
+      # * gp           AA sequence     GenPept view
+      # * gpc          AA sequence     INSDSeq structured flat file
+      # * seqid        all sequence    Convert GIs into seqids
+      # * acc          all sequence    Convert GIs into accessions
+      # * chr          dbSNP only      SNP Chromosome Report
+      # * flt          dbSNP only      SNP Flat File report
+      # * rsr          dbSNP only      SNP RS Cluster report
+      # * brief        dbSNP only      SNP ID list
+      # * docset       dbSNP only      SNP RS summary
+      #
+      # == Usage
+      #
+      #  Bio::NCBI::REST::EFetch.nucleotide("123,U12345,U12345.1,gb|U12345|")
+      #
+      #  list = [123, "U12345.1", "gb|U12345|"]
+      #  Bio::NCBI::REST::EFetch.nucleotide(list)
+      #  Bio::NCBI::REST::EFetch.nucleotide(list, "fasta")
+      #  Bio::NCBI::REST::EFetch.nucleotide(list, "acc")
+      #  Bio::NCBI::REST::EFetch.nucleotide(list, "xml")
+      #
+      #  Bio::NCBI::REST::EFetch.nucleotide("AE009950")
+      #  Bio::NCBI::REST::EFetch.nucleotide("AE009950", "gbwithparts")
+      #
+      #  ncbi = Bio::NCBI::REST::EFetch.new
+      #  ncbi.nucleotide("123,U12345,U12345.1,gb|U12345|")
+      #  ncbi.nucleotide(list)
+      #  ncbi.nucleotide(list, "fasta")
+      #  ncbi.nucleotide(list, "acc")
+      #  ncbi.nucleotide(list, "xml")
+      #  ncbi.nucleotide("AE009950")
+      #  ncbi.nucleotide("AE009950", "gbwithparts")
+      #
+      # ---
+      #
+      # *Arguments*:
+      # * _ids_: list of NCBI entry IDs (required)
+      # * _format_: "gb", "gbc", "fasta", "acc", "xml" etc.
+      # *Returns*:: String
+      def nucleotide(ids, format = "gb", hash = {}) # hash: extra efetch parameters merged into the request
+        case format
+        when "xml" # xml is accepted as an alias and sent as rettype gbc
+          format = "gbc"
+        end
+        opts = { "db" => "nucleotide", "rettype" => format }
+        opts.update(hash) # keys given in hash replace the db/rettype defaults set above
+        Bio::NCBI::REST.efetch(ids, opts) # the efetch result is returned unchanged
+      end
+      # Retrieve protein sequence entries by given IDs using E-Utils
+      # (efetch).
+      #
+      # * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchseq_help.html
+      #  protein
+      #
+      # format (rettype):
+      # * native       all but Gene    ASN Default format for viewing sequences
+      # * fasta        all sequence    FASTA view of a sequence
+      # * gb           NA sequence     GenBank view for sequences
+      # * gbc          NA sequence     INSDSeq structured flat file
+      # * gbwithparts  NA sequence     GenBank CON division with sequences
+      # * est          dbEST sequence  EST Report
+      # * gss          dbGSS sequence  GSS Report
+      # * gp           AA sequence     GenPept view
+      # * gpc          AA sequence     INSDSeq structured flat file
+      # * seqid        all sequence    Convert GIs into seqids
+      # * acc          all sequence    Convert GIs into accessions
+      # * chr          dbSNP only      SNP Chromosome Report
+      # * flt          dbSNP only      SNP Flat File report
+      # * rsr          dbSNP only      SNP RS Cluster report
+      # * brief        dbSNP only      SNP ID list
+      # * docset       dbSNP only      SNP RS summary
+      #
+      # == Usage
+      #
+      #  Bio::NCBI::REST::EFetch.protein("7527480,AAF63163.1,AAF63163")
+      #
+      #  list = [ 7527480, "AAF63163.1", "AAF63163"]
+      #  Bio::NCBI::REST::EFetch.protein(list)
+      #  Bio::NCBI::REST::EFetch.protein(list, "fasta")
+      #  Bio::NCBI::REST::EFetch.protein(list, "acc")
+      #  Bio::NCBI::REST::EFetch.protein(list, "xml")
+      #
+      #  ncbi = Bio::NCBI::REST::EFetch.new
+      #  ncbi.protein("7527480,AAF63163.1,AAF63163")
+      #  ncbi.protein(list)
+      #  ncbi.protein(list, "fasta")
+      #  ncbi.protein(list, "acc")
+      #  ncbi.protein(list, "xml")
+      #
+      # ---
+      #
+      # *Arguments*:
+      # * _ids_: list of NCBI entry IDs (required)
+      # * _format_: "gp", "gpc", "fasta", "acc", "xml" etc.
+      # *Returns*:: String
+      def protein(ids, format = "gp", hash = {}) # hash: extra efetch parameters merged into the request
+        case format
+        when "xml" # xml is accepted as an alias and sent as rettype gpc
+          format = "gpc"
+        end
+        opts = { "db" => "protein", "rettype" => format }
+        opts.update(hash) # keys given in hash replace the db/rettype defaults set above
+        Bio::NCBI::REST.efetch(ids, opts) # the efetch result is returned unchanged
+      end
       # Retrieve PubMed entries by given IDs using E-Utils (efetch).
       #
       # * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchlit_help.html

data/lib/bio/location.rb CHANGED

@@ -632,8 +632,8 @@ class Locations
         end
       end
-      join_list.each do |position|
-        ary << gbl_pos2loc(position)
+      join_list.each do |pos|
+        ary << gbl_pos2loc(pos)
       end
     when /^complement\((.*)\)$/				# (J) complement()

data/lib/bio/reference.rb CHANGED

@@ -272,7 +272,7 @@ module Bio
       lines << "%N #{@issue}" unless @issue.to_s.empty?
       lines << "%P #{@pages}" unless @pages.empty?
       lines << "%M #{@pubmed}" unless @pubmed.to_s.empty?
-      u = @url.empty? ? pubmed_url : @url
+      u = @url.to_s.empty? ? pubmed_url : @url
       lines << "%U #{u}" unless u.empty?
       lines << "%X #{@abstract}" unless @abstract.empty?
       @mesh.each do |term|
@@ -587,9 +587,8 @@ module Bio
     # *Returns*:: String
     def pubmed_url
       unless @pubmed.to_s.empty?
-        cgi = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi"
-        opts = "cmd=Retrieve&db=PubMed&dopt=Citation&list_uids"
-        return "#{cgi}?#{opts}=#{@pubmed}"
+        head = "http://www.ncbi.nlm.nih.gov/pubmed"
+        return "#{head}/#{@pubmed}"
       end
       ''
     end

data/lib/bio/shell/plugin/entry.rb CHANGED

@@ -62,7 +62,7 @@ module Bio::Shell
   #   * "db:entry"  -- local BioFlat, OBDA, EMBOSS, KEGG API
   def getent(arg)
     entry = ""
-    db, entry_id = arg.to_s.strip.split(/:/)
+    db, entry_id = arg.to_s.strip.split(/\:/, 2)
     # local file
     if arg.respond_to?(:gets) or File.exists?(arg)
@@ -81,8 +81,12 @@ module Bio::Shell
     else
       # EMBOSS USA in ~/.embossrc
-      str = entret(arg)
-      if $?.exitstatus == 0 and str.length != 0
+      begin
+        str = entret(arg)
+      rescue SystemCallError
+        str = ''
+      end
+      if $? and $?.exitstatus == 0 and str.length != 0
         puts "Retrieving entry from EMBOSS (#{arg})"
         entry = str

data/lib/bio/shell/plugin/ncbirest.rb CHANGED

@@ -31,7 +31,11 @@ module Bio::Shell
   # Otherwise, it acts the same as Bio::NCBI::REST.efetch.
   def efetch(ids, *arg)
     if arg.empty? then
-      Bio::NCBI::REST::EFetch.sequence(ids)
+      ret = Bio::NCBI::REST::EFetch.nucleotide(ids)
+      unless /^LOCUS       / =~ ret.to_s then
+        ret = Bio::NCBI::REST::EFetch.protein(ids)
+      end
+      ret
     elsif arg[0].kind_of?(Symbol)
       meth = arg[0]
       case meth.to_s

data/lib/bio/util/restriction_enzyme.rb CHANGED

@@ -125,6 +125,9 @@ class RestrictionEnzyme
   autoload :Analysis,                'bio/util/restriction_enzyme/analysis'
   autoload :Range,                   'bio/util/restriction_enzyme/range/sequence_range'
+  autoload :SortedNumArray,          'bio/util/restriction_enzyme/sorted_num_array'
+  autoload :DenseIntArray,          'bio/util/restriction_enzyme/dense_int_array'
   include CutSymbol
   extend CutSymbol

data/lib/bio/util/restriction_enzyme/dense_int_array.rb ADDED

@@ -0,0 +1,195 @@
+#
+# bio/util/restriction_enzyme/dense_int_array.rb - Internal data storage for Bio::RestrictionEnzyme::Range::SequenceRange
+#
+# Copyright::   Copyright (C) 2011
+#               Naohisa Goto <ng@bioruby.org>
+#               Tomoaki NISHIYAMA
+# License::     The Ruby License
+#
+module Bio
+class RestrictionEnzyme
+  # Stores integers compactly as a list of runs of consecutive values;
+  # intended for data that consists mostly of contiguous integers.
+  #
+  # Bio::RestrictionEnzyme internal use only.
+  # Please do not create instances outside Bio::RestrictionEnzyme.
+  class DenseIntArray
+    MutableRange = Struct.new(:first, :last) # one run of consecutive integers, first..last inclusive
+    include Enumerable
+    # Same usage as Array.[]: builds a new instance by pushing each argument in order.
+    def self.[](*args)
+      a = self.new
+      args.each do |elem|
+        a.push elem
+      end
+      a
+    end
+    # Creates a new, empty object.
+    def initialize
+      @data = []
+    end
+    # Copy hook: gives the copy its own duplicate of every stored run.
+    def initialize_copy(other)
+      super(other)
+      @data = @data.collect { |elem| elem.dup }
+    end
+    # Sets the internal data object (protected).
+    def internal_data=(a)
+      #clear_cache
+      @data = a
+      self
+    end
+    protected :internal_data=
+    # Gets the internal data object, the Array of runs held in @data (protected).
+    def internal_data
+      @data
+    end
+    protected :internal_data
+    # Same usage as Array#[]. Converts to an Array on every call, so all elements are enumerated each time.
+    def [](*arg)
+      #$stderr.puts "SortedIntArray#[]"
+      to_a[*arg]
+    end
+    # Not implemented; always raises NotImplementedError.
+    def []=(*arg)
+      raise NotImplementedError, 'DenseIntArray#[]= is not implemented.'
+    end
+    # Same usage as Array#each: yields every integer of every run in stored order and returns self. Does not return an Enumerator when called without a block.
+    def each
+      @data.each do |elem|
+        elem.first.upto(elem.last) { |num| yield num }
+      end
+      self
+    end
+    # Same usage as Array#reverse_each: yields the integers in the reverse of the #each order and returns self. Does not return an Enumerator when called without a block.
+    def reverse_each
+      @data.reverse_each do |elem|
+        elem.last.downto(elem.first) { |num| yield num }
+      end
+      self
+    end
+    # Same usage as Array#+, but accepts only an instance of the same class (raises TypeError otherwise). Unlike Array#+, the result is ordered by run start and overlapping runs are merged.
+    def +(other)
+      unless other.is_a?(self.class) then
+        raise TypeError, 'unsupported data type'
+      end
+      tmpdata = @data + other.internal_data
+      tmpdata.sort! { |a,b| a.first <=> b.first } # order the runs of both operands by their first value
+      result = self.class.new
+      return result if tmpdata.empty?
+      newdata = result.internal_data
+      newdata.push tmpdata[0].dup # dup so that the runs of the operands are never modified
+      (1...(tmpdata.size)).each do |i|
+        if (x = newdata[-1].last) >= tmpdata[i].first then # run i starts at or before the end of the last merged run
+          newdata[-1].last = tmpdata[i].last if tmpdata[i].last > x
+        else # NOTE(review): a run that is only adjacent (first == x + 1) is kept separate here, although #<< would extend the previous run
+          newdata.push tmpdata[i].dup # NOTE(review): because #== compares the stored runs, equal contents may then compare as different -- confirm intended
+        end
+      end
+      result
+    end
+    # Same usage as Array#==: true for the identical object, or for another instance of this class whose stored runs are equal.
+    def ==(other)
+      if r = super(other) then
+        r
+      elsif other.is_a?(self.class) then
+        other.internal_data == @data
+      else
+        false
+      end
+    end
+    # Same usage as Array#concat: appends each element of ary with #<< and returns self.
+    def concat(ary)
+      ary.each { |elem| self.<<(elem) }
+      self
+    end
+    # Same usage as Array#push: appends each argument with #<< and returns self.
+    def push(*args)
+      args.each do |elem|
+        self.<<(elem)
+      end
+      self
+    end
+    # Not implemented; always raises NotImplementedError.
+    def unshift(*arg)
+      raise NotImplementedError, 'DenseIntArray#unshift is not implemented.'
+    end
+    # Same usage as Array#<<: extends the last run when elem is exactly one more than its end, otherwise starts a new run. Returns self.
+    def <<(elem)
+      if !@data.empty? and
+          @data[-1].last + 1 == elem then
+        @data[-1].last = elem
+      else
+        @data << MutableRange.new(elem, elem)
+      end
+      self
+    end
+    # Same usage as Array#include?: returns false at once when there is no data or elem lies outside first..last, otherwise scans the runs.
+    def include?(elem)
+      return false if @data.empty? or elem < self.first or self.last < elem
+      @data.any? do |range|
+        range.first <= elem && elem <= range.last
+      end
+    end
+    # Same usage as Array#first without arguments: start of the first stored run, or nil when empty.
+    def first
+      elem = @data.first
+      elem ? elem.first : nil
+    end
+    # Same usage as Array#last without arguments: end of the last stored run, or nil when empty.
+    def last
+      elem = @data.last
+      elem ? elem.last : nil
+    end
+    # Same usage as Array#size: total number of integers over all runs.
+    def size
+      sum = 0
+      @data.each do |range|
+        sum += (range.last - range.first + 1)
+      end
+      sum
+    end
+    alias length size
+    # Not implemented; always raises NotImplementedError.
+    def delete(elem)
+      raise NotImplementedError, 'DenseIntArray#delete is not implemented.'
+    end
+    # Does nothing and returns self; the block is ignored.
+    def sort!(&block)
+      # does nothing
+      self
+    end
+    # Does nothing and returns self.
+    def uniq!
+      # does nothing
+      self
+    end
+  end #class DenseIntArray
+end #class RestrictionEnzyme
+end #module Bio

data/lib/bio/util/restriction_enzyme/range/sequence_range.rb CHANGED

@@ -5,7 +5,7 @@
 # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
 # License::   The Ruby License
 #
-#  $Id: sequence_range.rb,v 1.9 2007/07/16 19:28:48 k Exp $
+#  $Id:$
 #
 require 'bio/util/restriction_enzyme'
@@ -160,7 +160,7 @@ class SequenceRange
     @__fragments_current = true
     num_txt = '0123456789'
-    num_txt_repeat = (num_txt * ( @size / num_txt.size.to_f ).ceil)[0..@size-1]
+    num_txt_repeat = (num_txt * ( @size.div(num_txt.size) + 1))[0..@size-1]
     fragments = Fragments.new(num_txt_repeat, num_txt_repeat)
     cc = Bio::RestrictionEnzyme::Range::SequenceRange::CalculatedCuts.new(@size)
@@ -193,9 +193,9 @@ class SequenceRange
   # * +cc+: Bio::RestrictionEnzyme::Range::SequenceRange::CalculatedCuts
   # *Returns*:: +Hash+ Keys are unique, values are Bio::RestrictionEnzyme::Range::SequenceRange::Bin objects filled with indexes of the sequence locations they represent.
   def create_bins(cc)
-    p_cut = cc.vc_primary
-    c_cut = cc.vc_complement
-    h_cut = cc.hc_between_strands
+    p_cut = cc.vc_primary_as_original_class
+    c_cut = cc.vc_complement_as_original_class
+    h_cut = cc.hc_between_strands_as_original_class
     if @circular
       # NOTE
@@ -247,8 +247,8 @@ class SequenceRange
   # initializing the bin.
   def setup_new_bin(bins, bin_id)
     bins[ bin_id ] = Bin.new
-    bins[ bin_id ].p = []
-    bins[ bin_id ].c = []
+    bins[ bin_id ].p = DenseIntArray[] #could be replaced by SortedNumArray[]
+    bins[ bin_id ].c = DenseIntArray[] #could be replaced by SortedNumArray[]
   end
 end # SequenceRange