RubyGems - bioinform - Versions diffs - 0.1.4 → 0.1.5 - Mend

bioinform 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

data/lib/bioinform/data_models/pcm.rb +8 -1
data/lib/bioinform/data_models/pm.rb +20 -15
data/lib/bioinform/data_models/ppm.rb +3 -1
data/lib/bioinform/data_models/pwm.rb +14 -2
data/lib/bioinform/parsers/parser.rb +67 -28
data/lib/bioinform/parsers/string_fantom_parser.rb +3 -9
data/lib/bioinform/parsers/string_parser.rb +64 -24
data/lib/bioinform/parsers/trivial_parser.rb +17 -0
data/lib/bioinform/parsers.rb +1 -0
data/lib/bioinform/support/advanced_scan.rb +8 -0
data/lib/bioinform/support/multiline_squish.rb +1 -1
data/lib/bioinform/support.rb +3 -1
data/lib/bioinform/version.rb +1 -1
data/spec/data_models/pcm_spec.rb +24 -6
data/spec/data_models/pm_spec.rb +15 -10
data/spec/data_models/ppm_spec.rb +8 -0
data/spec/parsers/parser_spec.rb +89 -0
data/spec/parsers/string_fantom_parser_spec.rb +16 -14
data/spec/parsers/string_parser_spec.rb +46 -0
data/spec/parsers/trivial_parser_spec.rb +22 -0
data/spec/spec_helper.rb +16 -10
data/spec/support/advanced_scan_spec.rb +32 -0
data/spec/support/multiline_squish_spec.rb +6 -0
metadata +10 -2

data/lib/bioinform/data_models/pcm.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 require 'bioinform/support'
 require 'bioinform/data_models/pm'
+require 'bioinform/data_models/ppm'
+require 'bioinform/data_models/pwm'
 module Bioinform
   class PCM < PM
     def count
@@ -12,7 +14,12 @@ module Bioinform
           Math.log((pos[ind] + probability[ind] * pseudocount) / (probability[ind]*(count + pseudocount)) )
         end
       end
-      PWM.new(mat)
+      PWM.new(matrix: mat, name: name)
+    end
+    def to_ppm
+      mat = each_position.map{|pos| pos.map{|el| el.to_f / count }}
+      PPM.new(matrix: mat, name: name)
     end
   end

data/lib/bioinform/data_models/pm.rb CHANGED Viewed

@@ -2,23 +2,23 @@ require 'bioinform/support'
 require 'bioinform/parsers'
 module Bioinform
-  IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3}
-  LetterByIndex = {0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T'}
+  IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3, A: 0, C: 1, G: 2, T: 3}
+  LetterByIndex = {0 => :A, 1 => :C, 2 => :G, 3 => :T}
   class PM
     attr_reader :matrix
     attr_accessor :background, :name
-    def choose_parser(input)
-      input.is_a?(String) ? StringParser : Parser
-      [Parser, StringParser, StringFantomParser].find do |parser|
-        self.class.new(input, parser) rescue nil
+    def self.choose_parser(input)
+      [TrivialParser, Parser, StringParser, StringFantomParser].find do |parser|
+        self.new(input, parser) rescue nil
       end
     end
     def initialize(input, parser = nil)
-      parser ||= choose_parser(input)
-      result = parser.new(input).parse
+      parser ||= self.class.choose_parser(input)
+      raise 'No one parser can process input'  unless parser
+      result = parser.new(input).parse
       @matrix = result[:matrix]
       @name = result[:name]
       @background = [1, 1, 1, 1]
@@ -29,15 +29,20 @@ module Bioinform
       @matrix == other.matrix && @background == other.background
     end
-    def valid?
-      @matrix.is_a?(Array) &&
-      @matrix.all?(&:is_a?.(Array)) &&
-      @matrix.all?{|pos| pos.size == 4} &&
-      @matrix.all?(&:all?.(&:is_a?.(Numeric)))
+    def self.valid_matrix?(matrix)
+      matrix.is_a?(Array) &&
+      ! matrix.empty? &&
+      matrix.all?(&:is_a?.(Array)) &&
+      matrix.all?{|pos| pos.size == 4} &&
+      matrix.all?(&:all?.(&:is_a?.(Numeric)))
     rescue
       false
     end
+    def valid?
+      self.class.valid_matrix?(@matrix)
+    end
     def each_position
       if block_given?
         matrix.each{|pos| yield pos}
@@ -54,7 +59,7 @@ module Bioinform
     def to_s(with_name = true)
       matrix_str = each_position.map(&:join.("\t")).join("\n")
       if with_name && @name
-        "#{@name}\n#{matrix_str}"
+        @name + "\n" + matrix_str
       else
         matrix_str
       end

data/lib/bioinform/data_models/ppm.rb CHANGED Viewed

@@ -3,6 +3,8 @@ require 'bioinform/data_models/pm'
 module Bioinform
   class PPM < PM
+    def to_ppm
+      self
+    end
   end
 end

data/lib/bioinform/data_models/pwm.rb CHANGED Viewed

@@ -21,8 +21,20 @@ module Bioinform
     def score(word)
       word = word.upcase
       raise ArgumentError, 'word in PWM#score(word) should have the same length as matrix'  unless word.length == length
-      raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters'  unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
-      word.each_char.map.with_index{|letter, pos| matrix[pos][IndexByLetter[letter]] }.inject(&:+)
+      #raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters'  unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
+      (0...length).map do |pos|
+        begin
+        # Need support of N-letters and other IUPAC
+          letter = word[pos]
+          matrix[pos][IndexByLetter[letter]]
+        rescue
+          raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters'
+        end
+      end.inject(&:+)
+    end
+    def to_pwm
+      self
     end
   end
 end

data/lib/bioinform/parsers/parser.rb CHANGED Viewed

@@ -1,40 +1,79 @@
 require 'bioinform/support'
+require 'bioinform/data_models/pm'
 module Bioinform
   class Parser
-    attr_reader :input, :matrix
+    attr_reader :input
-    def initialize(input)
-      @input = input
+    def initialize(*input)
+      if input.size == 1  # [ [1,2,3,4] ],  [  [[1,2,3,4],[5,6,7,8]] ]
+        if input.first.is_a?(Array) && input.first.all?{|el| el.is_a? Numeric}  # [ [1,2,3,4] ]
+          @input = input
+        else  # [  [[1,2,3,4],[5,6,7,8]] ]
+          @input = input.first
+        end
+      else #[ [1,2,3,4], [5,6,7,8] ], [   ]
+        @input = input
+      end
+    end
+    def parse!
+      matrix = self.class.transform_input(input)
+      raise 'Parsing error' unless self.class.valid_matrix?(matrix)
+      {matrix: matrix}
     end
     def parse
-      inp = input
-      transpose =  inp.is_a?(Hash)
-      inp = ClassMethods.try_convert_to_array(inp)
-      inp.map!{|x| ClassMethods.try_convert_to_array(x)}
-      transpose = true  if (not inp.all?{|x| x.size == 4}) && inp.size == 4 && inp.same_by?(&:size)
-      @matrix = transpose ? inp.transpose : inp
-      result
-    rescue
-      {}
-    end
-    def result(options={})
-      raise 'Parsing Error' unless matrix.is_a?(Array) && matrix.all?(&:is_a?.(Array)) && matrix.all?{|pos| pos.size == 4} && matrix.all?(&:all?.(&:is_a?.(Numeric)))
-      options.merge(matrix: @matrix)
-    end
-    class ClassMethods
-      def self.array_from_acgt_hash(hsh)
-        hsh = hsh.collect_hash{|key,value| [key.to_s.upcase, value] }
-        raise 'some of hash keys A,C,G,T are missing or hash has excess keys' unless hsh.keys.sort == %w[A C G T]
-        %w[A C G T].collect{|letter| hsh[letter] }
-      end
-      def self.try_convert_to_array(inp)
-        return inp  if inp.is_a? Array
-        array_from_acgt_hash(inp)
+      parse! rescue nil
+    end
+    def self.parse!(*input)
+      self.new(*input).parse!
+    end
+    def self.parse(*input)
+      self.new(*input).parse
+    end
+    def self.valid_matrix?(matrix)
+      PM.valid_matrix?(matrix)
+    end
+    # {A: 1, C: 2, G: 3, T: 4}  -->  [1,2,3,4]
+    # {A: [1,2], C: [3,4], G: [5,6], T: [7,8]}  --> [[1,3,5,7],[2,4,6,8]] ( == [[1,2], [3,4], [5,6], [7,8]].transpose)
+    def self.array_from_acgt_hash(hsh)
+      hsh = normalize_hash_keys(hsh)
+      raise 'some of hash keys A,C,G,T are missing or hash has excess keys' unless hsh.keys.sort == [:A,:C,:G,:T]
+      result = [:A,:C,:G,:T].collect{|letter| hsh[letter] }
+      result.all?{|el| el.is_a?(Array)} ? result.transpose : result
+    end
+    # {a: 1, C: 2, 'g' => 3, 'T' => 4} --> {A: 1, C: 2, G: 3, T: 4}
+    def self.normalize_hash_keys(hsh)
+      hsh.collect_hash{|key,value| [key.to_s.upcase.to_sym, value] }
+    end
+    # [[1,2,3,4], [2,3,4,5]] --> [[1,2,3,4], [2,3,4,5]]
+    # [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}] --> [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}]
+    # {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} --> [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
+    def self.try_convert_to_array(input)
+      case input
+      when Array then input
+      when Hash then array_from_acgt_hash(input)
+      else raise TypeError, 'input of Bioinform::Parser::array_from_acgt_hash should be Array or Hash'
       end
     end
+    def self.transform_input(input)
+      result = try_convert_to_array(input).map{|el| try_convert_to_array(el)}
+      need_tranpose?(result) ? result.transpose : result
+    end
+    # point whether matrix input positions(need not be transposed -- false) or letters(need -- true) as first index
+    # [[1,3,5,7], [2,4,6,8]] --> false
+    # [[1,2],[3,4],[5,6],[7,8]] --> true
+    def self.need_tranpose?(input)
+      (input.size == 4) && input.any?{|x| x.size != 4}
+    end
   end
 end

data/lib/bioinform/parsers/string_fantom_parser.rb CHANGED Viewed

@@ -3,18 +3,12 @@ require 'bioinform/parsers/string_parser'
 module Bioinform
   class StringFantomParser < StringParser
-    def row_pat
-      '[\w\d]+ ' + "(#{number_pat} )*#{number_pat}"
-    end
-    def name_pat
-      'NA (?<name>[\w.+:-]+)'
-    end
     def header_pat
-      "#{name_pat}\n" + '[\w\d]+ ' +"A C G T\n"
+      /NA (?<name>[\w.+:-]+)\n[\w\d]+ A C G T\n/
     end
-    def matrix_preprocess(matrix)
-      matrix.split("\n").map{|line| line.split[1..-1].map(&:to_f)}
+    def row_pat
+      /[\w\d]+ (?<row>(#{number_pat} )*#{number_pat})\n?/
     end
   end
 end

data/lib/bioinform/parsers/string_parser.rb CHANGED Viewed

@@ -1,45 +1,85 @@
+require 'strscan'
 require 'bioinform/support'
 require 'bioinform/parsers/parser'
 module Bioinform
   class StringParser < Parser
+    attr_reader :scanner
+    def initialize(input)
+      raise ArgumentError  unless input.is_a?(String)
+      super
+      @scanner = StringScanner.new(input.multiline_squish)
+    end
     def number_pat
-      '[+-]?\d+(\.\d+)?([eE][+-]?\d{1,3})?'
+      /[+-]?\d+(\.\d+)?([eE][+-]?\d{1,3})?/
     end
+    def header_pat
+      />?\s*(?<name>\S+)\n/
+    end
     def row_pat
-      "(#{number_pat} )*#{number_pat}"
+      /(?<row>(#{number_pat} )*#{number_pat})\n?/
     end
-    def name_pat
-      '(>\s*)?(?<name>\S+)'
+    def scan_row
+      match = scanner.advanced_scan(row_pat)
+      match && match[:row]
     end
-    def matrix_pat
-      "(?<matrix>(#{row_pat}\n)*#{row_pat})"
+    def split_row(row_string)
+      row_string.split.map(&:to_f)
     end
-    def header_pat
-      "(#{name_pat}\n)?"
+    def scan_any_spaces
+      scanner.scan(/\s+/)
     end
-    def pattern
-      /\A#{header_pat}#{matrix_pat}\z/
+    def parse_name
+      match = scanner.advanced_scan(header_pat)
+      match && match[:name]
     end
-    # when matrix is extracted from the string it should be transformed to a matrix of numerics
-    def matrix_preprocess(matrix)
-      matrix.split("\n").map{|line| line.split.map(&:to_f)}
+    def parse_matrix
+      matrix = []
+      while row_string = scan_row
+        matrix << split_row(row_string)
+      end
+      matrix
     end
-    def parse
-      case input
-      when String
-        match = input.multiline_squish.match(pattern)
-        raise ArgumentError  unless match
-        matrix = matrix_preprocess( match[:matrix] )
-        raise ArgumentError  unless matrix
-        Parser.new(matrix).parse.merge(name: match[:name])
+    def parse!
+      scan_any_spaces
+      name = parse_name
+      matrix = parse_matrix
+      Parser.parse!(matrix).merge(name: name)
+    end
+    def scanner_reset
+      scanner.reset
+    end
+    def each
+      if block_given?
+        scanner_reset
+        while result = parse
+          yield result
+        end
       else
-        raise ArgumentError
+        Enumerator.new(self, :each)
       end
-    rescue
-      {}
     end
+    include Enumerable
+    alias_method :split, :to_a
+    def self.split(input)
+      self.new(input).split
+    end
+    def self.split_on_motifs(input, pm_klass = PM)
+      split(input).map{|el| pm_klass.new(el)}
+    end
   end
 end

data/lib/bioinform/parsers/trivial_parser.rb ADDED Viewed

@@ -0,0 +1,17 @@
+require 'bioinform/support'
+require 'bioinform/parsers/parser'
+module Bioinform
+  # TrivialParser can be used to parse hashes returned by #parse method of other parsers:
+  # PM.new({matrix:[[1,2,3,4],[5,6,7,8]], name: 'Name'}, TrivialParser)
+  # PM.new(StringParser.new("1 2 3 4\n5 6 7 8").parse)
+  # StringParser.new("First\n1 2 3 4\n5 6 7 8\nSecond\n0 0 0 0").map{|inp| PM.new(inp, TrivialParser)}
+  class TrivialParser < Parser
+    def initialize(input)
+      @input = input
+    end
+    def parse!
+      input
+    end
+  end
+end

data/lib/bioinform/parsers.rb CHANGED Viewed

@@ -1,3 +1,4 @@
 require 'bioinform/parsers/parser'
+require 'bioinform/parsers/trivial_parser'
 require 'bioinform/parsers/string_parser'
 require 'bioinform/parsers/string_fantom_parser'

data/lib/bioinform/support/advanced_scan.rb ADDED Viewed

@@ -0,0 +1,8 @@
+require 'strscan'
+class StringScanner
+  def advanced_scan(pat)
+    result = scan(pat)
+    result && result.match(pat)
+  end
+end

data/lib/bioinform/support/multiline_squish.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 require 'active_support/core_ext/string/filters'
 class String
   def multiline_squish
-    split("\n").map(&:squish).drop_while(&:empty?).take_while{|line| !line.empty?}.join("\n")
+    split("\n").map(&:squish).join("\n").gsub(/\A\n+/,'').gsub(/\n+\z/,'')
   end
 end

data/lib/bioinform/support.rb CHANGED Viewed

@@ -14,4 +14,6 @@ require 'bioinform/support/deep_dup'
 require 'bioinform/support/partial_sums'
 require 'bioinform/support/array_zip'
-require 'bioinform/support/array_product'
+require 'bioinform/support/array_product'
+require 'bioinform/support/advanced_scan'

data/lib/bioinform/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Bioinform
-  VERSION = "0.1.4"
+  VERSION = "0.1.5"
 end

data/spec/data_models/pcm_spec.rb CHANGED Viewed

@@ -5,23 +5,41 @@ module Bioinform
   describe PCM do
     describe '#count' do
       it 'should be equal to sum of elements at position' do
-        PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).count.should == 7
-        PCM.new([[1, 2.3, 3.2, 1],[4.4, 1.1, 1, 2]]).count.should == 7.5
+        PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).count.should == 7
+        PCM.new([[1, 2.3, 3.2, 1],[4.4, 0.1, 1, 2]]).count.should == 7.5
       end
     end
     describe '#to_pwm' do
       it 'should return PWM' do
-        PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm.should be_kind_of(PWM)
+        PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm.should be_kind_of(PWM)
       end
       it 'should make transformation: el --> log( (el + p_i*pseudocount) / (p_i*(count + pseudocount)) )' do
-        PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm(1).matrix.map{|line|line.map{|el| el.round(3)}}.should == [[-0.47, 0.118,0.486,-0.47],[0.754,-0.47,-0.47,0.118]]
-        PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm(10).matrix.map{|line|line.map{|el| el.round(3)}}.should == [[-0.194, 0.057,0.258,-0.194],[0.425,-0.194,-0.194,0.057]]
+        PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm(1).matrix.map{|line|line.map{|el| el.round(3)}}.should  == [[-0.47, 0.118, 0.486, -0.47],[0.754, -2.079, -0.47, 0.118]]
+        PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm(10).matrix.map{|line|line.map{|el| el.round(3)}}.should == [[-0.194, 0.057, 0.258, -0.194],[0.425, -0.531, -0.194, 0.057]]
       end
       it 'should use default pseudocount equal to log(count)' do
-        PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm.should == PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm(Math.log(7))
+        PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm.should == PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm(Math.log(7))
+      end
+      it 'should preserve name' do
+        PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: nil).to_pwm.name.should be_nil
+        PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: 'Stub name').to_pwm.name.should == 'Stub name'
       end
     end
+    describe '#to_ppm' do
+      it 'should return PPM' do
+        PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_ppm.should be_kind_of(PPM)
+      end
+      it 'should make transformation el --> el / count' do
+        PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_ppm.should == PPM.new([[1.0/7, 2.0/7, 3.0/7, 1.0/7],[4.0/7, 0.0/7, 1.0/7, 2.0/7]])
+      end
+      it 'should preserve name' do
+        PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: nil).to_ppm.name.should be_nil
+        PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: 'Stub name').to_ppm.name.should == 'Stub name'
+      end
+    end
   end
 end

data/spec/data_models/pm_spec.rb CHANGED Viewed

@@ -3,18 +3,17 @@ require 'bioinform/data_models/pm'
 module Bioinform
   describe PM do
-    describe '#valid?' do
+    describe '::valid_matrix?' do
       it 'should be true iff an argument is an array of arrays of 4 numerics in a column' do
-        PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,3,4],[1,4,5,6.5]]; self }.valid?.should be_true
-        PM.new([[0,0,0,0]]).instance_eval{@matrix = {A: [1,1], C: [2,4], G: [3,5], T: [4, 6.5]}; self }.valid?.should be_false
-        PM.new([[0,0,0,0]]).instance_eval{@matrix = [{A:1,C:2,G:3,T:4},{A:1,C:4,G:5,T: 6.5}]; self }.valid?.should be_false
-        PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,3,4],[1,4,6.5]]; self }.valid?.should be_false
-        PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,3],[1,4,6.5]]; self }.valid?.should be_false
-        PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,'3','4'],[1,'4','5',6.5]]; self }.valid?.should be_false
+        PM.valid_matrix?( [[1,2,3,4],[1,4,5,6.5]] ).should be_true
+        PM.valid_matrix?( {A: [1,1], C: [2,4], G: [3,5], T: [4, 6.5]} ).should be_false
+        PM.valid_matrix?( [{A:1,C:2,G:3,T:4},{A:1,C:4,G:5,T: 6.5}] ).should be_false
+        PM.valid_matrix?( [[1,2,3,4],[1,4,6.5]] ).should be_false
+        PM.valid_matrix?( [[1,2,3],[1,4,6.5]] ).should be_false
+        PM.valid_matrix?( [[1,2,'3','4'],[1,'4','5',6.5]] ).should be_false
       end
-    end
+    end
     describe '#to_s' do
       before :each do
@@ -249,6 +248,12 @@ module Bioinform
         @pm.best_suffix(2).should == (-1.0)
         @pm.best_suffix(3).should == (0.0)
       end
+      it 'should give right results after left(right)_augment, discrete, reverse_complement etc' do
+        pm = PM.new([[1, 2, 3, 4], [10,10.5,11,11.5]])
+        pm.best_suffix(1).should == 11.5
+        pm.left_augment!(1)
+        pm.best_suffix(1).should == 15.5
+      end
     end
     describe '#worst_suffix' do
       it 'should return minimal score of suffices from i-th position inclusively i.e. [i..end]' do

data/spec/data_models/ppm_spec.rb ADDED Viewed

@@ -0,0 +1,8 @@
+require 'spec_helper'
+require 'bioinform/data_models/pcm'
+module Bioinform
+  describe PPM do
+  end
+end

data/spec/parsers/parser_spec.rb CHANGED Viewed

@@ -3,6 +3,86 @@ require 'bioinform/parsers/parser'
 module Bioinform
   describe Parser do
+    context '#initialize' do
+      it 'should accept an array correctly' do
+        Parser.new([[1,2,3,4],[5,6,7,8]]).parse[:matrix].should == [[1,2,3,4],[5,6,7,8]]
+      end
+      it 'should treat several arguments as an array composed of them' do
+        Parser.new([1,2,3,4],[5,6,7,8]).parse.should == Parser.new([[1,2,3,4],[5,6,7,8]]).parse
+      end
+      it 'should treat one Array of numbers as an Array(with 1 element) of Arrays' do
+        Parser.new([1,2,3,4]).parse.should == Parser.new([[1,2,3,4]]).parse
+      end
+    end
+    context '::parse!' do
+      it 'should behave like Parser.new(input).parse!' do
+        Parser.parse!([1,2,3,4],[5,6,7,8]).should  == Parser.new([1,2,3,4],[5,6,7,8]).parse!
+        expect{ Parser.parse!([1,2,3],[4,5,6]) }.to raise_error
+      end
+    end
+    context '::parse' do
+      it 'should behave like Parser.new(input).parse!' do
+        Parser.parse([1,2,3,4],[5,6,7,8]).should  == Parser.new([1,2,3,4],[5,6,7,8]).parse
+        Parser.parse([1,2,3],[4,5,6]).should be_nil
+      end
+    end
+    context '::normalize_hash_keys' do
+      it 'should convert both symbolic and string keys, in both upcase and downcase to symbolic upcases' do
+        Parser.normalize_hash_keys( {a: 1, C: 2, 'g' => 3, 'T' => 4} ).should == {A: 1, C: 2, G: 3, T: 4}
+      end
+    end
+    context '::need_transpose?' do
+      it 'should point whether matrix have positions(need not be transposed -- false) or letters(true) as first index' do
+        Parser.need_tranpose?([[1,3,5,7], [2,4,6,8]]).should be_false
+        Parser.need_tranpose?([[1,2],[3,4],[5,6],[7,8]]).should be_true
+      end
+    end
+    context '::array_from_acgt_hash' do
+      it 'should convert hash of arrays to a transposed array of arrays' do
+        input = {A: [1,2,3], C: [2,3,4], G: [3,4,5], T: [4,5,6]}
+        Parser.array_from_acgt_hash(input).should == [[1,2,3], [2,3,4], [3,4,5], [4,5,6]].transpose
+      end
+      it 'should convert hash of numbers to an array of numbers' do
+        input = {A: 1, C: 2, G: 3, T: 4}
+        Parser.array_from_acgt_hash(input).should == [1,2,3,4]
+      end
+      it 'should process both symbolic and string keys, in both upcase and downcase' do
+        input_normal_keys = {A: 1, C: 2, G: 3, T: 4}
+        input_different_keys = {:A => 1, :c => 2, 'g' => 3, 'T' => 4}
+        Parser.array_from_acgt_hash(input_different_keys).should == Parser.array_from_acgt_hash(input_normal_keys)
+      end
+    end
+    context '::try_convert_to_array' do
+      it 'should not change array' do
+        inputs = []
+        inputs << [[1,2,3,4], [2,3,4,5], [3,4,5,6]]
+        inputs << [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}, {A:3, C:4, G:5, T:6}]
+        inputs.each do |input|
+          Parser.try_convert_to_array( input ).should == input
+        end
+      end
+      it 'should convert ACGT-Hashes to an array of positions (not letters)' do
+        Parser.try_convert_to_array( {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} ).should == [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
+      end
+    end
+    context '#parse' do
+      it 'should give the same result as #parse!' do
+        parser = Parser.new('stub parser')
+        parser.stub(:parse!).and_return('stub result')
+        parser.parse.should == 'stub result'
+      end
+      it 'should return nil if #parse! raised an exception' do
+        parser = Parser.new('stub parser')
+        parser.stub(:parse!).and_raise
+        parser.parse.should be_nil
+      end
+    end
     good_cases = {
       'Array Nx4' => {input: [[0,1,2,3],[10,11,12,13]],
                         matrix: [[0,1,2,3],[10,11,12,13]] },
@@ -30,6 +110,10 @@ module Bioinform
     }
     bad_cases = {
+      'Nil object on input' => {input: nil},
+      'Empty array on input' => {input: []},
       'Different sizes of row arrays' => {input: [[1,2,3,4],[5,6,7,8,9]] },
       'Different sizes of column arrays' => {input: [[0,10],[1,11],[2,12],[3]] },
@@ -54,5 +138,10 @@ module Bioinform
     }
     parser_specs(Parser, good_cases, bad_cases)
+    context '#parser!' do
+      it "should raise an exception on parsing empty list to parser" do
+        expect{ Parser.new().parse! }.to raise_error
+      end
+    end
   end
 end

data/spec/parsers/string_fantom_parser_spec.rb CHANGED Viewed

@@ -5,20 +5,22 @@ module Bioinform
   describe StringFantomParser do
     good_cases = {
       'string in Fantom-format' => {input: "
-          NA  motif_CTNCAG
-          P0	A	C	G	T
-          P1	0	1878368	0	0
-          P2	0	0	0	1878368
-          P3	469592	469592	469592	469592
-          P4	0	1878368	0	0
-          P5	1878368	0	0	0
-          P6	0	0	1878368	0",
-          matrix: [[0.0, 1878368.0, 0.0, 0.0],
-                    [0.0, 0.0, 0.0, 1878368.0],
-                    [469592.0, 469592.0, 469592.0, 469592.0],
-                    [0.0, 1878368.0, 0.0, 0.0],
-                    [1878368.0, 0.0, 0.0, 0.0],
-                    [0.0, 0.0, 1878368.0, 0.0]] }
+        NA  motif_CTNCAG
+        P0	A	C	G	T
+        P1	0	1878368	0	0
+        P2	0	0	0	1878368
+        P3	469592	469592	469592	469592
+        P4	0	1878368	0	0
+        P5	1878368	0	0	0
+        P6	0	0	1878368	0",
+        matrix: [ [0.0, 1878368.0, 0.0, 0.0],
+                  [0.0, 0.0, 0.0, 1878368.0],
+                  [469592.0, 469592.0, 469592.0, 469592.0],
+                  [0.0, 1878368.0, 0.0, 0.0],
+                  [1878368.0, 0.0, 0.0, 0.0],
+                  [0.0, 0.0, 1878368.0, 0.0]],
+        name: 'motif_CTNCAG'
+      }
     }
     bad_cases = { }

data/spec/parsers/string_parser_spec.rb CHANGED Viewed

@@ -3,6 +3,52 @@ require 'bioinform/parsers/string_parser'
 module Bioinform
   describe StringParser do
+    describe '#each' do
+      it 'should yield consequent results of #parse! while it returns result' do
+        parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
+        expect{|b| parser.each(&b)}.to yield_successive_args({matrix:[[1,2,3,4],[5,6,7,8]], name:nil}, {matrix:[[1,2,3,4],[1,2,3,4]], name:nil}, {matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name'} )
+      end
+      it 'should restart parser from the beginning each time' do
+        parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
+        3.times do
+          expect{|b| parser.each(&b)}.to yield_successive_args({matrix:[[1,2,3,4],[5,6,7,8]], name:nil}, {matrix:[[1,2,3,4],[1,2,3,4]], name:nil}, {matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name'} )
+        end
+      end
+    end
+    context '::split' do
+      it 'should be able to get a single PM' do
+        StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12").should == [ {matrix: [[1,2,3,4],[5,6,7,8],[9,10,11,12]], name:nil} ]
+      end
+      it 'should be able to split several PMs separated with an empty line' do
+        StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:nil} ]
+      end
+      it 'should be able to split several PMs separated with name' do
+        StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
+        StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8\n\n\n").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
+      end
+    end
+    context '::split_on_motifs' do
+      it 'should be able to split string into PMs' do
+        result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
+        result.map{|pm| pm.matrix}.should == [  [[1,2,3,4],[5,6,7,8],[9,10,11,12]], [[9,10,11,12],[1,2,3,4],[5,6,7,8]]  ]
+        result.map{|pm| pm.name}.should == [nil, 'Name']
+      end
+      it 'should create PMs by default' do
+        result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
+        result.each{|pm| pm.class.should == PM}
+      end
+      it 'should create PM subclass when it\'s specified' do
+        result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8", PWM)
+        result.each{|pm| pm.class.should == PWM}
+      end
+    end
     good_cases = {
       'Nx4 string' => {input: "1 2 3 4\n5 6 7 8",
                       matrix: [[1,2,3,4],[5,6,7,8]] },

data/spec/parsers/trivial_parser_spec.rb ADDED Viewed

@@ -0,0 +1,22 @@
+require 'spec_helper'
+require 'bioinform/parsers/parser'
+module Bioinform
+  describe TrivialParser do
+    context '#initialize' do
+      it 'should take the only input argument' do
+        TrivialParser.instance_method(:initialize).arity.should == 1
+      end
+    end
+    context '#parser!' do
+      it 'should return input of that was passed to initialize' do
+        TrivialParser.new('stub input').parse!.should == 'stub input'
+      end
+    end
+    it 'can be used to create PM with {matrix: ..., name: ...} form' do
+      pm = PM.new({matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}, TrivialParser)
+      pm.matrix.should == [[1,2,3,4],[5,6,7,8]]
+      pm.name.should == 'Name'
+    end
+  end
+end

data/spec/spec_helper.rb CHANGED Viewed

@@ -4,17 +4,23 @@ $LOAD_PATH.unshift File.dirname(__FILE__)
 require 'rspec'
 def parser_specs(parser_klass, good_cases, bad_cases)
-  good_cases.each do |case_description, input_and_result|
-    it "should be able to parse #{case_description}" do
-      result = parser_klass.new(input_and_result[:input]).parse
-      result[:matrix].should == input_and_result[:matrix]
-      result[:name].should == input_and_result[:name] if input_and_result.has_key?(:name)
+  context '#parse!' do
+    good_cases.each do |case_description, input_and_result|
+      it "should be able to parse #{case_description}" do
+        result = parser_klass.new(input_and_result[:input]).parse
+        result[:matrix].should == input_and_result[:matrix]
+        if input_and_result.has_key?(:name)
+          result[:name].should == input_and_result[:name]
+        else
+          result[:name].should be_nil
+        end
+      end
     end
-  end
-  bad_cases.each do |case_description, input|
-    it "should fail silently returning {} on parsing #{case_description}" do
-      parser_klass.new(input[:input]).parse.should == {}
+    bad_cases.each do |case_description, input|
+      it "should raise an exception on parsing #{case_description}" do
+        expect{ parser_klass.new(input[:input]).parse! }.to raise_error
+      end
     end
   end
 end

data/spec/support/advanced_scan_spec.rb ADDED Viewed

@@ -0,0 +1,32 @@
+require 'spec_helper'
+require 'bioinform/support/advanced_scan'
+describe StringScanner do
+  context '#advanced_scan' do
+    before do
+      @scanner = StringScanner.new('abcde  fghIJKLmnop')
+    end
+    it 'should return nil if text doesn\'t match. Pointer should not move' do
+      @scanner.advanced_scan(/\s\s\s/).should be_nil
+      @scanner.pos.should == 0
+    end
+    it 'should return MatchData if string Matches. Pointer should move' do
+      @scanner.advanced_scan(/\w\w\w/).should be_kind_of MatchData
+      @scanner.pos.should == 3
+    end
+    it 'should return have the same groups as regexp has' do
+      result = @scanner.advanced_scan(/(\w+)(\s+)([a-z]+)([A-Z]+)/)
+      result[0].should == 'abcde  fghIJKL'
+      result[1].should == 'abcde'
+      result[2].should == '  '
+      result[3].should == 'fgh'
+      result[4].should == 'IJKL'
+    end
+    it 'should return have the same named groups as regexp has' do
+      result = @scanner.advanced_scan(/(\w+)(\s+)(?<word_downcase>[a-z]+)(?<word_upcase>[A-Z]+)/)
+      result[0].should == 'abcde  fghIJKL'
+      result[:word_downcase].should == 'fgh'
+      result[:word_upcase].should == 'IJKL'
+    end
+  end
+end

data/spec/support/multiline_squish_spec.rb CHANGED Viewed

@@ -15,5 +15,11 @@ describe String do
     it 'should preserve rows pagination' do
       "abc def ghi\njk lmn".multiline_squish.should == "abc def ghi\njk lmn"
     end
+    it 'should preserve empty lines in the middle of text' do
+      "abc def\n\nghi\n \t  \njk lmn \n\n\n zzz".multiline_squish.should == "abc def\n\nghi\n\njk lmn\n\n\nzzz"
+    end
+    it 'should drop empty lines at begin and at end of string' do
+      "\n  \t\n\nabc def\n\nghi\n \t  \njk lmn \n\n\n zzz\n\n \t  \n".multiline_squish.should == "abc def\n\nghi\n\njk lmn\n\n\nzzz"
+    end
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: bioinform
 version: !ruby/object:Gem::Version
-  version: 0.1.4
+  version: 0.1.5
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-07-31 00:00:00.000000000 Z
+date: 2012-09-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activesupport
@@ -67,7 +67,9 @@ files:
 - lib/bioinform/parsers/parser.rb
 - lib/bioinform/parsers/string_fantom_parser.rb
 - lib/bioinform/parsers/string_parser.rb
+- lib/bioinform/parsers/trivial_parser.rb
 - lib/bioinform/support.rb
+- lib/bioinform/support/advanced_scan.rb
 - lib/bioinform/support/array_product.rb
 - lib/bioinform/support/array_zip.rb
 - lib/bioinform/support/callable_symbol.rb
@@ -83,11 +85,14 @@ files:
 - lib/bioinform/version.rb
 - spec/data_models/pcm_spec.rb
 - spec/data_models/pm_spec.rb
+- spec/data_models/ppm_spec.rb
 - spec/data_models/pwm_spec.rb
 - spec/parsers/parser_spec.rb
 - spec/parsers/string_fantom_parser_spec.rb
 - spec/parsers/string_parser_spec.rb
+- spec/parsers/trivial_parser_spec.rb
 - spec/spec_helper.rb
+- spec/support/advanced_scan_spec.rb
 - spec/support/array_product_spec.rb
 - spec/support/array_zip_spec.rb
 - spec/support/callable_symbol_spec.rb
@@ -128,11 +133,14 @@ summary: Classes for work with different input formats of positional matrices an
 test_files:
 - spec/data_models/pcm_spec.rb
 - spec/data_models/pm_spec.rb
+- spec/data_models/ppm_spec.rb
 - spec/data_models/pwm_spec.rb
 - spec/parsers/parser_spec.rb
 - spec/parsers/string_fantom_parser_spec.rb
 - spec/parsers/string_parser_spec.rb
+- spec/parsers/trivial_parser_spec.rb
 - spec/spec_helper.rb
+- spec/support/advanced_scan_spec.rb
 - spec/support/array_product_spec.rb
 - spec/support/array_zip_spec.rb
 - spec/support/callable_symbol_spec.rb