bioinform 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
1
1
  require 'bioinform/support'
2
2
  require 'bioinform/data_models/pm'
3
+ require 'bioinform/data_models/ppm'
4
+ require 'bioinform/data_models/pwm'
3
5
  module Bioinform
4
6
  class PCM < PM
5
7
  def count
@@ -12,7 +14,12 @@ module Bioinform
12
14
  Math.log((pos[ind] + probability[ind] * pseudocount) / (probability[ind]*(count + pseudocount)) )
13
15
  end
14
16
  end
15
- PWM.new(mat)
17
+ PWM.new(matrix: mat, name: name)
18
+ end
19
+
20
+ def to_ppm
21
+ mat = each_position.map{|pos| pos.map{|el| el.to_f / count }}
22
+ PPM.new(matrix: mat, name: name)
16
23
  end
17
24
 
18
25
  end
@@ -2,23 +2,23 @@ require 'bioinform/support'
2
2
  require 'bioinform/parsers'
3
3
 
4
4
  module Bioinform
5
- IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3}
6
- LetterByIndex = {0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T'}
7
-
5
+ IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3, A: 0, C: 1, G: 2, T: 3}
6
+ LetterByIndex = {0 => :A, 1 => :C, 2 => :G, 3 => :T}
7
+
8
8
  class PM
9
9
  attr_reader :matrix
10
10
  attr_accessor :background, :name
11
11
 
12
- def choose_parser(input)
13
- input.is_a?(String) ? StringParser : Parser
14
- [Parser, StringParser, StringFantomParser].find do |parser|
15
- self.class.new(input, parser) rescue nil
12
+ def self.choose_parser(input)
13
+ [TrivialParser, Parser, StringParser, StringFantomParser].find do |parser|
14
+ self.new(input, parser) rescue nil
16
15
  end
17
16
  end
18
17
 
19
18
  def initialize(input, parser = nil)
20
- parser ||= choose_parser(input)
21
- result = parser.new(input).parse
19
+ parser ||= self.class.choose_parser(input)
20
+ raise 'No one parser can process input' unless parser
21
+ result = parser.new(input).parse
22
22
  @matrix = result[:matrix]
23
23
  @name = result[:name]
24
24
  @background = [1, 1, 1, 1]
@@ -29,15 +29,20 @@ module Bioinform
29
29
  @matrix == other.matrix && @background == other.background
30
30
  end
31
31
 
32
- def valid?
33
- @matrix.is_a?(Array) &&
34
- @matrix.all?(&:is_a?.(Array)) &&
35
- @matrix.all?{|pos| pos.size == 4} &&
36
- @matrix.all?(&:all?.(&:is_a?.(Numeric)))
32
+ def self.valid_matrix?(matrix)
33
+ matrix.is_a?(Array) &&
34
+ ! matrix.empty? &&
35
+ matrix.all?(&:is_a?.(Array)) &&
36
+ matrix.all?{|pos| pos.size == 4} &&
37
+ matrix.all?(&:all?.(&:is_a?.(Numeric)))
37
38
  rescue
38
39
  false
39
40
  end
40
41
 
42
+ def valid?
43
+ self.class.valid_matrix?(@matrix)
44
+ end
45
+
41
46
  def each_position
42
47
  if block_given?
43
48
  matrix.each{|pos| yield pos}
@@ -54,7 +59,7 @@ module Bioinform
54
59
  def to_s(with_name = true)
55
60
  matrix_str = each_position.map(&:join.("\t")).join("\n")
56
61
  if with_name && @name
57
- "#{@name}\n#{matrix_str}"
62
+ @name + "\n" + matrix_str
58
63
  else
59
64
  matrix_str
60
65
  end
@@ -3,6 +3,8 @@ require 'bioinform/data_models/pm'
3
3
 
4
4
  module Bioinform
5
5
  class PPM < PM
6
-
6
+ def to_ppm
7
+ self
8
+ end
7
9
  end
8
10
  end
@@ -21,8 +21,20 @@ module Bioinform
21
21
  def score(word)
22
22
  word = word.upcase
23
23
  raise ArgumentError, 'word in PWM#score(word) should have the same length as matrix' unless word.length == length
24
- raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters' unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
25
- word.each_char.map.with_index{|letter, pos| matrix[pos][IndexByLetter[letter]] }.inject(&:+)
24
+ #raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters' unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
25
+ (0...length).map do |pos|
26
+ begin
27
+ # Need support of N-letters and other IUPAC
28
+ letter = word[pos]
29
+ matrix[pos][IndexByLetter[letter]]
30
+ rescue
31
+ raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters'
32
+ end
33
+ end.inject(&:+)
34
+ end
35
+
36
+ def to_pwm
37
+ self
26
38
  end
27
39
  end
28
40
  end
@@ -1,40 +1,79 @@
1
1
  require 'bioinform/support'
2
+ require 'bioinform/data_models/pm'
2
3
 
3
4
  module Bioinform
4
5
  class Parser
5
- attr_reader :input, :matrix
6
+ attr_reader :input
6
7
 
7
- def initialize(input)
8
- @input = input
8
+ def initialize(*input)
9
+ if input.size == 1 # [ [1,2,3,4] ], [ [[1,2,3,4],[5,6,7,8]] ]
10
+ if input.first.is_a?(Array) && input.first.all?{|el| el.is_a? Numeric} # [ [1,2,3,4] ]
11
+ @input = input
12
+ else # [ [[1,2,3,4],[5,6,7,8]] ]
13
+ @input = input.first
14
+ end
15
+ else #[ [1,2,3,4], [5,6,7,8] ], [ ]
16
+ @input = input
17
+ end
18
+ end
19
+
20
+ def parse!
21
+ matrix = self.class.transform_input(input)
22
+ raise 'Parsing error' unless self.class.valid_matrix?(matrix)
23
+ {matrix: matrix}
9
24
  end
10
25
 
11
26
  def parse
12
- inp = input
13
- transpose = inp.is_a?(Hash)
14
- inp = ClassMethods.try_convert_to_array(inp)
15
- inp.map!{|x| ClassMethods.try_convert_to_array(x)}
16
- transpose = true if (not inp.all?{|x| x.size == 4}) && inp.size == 4 && inp.same_by?(&:size)
17
- @matrix = transpose ? inp.transpose : inp
18
- result
19
- rescue
20
- {}
21
- end
22
-
23
- def result(options={})
24
- raise 'Parsing Error' unless matrix.is_a?(Array) && matrix.all?(&:is_a?.(Array)) && matrix.all?{|pos| pos.size == 4} && matrix.all?(&:all?.(&:is_a?.(Numeric)))
25
- options.merge(matrix: @matrix)
26
- end
27
-
28
- class ClassMethods
29
- def self.array_from_acgt_hash(hsh)
30
- hsh = hsh.collect_hash{|key,value| [key.to_s.upcase, value] }
31
- raise 'some of hash keys A,C,G,T are missing or hash has excess keys' unless hsh.keys.sort == %w[A C G T]
32
- %w[A C G T].collect{|letter| hsh[letter] }
33
- end
34
- def self.try_convert_to_array(inp)
35
- return inp if inp.is_a? Array
36
- array_from_acgt_hash(inp)
27
+ parse! rescue nil
28
+ end
29
+
30
+ def self.parse!(*input)
31
+ self.new(*input).parse!
32
+ end
33
+ def self.parse(*input)
34
+ self.new(*input).parse
35
+ end
36
+
37
+ def self.valid_matrix?(matrix)
38
+ PM.valid_matrix?(matrix)
39
+ end
40
+
41
+ # {A: 1, C: 2, G: 3, T: 4} --> [1,2,3,4]
42
+ # {A: [1,2], C: [3,4], G: [5,6], T: [7,8]} --> [[1,3,5,7],[2,4,6,8]] ( == [[1,2], [3,4], [5,6], [7,8]].transpose)
43
+ def self.array_from_acgt_hash(hsh)
44
+ hsh = normalize_hash_keys(hsh)
45
+ raise 'some of hash keys A,C,G,T are missing or hash has excess keys' unless hsh.keys.sort == [:A,:C,:G,:T]
46
+ result = [:A,:C,:G,:T].collect{|letter| hsh[letter] }
47
+ result.all?{|el| el.is_a?(Array)} ? result.transpose : result
48
+ end
49
+
50
+ # {a: 1, C: 2, 'g' => 3, 'T' => 4} --> {A: 1, C: 2, G: 3, T: 4}
51
+ def self.normalize_hash_keys(hsh)
52
+ hsh.collect_hash{|key,value| [key.to_s.upcase.to_sym, value] }
53
+ end
54
+
55
+ # [[1,2,3,4], [2,3,4,5]] --> [[1,2,3,4], [2,3,4,5]]
56
+ # [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}] --> [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}]
57
+ # {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} --> [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
58
+ def self.try_convert_to_array(input)
59
+ case input
60
+ when Array then input
61
+ when Hash then array_from_acgt_hash(input)
62
+ else raise TypeError, 'input of Bioinform::Parser::array_from_acgt_hash should be Array or Hash'
37
63
  end
38
64
  end
65
+
66
+ def self.transform_input(input)
67
+ result = try_convert_to_array(input).map{|el| try_convert_to_array(el)}
68
+ need_tranpose?(result) ? result.transpose : result
69
+ end
70
+
71
+ # point whether matrix input positions(need not be transposed -- false) or letters(need -- true) as first index
72
+ # [[1,3,5,7], [2,4,6,8]] --> false
73
+ # [[1,2],[3,4],[5,6],[7,8]] --> true
74
+ def self.need_tranpose?(input)
75
+ (input.size == 4) && input.any?{|x| x.size != 4}
76
+ end
77
+
39
78
  end
40
79
  end
@@ -3,18 +3,12 @@ require 'bioinform/parsers/string_parser'
3
3
 
4
4
  module Bioinform
5
5
  class StringFantomParser < StringParser
6
- def row_pat
7
- '[\w\d]+ ' + "(#{number_pat} )*#{number_pat}"
8
- end
9
- def name_pat
10
- 'NA (?<name>[\w.+:-]+)'
11
- end
12
6
  def header_pat
13
- "#{name_pat}\n" + '[\w\d]+ ' +"A C G T\n"
7
+ /NA (?<name>[\w.+:-]+)\n[\w\d]+ A C G T\n/
14
8
  end
15
9
 
16
- def matrix_preprocess(matrix)
17
- matrix.split("\n").map{|line| line.split[1..-1].map(&:to_f)}
10
+ def row_pat
11
+ /[\w\d]+ (?<row>(#{number_pat} )*#{number_pat})\n?/
18
12
  end
19
13
  end
20
14
  end
@@ -1,45 +1,85 @@
1
+ require 'strscan'
1
2
  require 'bioinform/support'
2
3
  require 'bioinform/parsers/parser'
3
4
 
4
5
  module Bioinform
5
6
  class StringParser < Parser
7
+ attr_reader :scanner
8
+ def initialize(input)
9
+ raise ArgumentError unless input.is_a?(String)
10
+ super
11
+ @scanner = StringScanner.new(input.multiline_squish)
12
+ end
13
+
6
14
  def number_pat
7
- '[+-]?\d+(\.\d+)?([eE][+-]?\d{1,3})?'
15
+ /[+-]?\d+(\.\d+)?([eE][+-]?\d{1,3})?/
8
16
  end
17
+
18
+ def header_pat
19
+ />?\s*(?<name>\S+)\n/
20
+ end
21
+
9
22
  def row_pat
10
- "(#{number_pat} )*#{number_pat}"
23
+ /(?<row>(#{number_pat} )*#{number_pat})\n?/
11
24
  end
12
- def name_pat
13
- '(>\s*)?(?<name>\S+)'
25
+
26
+ def scan_row
27
+ match = scanner.advanced_scan(row_pat)
28
+ match && match[:row]
14
29
  end
15
- def matrix_pat
16
- "(?<matrix>(#{row_pat}\n)*#{row_pat})"
30
+
31
+ def split_row(row_string)
32
+ row_string.split.map(&:to_f)
17
33
  end
18
- def header_pat
19
- "(#{name_pat}\n)?"
34
+
35
+ def scan_any_spaces
36
+ scanner.scan(/\s+/)
20
37
  end
21
- def pattern
22
- /\A#{header_pat}#{matrix_pat}\z/
38
+
39
+ def parse_name
40
+ match = scanner.advanced_scan(header_pat)
41
+ match && match[:name]
23
42
  end
24
43
 
25
- # when matrix is extracted from the string it should be transformed to a matrix of numerics
26
- def matrix_preprocess(matrix)
27
- matrix.split("\n").map{|line| line.split.map(&:to_f)}
44
+ def parse_matrix
45
+ matrix = []
46
+ while row_string = scan_row
47
+ matrix << split_row(row_string)
48
+ end
49
+ matrix
28
50
  end
29
51
 
30
- def parse
31
- case input
32
- when String
33
- match = input.multiline_squish.match(pattern)
34
- raise ArgumentError unless match
35
- matrix = matrix_preprocess( match[:matrix] )
36
- raise ArgumentError unless matrix
37
- Parser.new(matrix).parse.merge(name: match[:name])
52
+ def parse!
53
+ scan_any_spaces
54
+ name = parse_name
55
+ matrix = parse_matrix
56
+ Parser.parse!(matrix).merge(name: name)
57
+ end
58
+
59
+ def scanner_reset
60
+ scanner.reset
61
+ end
62
+
63
+ def each
64
+ if block_given?
65
+ scanner_reset
66
+ while result = parse
67
+ yield result
68
+ end
38
69
  else
39
- raise ArgumentError
70
+ Enumerator.new(self, :each)
40
71
  end
41
- rescue
42
- {}
43
72
  end
73
+ include Enumerable
74
+
75
+ alias_method :split, :to_a
76
+ def self.split(input)
77
+ self.new(input).split
78
+ end
79
+
80
+ def self.split_on_motifs(input, pm_klass = PM)
81
+ split(input).map{|el| pm_klass.new(el)}
82
+ end
83
+
44
84
  end
45
85
  end
@@ -0,0 +1,17 @@
1
+ require 'bioinform/support'
2
+ require 'bioinform/parsers/parser'
3
+
4
+ module Bioinform
5
+ # TrivialParser can be used to parse hashes returned by #parse method of other parsers:
6
+ # PM.new({matrix:[[1,2,3,4],[5,6,7,8]], name: 'Name'}, TrivialParser)
7
+ # PM.new(StringParser.new("1 2 3 4\n5 6 7 8").parse)
8
+ # StringParser.new("First\n1 2 3 4\n5 6 7 8\nSecond\n0 0 0 0").map{|inp| PM.new(inp, TrivialParser)}
9
+ class TrivialParser < Parser
10
+ def initialize(input)
11
+ @input = input
12
+ end
13
+ def parse!
14
+ input
15
+ end
16
+ end
17
+ end
@@ -1,3 +1,4 @@
1
1
  require 'bioinform/parsers/parser'
2
+ require 'bioinform/parsers/trivial_parser'
2
3
  require 'bioinform/parsers/string_parser'
3
4
  require 'bioinform/parsers/string_fantom_parser'
@@ -0,0 +1,8 @@
1
+ require 'strscan'
2
+
3
+ class StringScanner
4
+ def advanced_scan(pat)
5
+ result = scan(pat)
6
+ result && result.match(pat)
7
+ end
8
+ end
@@ -1,6 +1,6 @@
1
1
  require 'active_support/core_ext/string/filters'
2
2
  class String
3
3
  def multiline_squish
4
- split("\n").map(&:squish).drop_while(&:empty?).take_while{|line| !line.empty?}.join("\n")
4
+ split("\n").map(&:squish).join("\n").gsub(/\A\n+/,'').gsub(/\n+\z/,'')
5
5
  end
6
6
  end
@@ -14,4 +14,6 @@ require 'bioinform/support/deep_dup'
14
14
  require 'bioinform/support/partial_sums'
15
15
 
16
16
  require 'bioinform/support/array_zip'
17
- require 'bioinform/support/array_product'
17
+ require 'bioinform/support/array_product'
18
+
19
+ require 'bioinform/support/advanced_scan'
@@ -1,3 +1,3 @@
1
1
  module Bioinform
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.5"
3
3
  end
@@ -5,23 +5,41 @@ module Bioinform
5
5
  describe PCM do
6
6
  describe '#count' do
7
7
  it 'should be equal to sum of elements at position' do
8
- PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).count.should == 7
9
- PCM.new([[1, 2.3, 3.2, 1],[4.4, 1.1, 1, 2]]).count.should == 7.5
8
+ PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).count.should == 7
9
+ PCM.new([[1, 2.3, 3.2, 1],[4.4, 0.1, 1, 2]]).count.should == 7.5
10
10
  end
11
11
  end
12
12
 
13
13
  describe '#to_pwm' do
14
14
  it 'should return PWM' do
15
- PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm.should be_kind_of(PWM)
15
+ PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm.should be_kind_of(PWM)
16
16
  end
17
17
  it 'should make transformation: el --> log( (el + p_i*pseudocount) / (p_i*(count + pseudocount)) )' do
18
- PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm(1).matrix.map{|line|line.map{|el| el.round(3)}}.should == [[-0.47, 0.118,0.486,-0.47],[0.754,-0.47,-0.47,0.118]]
19
- PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm(10).matrix.map{|line|line.map{|el| el.round(3)}}.should == [[-0.194, 0.057,0.258,-0.194],[0.425,-0.194,-0.194,0.057]]
18
+ PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm(1).matrix.map{|line|line.map{|el| el.round(3)}}.should == [[-0.47, 0.118, 0.486, -0.47],[0.754, -2.079, -0.47, 0.118]]
19
+ PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm(10).matrix.map{|line|line.map{|el| el.round(3)}}.should == [[-0.194, 0.057, 0.258, -0.194],[0.425, -0.531, -0.194, 0.057]]
20
20
  end
21
21
  it 'should use default pseudocount equal to log(count)' do
22
- PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm.should == PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm(Math.log(7))
22
+ PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm.should == PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm(Math.log(7))
23
+ end
24
+ it 'should preserve name' do
25
+ PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: nil).to_pwm.name.should be_nil
26
+ PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: 'Stub name').to_pwm.name.should == 'Stub name'
23
27
  end
24
28
  end
25
29
 
30
+ describe '#to_ppm' do
31
+ it 'should return PPM' do
32
+ PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_ppm.should be_kind_of(PPM)
33
+ end
34
+ it 'should make transformation el --> el / count' do
35
+ PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_ppm.should == PPM.new([[1.0/7, 2.0/7, 3.0/7, 1.0/7],[4.0/7, 0.0/7, 1.0/7, 2.0/7]])
36
+ end
37
+ it 'should preserve name' do
38
+ PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: nil).to_ppm.name.should be_nil
39
+ PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: 'Stub name').to_ppm.name.should == 'Stub name'
40
+ end
41
+ end
42
+
43
+
26
44
  end
27
45
  end
@@ -3,18 +3,17 @@ require 'bioinform/data_models/pm'
3
3
 
4
4
  module Bioinform
5
5
  describe PM do
6
- describe '#valid?' do
6
+
7
+ describe '::valid_matrix?' do
7
8
  it 'should be true iff an argument is an array of arrays of 4 numerics in a column' do
8
-
9
- PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,3,4],[1,4,5,6.5]]; self }.valid?.should be_true
10
- PM.new([[0,0,0,0]]).instance_eval{@matrix = {A: [1,1], C: [2,4], G: [3,5], T: [4, 6.5]}; self }.valid?.should be_false
11
- PM.new([[0,0,0,0]]).instance_eval{@matrix = [{A:1,C:2,G:3,T:4},{A:1,C:4,G:5,T: 6.5}]; self }.valid?.should be_false
12
- PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,3,4],[1,4,6.5]]; self }.valid?.should be_false
13
- PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,3],[1,4,6.5]]; self }.valid?.should be_false
14
- PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,'3','4'],[1,'4','5',6.5]]; self }.valid?.should be_false
15
-
9
+ PM.valid_matrix?( [[1,2,3,4],[1,4,5,6.5]] ).should be_true
10
+ PM.valid_matrix?( {A: [1,1], C: [2,4], G: [3,5], T: [4, 6.5]} ).should be_false
11
+ PM.valid_matrix?( [{A:1,C:2,G:3,T:4},{A:1,C:4,G:5,T: 6.5}] ).should be_false
12
+ PM.valid_matrix?( [[1,2,3,4],[1,4,6.5]] ).should be_false
13
+ PM.valid_matrix?( [[1,2,3],[1,4,6.5]] ).should be_false
14
+ PM.valid_matrix?( [[1,2,'3','4'],[1,'4','5',6.5]] ).should be_false
16
15
  end
17
- end
16
+ end
18
17
 
19
18
  describe '#to_s' do
20
19
  before :each do
@@ -249,6 +248,12 @@ module Bioinform
249
248
  @pm.best_suffix(2).should == (-1.0)
250
249
  @pm.best_suffix(3).should == (0.0)
251
250
  end
251
+ it 'should give right results after left(right)_augment, discrete, reverse_complement etc' do
252
+ pm = PM.new([[1, 2, 3, 4], [10,10.5,11,11.5]])
253
+ pm.best_suffix(1).should == 11.5
254
+ pm.left_augment!(1)
255
+ pm.best_suffix(1).should == 15.5
256
+ end
252
257
  end
253
258
  describe '#worst_suffix' do
254
259
  it 'should return minimal score of suffices from i-th position inclusively i.e. [i..end]' do
@@ -0,0 +1,8 @@
1
+ require 'spec_helper'
2
+ require 'bioinform/data_models/pcm'
3
+
4
+ module Bioinform
5
+ describe PPM do
6
+
7
+ end
8
+ end
@@ -3,6 +3,86 @@ require 'bioinform/parsers/parser'
3
3
 
4
4
  module Bioinform
5
5
  describe Parser do
6
+ context '#initialize' do
7
+ it 'should accept an array correctly' do
8
+ Parser.new([[1,2,3,4],[5,6,7,8]]).parse[:matrix].should == [[1,2,3,4],[5,6,7,8]]
9
+ end
10
+ it 'should treat several arguments as an array composed of them' do
11
+ Parser.new([1,2,3,4],[5,6,7,8]).parse.should == Parser.new([[1,2,3,4],[5,6,7,8]]).parse
12
+ end
13
+ it 'should treat one Array of numbers as an Array(with 1 element) of Arrays' do
14
+ Parser.new([1,2,3,4]).parse.should == Parser.new([[1,2,3,4]]).parse
15
+ end
16
+ end
17
+
18
+ context '::parse!' do
19
+ it 'should behave like Parser.new(input).parse!' do
20
+ Parser.parse!([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse!
21
+ expect{ Parser.parse!([1,2,3],[4,5,6]) }.to raise_error
22
+ end
23
+ end
24
+ context '::parse' do
25
+ it 'should behave like Parser.new(input).parse!' do
26
+ Parser.parse([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse
27
+ Parser.parse([1,2,3],[4,5,6]).should be_nil
28
+ end
29
+ end
30
+
31
+ context '::normalize_hash_keys' do
32
+ it 'should convert both symbolic and string keys, in both upcase and downcase to symbolic upcases' do
33
+ Parser.normalize_hash_keys( {a: 1, C: 2, 'g' => 3, 'T' => 4} ).should == {A: 1, C: 2, G: 3, T: 4}
34
+ end
35
+ end
36
+
37
+ context '::need_transpose?' do
38
+ it 'should point whether matrix have positions(need not be transposed -- false) or letters(true) as first index' do
39
+ Parser.need_tranpose?([[1,3,5,7], [2,4,6,8]]).should be_false
40
+ Parser.need_tranpose?([[1,2],[3,4],[5,6],[7,8]]).should be_true
41
+ end
42
+ end
43
+ context '::array_from_acgt_hash' do
44
+ it 'should convert hash of arrays to a transposed array of arrays' do
45
+ input = {A: [1,2,3], C: [2,3,4], G: [3,4,5], T: [4,5,6]}
46
+ Parser.array_from_acgt_hash(input).should == [[1,2,3], [2,3,4], [3,4,5], [4,5,6]].transpose
47
+ end
48
+ it 'should convert hash of numbers to an array of numbers' do
49
+ input = {A: 1, C: 2, G: 3, T: 4}
50
+ Parser.array_from_acgt_hash(input).should == [1,2,3,4]
51
+ end
52
+ it 'should process both symbolic and string keys, in both upcase and downcase' do
53
+ input_normal_keys = {A: 1, C: 2, G: 3, T: 4}
54
+ input_different_keys = {:A => 1, :c => 2, 'g' => 3, 'T' => 4}
55
+ Parser.array_from_acgt_hash(input_different_keys).should == Parser.array_from_acgt_hash(input_normal_keys)
56
+ end
57
+ end
58
+
59
+ context '::try_convert_to_array' do
60
+ it 'should not change array' do
61
+ inputs = []
62
+ inputs << [[1,2,3,4], [2,3,4,5], [3,4,5,6]]
63
+ inputs << [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}, {A:3, C:4, G:5, T:6}]
64
+ inputs.each do |input|
65
+ Parser.try_convert_to_array( input ).should == input
66
+ end
67
+ end
68
+ it 'should convert ACGT-Hashes to an array of positions (not letters)' do
69
+ Parser.try_convert_to_array( {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} ).should == [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
70
+ end
71
+ end
72
+
73
+ context '#parse' do
74
+ it 'should give the same result as #parse!' do
75
+ parser = Parser.new('stub parser')
76
+ parser.stub(:parse!).and_return('stub result')
77
+ parser.parse.should == 'stub result'
78
+ end
79
+ it 'should return nil if #parse! raised an exception' do
80
+ parser = Parser.new('stub parser')
81
+ parser.stub(:parse!).and_raise
82
+ parser.parse.should be_nil
83
+ end
84
+ end
85
+
6
86
  good_cases = {
7
87
  'Array Nx4' => {input: [[0,1,2,3],[10,11,12,13]],
8
88
  matrix: [[0,1,2,3],[10,11,12,13]] },
@@ -30,6 +110,10 @@ module Bioinform
30
110
  }
31
111
 
32
112
  bad_cases = {
113
+ 'Nil object on input' => {input: nil},
114
+
115
+ 'Empty array on input' => {input: []},
116
+
33
117
  'Different sizes of row arrays' => {input: [[1,2,3,4],[5,6,7,8,9]] },
34
118
 
35
119
  'Different sizes of column arrays' => {input: [[0,10],[1,11],[2,12],[3]] },
@@ -54,5 +138,10 @@ module Bioinform
54
138
  }
55
139
 
56
140
  parser_specs(Parser, good_cases, bad_cases)
141
+ context '#parser!' do
142
+ it "should raise an exception on parsing empty list to parser" do
143
+ expect{ Parser.new().parse! }.to raise_error
144
+ end
145
+ end
57
146
  end
58
147
  end
@@ -5,20 +5,22 @@ module Bioinform
5
5
  describe StringFantomParser do
6
6
  good_cases = {
7
7
  'string in Fantom-format' => {input: "
8
- NA motif_CTNCAG
9
- P0 A C G T
10
- P1 0 1878368 0 0
11
- P2 0 0 0 1878368
12
- P3 469592 469592 469592 469592
13
- P4 0 1878368 0 0
14
- P5 1878368 0 0 0
15
- P6 0 0 1878368 0",
16
- matrix: [[0.0, 1878368.0, 0.0, 0.0],
17
- [0.0, 0.0, 0.0, 1878368.0],
18
- [469592.0, 469592.0, 469592.0, 469592.0],
19
- [0.0, 1878368.0, 0.0, 0.0],
20
- [1878368.0, 0.0, 0.0, 0.0],
21
- [0.0, 0.0, 1878368.0, 0.0]] }
8
+ NA motif_CTNCAG
9
+ P0 A C G T
10
+ P1 0 1878368 0 0
11
+ P2 0 0 0 1878368
12
+ P3 469592 469592 469592 469592
13
+ P4 0 1878368 0 0
14
+ P5 1878368 0 0 0
15
+ P6 0 0 1878368 0",
16
+ matrix: [ [0.0, 1878368.0, 0.0, 0.0],
17
+ [0.0, 0.0, 0.0, 1878368.0],
18
+ [469592.0, 469592.0, 469592.0, 469592.0],
19
+ [0.0, 1878368.0, 0.0, 0.0],
20
+ [1878368.0, 0.0, 0.0, 0.0],
21
+ [0.0, 0.0, 1878368.0, 0.0]],
22
+ name: 'motif_CTNCAG'
23
+ }
22
24
  }
23
25
 
24
26
  bad_cases = { }
@@ -3,6 +3,52 @@ require 'bioinform/parsers/string_parser'
3
3
 
4
4
  module Bioinform
5
5
  describe StringParser do
6
+
7
+ describe '#each' do
8
+ it 'should yield consequent results of #parse! while it returns result' do
9
+ parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
10
+ expect{|b| parser.each(&b)}.to yield_successive_args({matrix:[[1,2,3,4],[5,6,7,8]], name:nil}, {matrix:[[1,2,3,4],[1,2,3,4]], name:nil}, {matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name'} )
11
+ end
12
+ it 'should restart parser from the beginning each time' do
13
+ parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
14
+ 3.times do
15
+ expect{|b| parser.each(&b)}.to yield_successive_args({matrix:[[1,2,3,4],[5,6,7,8]], name:nil}, {matrix:[[1,2,3,4],[1,2,3,4]], name:nil}, {matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name'} )
16
+ end
17
+ end
18
+ end
19
+
20
+ context '::split' do
21
+ it 'should be able to get a single PM' do
22
+ StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12").should == [ {matrix: [[1,2,3,4],[5,6,7,8],[9,10,11,12]], name:nil} ]
23
+ end
24
+
25
+ it 'should be able to split several PMs separated with an empty line' do
26
+ StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:nil} ]
27
+ end
28
+
29
+ it 'should be able to split several PMs separated with name' do
30
+ StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
31
+
32
+ StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8\n\n\n").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
33
+ end
34
+ end
35
+
36
+ context '::split_on_motifs' do
37
+ it 'should be able to split string into PMs' do
38
+ result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
39
+ result.map{|pm| pm.matrix}.should == [ [[1,2,3,4],[5,6,7,8],[9,10,11,12]], [[9,10,11,12],[1,2,3,4],[5,6,7,8]] ]
40
+ result.map{|pm| pm.name}.should == [nil, 'Name']
41
+ end
42
+ it 'should create PMs by default' do
43
+ result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
44
+ result.each{|pm| pm.class.should == PM}
45
+ end
46
+ it 'should create PM subclass when it\'s specified' do
47
+ result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8", PWM)
48
+ result.each{|pm| pm.class.should == PWM}
49
+ end
50
+ end
51
+
6
52
  good_cases = {
7
53
  'Nx4 string' => {input: "1 2 3 4\n5 6 7 8",
8
54
  matrix: [[1,2,3,4],[5,6,7,8]] },
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+ require 'bioinform/parsers/parser'
3
+
4
+ module Bioinform
5
+ describe TrivialParser do
6
+ context '#initialize' do
7
+ it 'should take the only input argument' do
8
+ TrivialParser.instance_method(:initialize).arity.should == 1
9
+ end
10
+ end
11
+ context '#parser!' do
12
+ it 'should return input of that was passed to initialize' do
13
+ TrivialParser.new('stub input').parse!.should == 'stub input'
14
+ end
15
+ end
16
+ it 'can be used to create PM with {matrix: ..., name: ...} form' do
17
+ pm = PM.new({matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}, TrivialParser)
18
+ pm.matrix.should == [[1,2,3,4],[5,6,7,8]]
19
+ pm.name.should == 'Name'
20
+ end
21
+ end
22
+ end
data/spec/spec_helper.rb CHANGED
@@ -4,17 +4,23 @@ $LOAD_PATH.unshift File.dirname(__FILE__)
4
4
  require 'rspec'
5
5
 
6
6
  def parser_specs(parser_klass, good_cases, bad_cases)
7
- good_cases.each do |case_description, input_and_result|
8
- it "should be able to parse #{case_description}" do
9
- result = parser_klass.new(input_and_result[:input]).parse
10
- result[:matrix].should == input_and_result[:matrix]
11
- result[:name].should == input_and_result[:name] if input_and_result.has_key?(:name)
7
+ context '#parse!' do
8
+ good_cases.each do |case_description, input_and_result|
9
+ it "should be able to parse #{case_description}" do
10
+ result = parser_klass.new(input_and_result[:input]).parse
11
+ result[:matrix].should == input_and_result[:matrix]
12
+ if input_and_result.has_key?(:name)
13
+ result[:name].should == input_and_result[:name]
14
+ else
15
+ result[:name].should be_nil
16
+ end
17
+ end
12
18
  end
13
- end
14
-
15
- bad_cases.each do |case_description, input|
16
- it "should fail silently returning {} on parsing #{case_description}" do
17
- parser_klass.new(input[:input]).parse.should == {}
19
+
20
+ bad_cases.each do |case_description, input|
21
+ it "should raise an exception on parsing #{case_description}" do
22
+ expect{ parser_klass.new(input[:input]).parse! }.to raise_error
23
+ end
18
24
  end
19
25
  end
20
26
  end
@@ -0,0 +1,32 @@
1
+ require 'spec_helper'
2
+ require 'bioinform/support/advanced_scan'
3
+
4
+ describe StringScanner do
5
+ context '#advanced_scan' do
6
+ before do
7
+ @scanner = StringScanner.new('abcde fghIJKLmnop')
8
+ end
9
+ it 'should return nil if text doesn\'t match. Pointer should not move' do
10
+ @scanner.advanced_scan(/\s\s\s/).should be_nil
11
+ @scanner.pos.should == 0
12
+ end
13
+ it 'should return MatchData if string Matches. Pointer should move' do
14
+ @scanner.advanced_scan(/\w\w\w/).should be_kind_of MatchData
15
+ @scanner.pos.should == 3
16
+ end
17
+ it 'should return have the same groups as regexp has' do
18
+ result = @scanner.advanced_scan(/(\w+)(\s+)([a-z]+)([A-Z]+)/)
19
+ result[0].should == 'abcde fghIJKL'
20
+ result[1].should == 'abcde'
21
+ result[2].should == ' '
22
+ result[3].should == 'fgh'
23
+ result[4].should == 'IJKL'
24
+ end
25
+ it 'should return have the same named groups as regexp has' do
26
+ result = @scanner.advanced_scan(/(\w+)(\s+)(?<word_downcase>[a-z]+)(?<word_upcase>[A-Z]+)/)
27
+ result[0].should == 'abcde fghIJKL'
28
+ result[:word_downcase].should == 'fgh'
29
+ result[:word_upcase].should == 'IJKL'
30
+ end
31
+ end
32
+ end
@@ -15,5 +15,11 @@ describe String do
15
15
  it 'should preserve rows pagination' do
16
16
  "abc def ghi\njk lmn".multiline_squish.should == "abc def ghi\njk lmn"
17
17
  end
18
+ it 'should preserve empty lines in the middle of text' do
19
+ "abc def\n\nghi\n \t \njk lmn \n\n\n zzz".multiline_squish.should == "abc def\n\nghi\n\njk lmn\n\n\nzzz"
20
+ end
21
+ it 'should drop empty lines at begin and at end of string' do
22
+ "\n \t\n\nabc def\n\nghi\n \t \njk lmn \n\n\n zzz\n\n \t \n".multiline_squish.should == "abc def\n\nghi\n\njk lmn\n\n\nzzz"
23
+ end
18
24
  end
19
25
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bioinform
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-31 00:00:00.000000000 Z
12
+ date: 2012-09-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activesupport
@@ -67,7 +67,9 @@ files:
67
67
  - lib/bioinform/parsers/parser.rb
68
68
  - lib/bioinform/parsers/string_fantom_parser.rb
69
69
  - lib/bioinform/parsers/string_parser.rb
70
+ - lib/bioinform/parsers/trivial_parser.rb
70
71
  - lib/bioinform/support.rb
72
+ - lib/bioinform/support/advanced_scan.rb
71
73
  - lib/bioinform/support/array_product.rb
72
74
  - lib/bioinform/support/array_zip.rb
73
75
  - lib/bioinform/support/callable_symbol.rb
@@ -83,11 +85,14 @@ files:
83
85
  - lib/bioinform/version.rb
84
86
  - spec/data_models/pcm_spec.rb
85
87
  - spec/data_models/pm_spec.rb
88
+ - spec/data_models/ppm_spec.rb
86
89
  - spec/data_models/pwm_spec.rb
87
90
  - spec/parsers/parser_spec.rb
88
91
  - spec/parsers/string_fantom_parser_spec.rb
89
92
  - spec/parsers/string_parser_spec.rb
93
+ - spec/parsers/trivial_parser_spec.rb
90
94
  - spec/spec_helper.rb
95
+ - spec/support/advanced_scan_spec.rb
91
96
  - spec/support/array_product_spec.rb
92
97
  - spec/support/array_zip_spec.rb
93
98
  - spec/support/callable_symbol_spec.rb
@@ -128,11 +133,14 @@ summary: Classes for work with different input formats of positional matrices an
128
133
  test_files:
129
134
  - spec/data_models/pcm_spec.rb
130
135
  - spec/data_models/pm_spec.rb
136
+ - spec/data_models/ppm_spec.rb
131
137
  - spec/data_models/pwm_spec.rb
132
138
  - spec/parsers/parser_spec.rb
133
139
  - spec/parsers/string_fantom_parser_spec.rb
134
140
  - spec/parsers/string_parser_spec.rb
141
+ - spec/parsers/trivial_parser_spec.rb
135
142
  - spec/spec_helper.rb
143
+ - spec/support/advanced_scan_spec.rb
136
144
  - spec/support/array_product_spec.rb
137
145
  - spec/support/array_zip_spec.rb
138
146
  - spec/support/callable_symbol_spec.rb