bioinform 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,7 @@
1
1
  require 'bioinform/support'
2
2
  require 'bioinform/data_models/pm'
3
+ require 'bioinform/data_models/ppm'
4
+ require 'bioinform/data_models/pwm'
3
5
  module Bioinform
4
6
  class PCM < PM
5
7
  def count
@@ -12,7 +14,12 @@ module Bioinform
12
14
  Math.log((pos[ind] + probability[ind] * pseudocount) / (probability[ind]*(count + pseudocount)) )
13
15
  end
14
16
  end
15
- PWM.new(mat)
17
+ PWM.new(matrix: mat, name: name)
18
+ end
19
+
20
+ def to_ppm
21
+ mat = each_position.map{|pos| pos.map{|el| el.to_f / count }}
22
+ PPM.new(matrix: mat, name: name)
16
23
  end
17
24
 
18
25
  end
@@ -2,23 +2,23 @@ require 'bioinform/support'
2
2
  require 'bioinform/parsers'
3
3
 
4
4
  module Bioinform
5
- IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3}
6
- LetterByIndex = {0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T'}
7
-
5
+ IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3, A: 0, C: 1, G: 2, T: 3}
6
+ LetterByIndex = {0 => :A, 1 => :C, 2 => :G, 3 => :T}
7
+
8
8
  class PM
9
9
  attr_reader :matrix
10
10
  attr_accessor :background, :name
11
11
 
12
- def choose_parser(input)
13
- input.is_a?(String) ? StringParser : Parser
14
- [Parser, StringParser, StringFantomParser].find do |parser|
15
- self.class.new(input, parser) rescue nil
12
+ def self.choose_parser(input)
13
+ [TrivialParser, Parser, StringParser, StringFantomParser].find do |parser|
14
+ self.new(input, parser) rescue nil
16
15
  end
17
16
  end
18
17
 
19
18
  def initialize(input, parser = nil)
20
- parser ||= choose_parser(input)
21
- result = parser.new(input).parse
19
+ parser ||= self.class.choose_parser(input)
20
+ raise 'No one parser can process input' unless parser
21
+ result = parser.new(input).parse
22
22
  @matrix = result[:matrix]
23
23
  @name = result[:name]
24
24
  @background = [1, 1, 1, 1]
@@ -29,15 +29,20 @@ module Bioinform
29
29
  @matrix == other.matrix && @background == other.background
30
30
  end
31
31
 
32
- def valid?
33
- @matrix.is_a?(Array) &&
34
- @matrix.all?(&:is_a?.(Array)) &&
35
- @matrix.all?{|pos| pos.size == 4} &&
36
- @matrix.all?(&:all?.(&:is_a?.(Numeric)))
32
+ def self.valid_matrix?(matrix)
33
+ matrix.is_a?(Array) &&
34
+ ! matrix.empty? &&
35
+ matrix.all?(&:is_a?.(Array)) &&
36
+ matrix.all?{|pos| pos.size == 4} &&
37
+ matrix.all?(&:all?.(&:is_a?.(Numeric)))
37
38
  rescue
38
39
  false
39
40
  end
40
41
 
42
+ def valid?
43
+ self.class.valid_matrix?(@matrix)
44
+ end
45
+
41
46
  def each_position
42
47
  if block_given?
43
48
  matrix.each{|pos| yield pos}
@@ -54,7 +59,7 @@ module Bioinform
54
59
  def to_s(with_name = true)
55
60
  matrix_str = each_position.map(&:join.("\t")).join("\n")
56
61
  if with_name && @name
57
- "#{@name}\n#{matrix_str}"
62
+ @name + "\n" + matrix_str
58
63
  else
59
64
  matrix_str
60
65
  end
@@ -3,6 +3,8 @@ require 'bioinform/data_models/pm'
3
3
 
4
4
  module Bioinform
5
5
  class PPM < PM
6
-
6
+ def to_ppm
7
+ self
8
+ end
7
9
  end
8
10
  end
@@ -21,8 +21,20 @@ module Bioinform
21
21
  def score(word)
22
22
  word = word.upcase
23
23
  raise ArgumentError, 'word in PWM#score(word) should have the same length as matrix' unless word.length == length
24
- raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters' unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
25
- word.each_char.map.with_index{|letter, pos| matrix[pos][IndexByLetter[letter]] }.inject(&:+)
24
+ #raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters' unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
25
+ (0...length).map do |pos|
26
+ begin
27
+ # Need support of N-letters and other IUPAC
28
+ letter = word[pos]
29
+ matrix[pos][IndexByLetter[letter]]
30
+ rescue
31
+ raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters'
32
+ end
33
+ end.inject(&:+)
34
+ end
35
+
36
+ def to_pwm
37
+ self
26
38
  end
27
39
  end
28
40
  end
@@ -1,40 +1,79 @@
1
1
  require 'bioinform/support'
2
+ require 'bioinform/data_models/pm'
2
3
 
3
4
  module Bioinform
4
5
  class Parser
5
- attr_reader :input, :matrix
6
+ attr_reader :input
6
7
 
7
- def initialize(input)
8
- @input = input
8
+ def initialize(*input)
9
+ if input.size == 1 # [ [1,2,3,4] ], [ [[1,2,3,4],[5,6,7,8]] ]
10
+ if input.first.is_a?(Array) && input.first.all?{|el| el.is_a? Numeric} # [ [1,2,3,4] ]
11
+ @input = input
12
+ else # [ [[1,2,3,4],[5,6,7,8]] ]
13
+ @input = input.first
14
+ end
15
+ else #[ [1,2,3,4], [5,6,7,8] ], [ ]
16
+ @input = input
17
+ end
18
+ end
19
+
20
+ def parse!
21
+ matrix = self.class.transform_input(input)
22
+ raise 'Parsing error' unless self.class.valid_matrix?(matrix)
23
+ {matrix: matrix}
9
24
  end
10
25
 
11
26
  def parse
12
- inp = input
13
- transpose = inp.is_a?(Hash)
14
- inp = ClassMethods.try_convert_to_array(inp)
15
- inp.map!{|x| ClassMethods.try_convert_to_array(x)}
16
- transpose = true if (not inp.all?{|x| x.size == 4}) && inp.size == 4 && inp.same_by?(&:size)
17
- @matrix = transpose ? inp.transpose : inp
18
- result
19
- rescue
20
- {}
21
- end
22
-
23
- def result(options={})
24
- raise 'Parsing Error' unless matrix.is_a?(Array) && matrix.all?(&:is_a?.(Array)) && matrix.all?{|pos| pos.size == 4} && matrix.all?(&:all?.(&:is_a?.(Numeric)))
25
- options.merge(matrix: @matrix)
26
- end
27
-
28
- class ClassMethods
29
- def self.array_from_acgt_hash(hsh)
30
- hsh = hsh.collect_hash{|key,value| [key.to_s.upcase, value] }
31
- raise 'some of hash keys A,C,G,T are missing or hash has excess keys' unless hsh.keys.sort == %w[A C G T]
32
- %w[A C G T].collect{|letter| hsh[letter] }
33
- end
34
- def self.try_convert_to_array(inp)
35
- return inp if inp.is_a? Array
36
- array_from_acgt_hash(inp)
27
+ parse! rescue nil
28
+ end
29
+
30
+ def self.parse!(*input)
31
+ self.new(*input).parse!
32
+ end
33
+ def self.parse(*input)
34
+ self.new(*input).parse
35
+ end
36
+
37
+ def self.valid_matrix?(matrix)
38
+ PM.valid_matrix?(matrix)
39
+ end
40
+
41
+ # {A: 1, C: 2, G: 3, T: 4} --> [1,2,3,4]
42
+ # {A: [1,2], C: [3,4], G: [5,6], T: [7,8]} --> [[1,3,5,7],[2,4,6,8]] ( == [[1,2], [3,4], [5,6], [7,8]].transpose)
43
+ def self.array_from_acgt_hash(hsh)
44
+ hsh = normalize_hash_keys(hsh)
45
+ raise 'some of hash keys A,C,G,T are missing or hash has excess keys' unless hsh.keys.sort == [:A,:C,:G,:T]
46
+ result = [:A,:C,:G,:T].collect{|letter| hsh[letter] }
47
+ result.all?{|el| el.is_a?(Array)} ? result.transpose : result
48
+ end
49
+
50
+ # {a: 1, C: 2, 'g' => 3, 'T' => 4} --> {A: 1, C: 2, G: 3, T: 4}
51
+ def self.normalize_hash_keys(hsh)
52
+ hsh.collect_hash{|key,value| [key.to_s.upcase.to_sym, value] }
53
+ end
54
+
55
+ # [[1,2,3,4], [2,3,4,5]] --> [[1,2,3,4], [2,3,4,5]]
56
+ # [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}] --> [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}]
57
+ # {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} --> [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
58
+ def self.try_convert_to_array(input)
59
+ case input
60
+ when Array then input
61
+ when Hash then array_from_acgt_hash(input)
62
+ else raise TypeError, 'input of Bioinform::Parser::array_from_acgt_hash should be Array or Hash'
37
63
  end
38
64
  end
65
+
66
+ def self.transform_input(input)
67
+ result = try_convert_to_array(input).map{|el| try_convert_to_array(el)}
68
+ need_tranpose?(result) ? result.transpose : result
69
+ end
70
+
71
+ # point whether matrix input positions(need not be transposed -- false) or letters(need -- true) as first index
72
+ # [[1,3,5,7], [2,4,6,8]] --> false
73
+ # [[1,2],[3,4],[5,6],[7,8]] --> true
74
+ def self.need_tranpose?(input)
75
+ (input.size == 4) && input.any?{|x| x.size != 4}
76
+ end
77
+
39
78
  end
40
79
  end
@@ -3,18 +3,12 @@ require 'bioinform/parsers/string_parser'
3
3
 
4
4
  module Bioinform
5
5
  class StringFantomParser < StringParser
6
- def row_pat
7
- '[\w\d]+ ' + "(#{number_pat} )*#{number_pat}"
8
- end
9
- def name_pat
10
- 'NA (?<name>[\w.+:-]+)'
11
- end
12
6
  def header_pat
13
- "#{name_pat}\n" + '[\w\d]+ ' +"A C G T\n"
7
+ /NA (?<name>[\w.+:-]+)\n[\w\d]+ A C G T\n/
14
8
  end
15
9
 
16
- def matrix_preprocess(matrix)
17
- matrix.split("\n").map{|line| line.split[1..-1].map(&:to_f)}
10
+ def row_pat
11
+ /[\w\d]+ (?<row>(#{number_pat} )*#{number_pat})\n?/
18
12
  end
19
13
  end
20
14
  end
@@ -1,45 +1,85 @@
1
+ require 'strscan'
1
2
  require 'bioinform/support'
2
3
  require 'bioinform/parsers/parser'
3
4
 
4
5
  module Bioinform
5
6
  class StringParser < Parser
7
+ attr_reader :scanner
8
+ def initialize(input)
9
+ raise ArgumentError unless input.is_a?(String)
10
+ super
11
+ @scanner = StringScanner.new(input.multiline_squish)
12
+ end
13
+
6
14
  def number_pat
7
- '[+-]?\d+(\.\d+)?([eE][+-]?\d{1,3})?'
15
+ /[+-]?\d+(\.\d+)?([eE][+-]?\d{1,3})?/
8
16
  end
17
+
18
+ def header_pat
19
+ />?\s*(?<name>\S+)\n/
20
+ end
21
+
9
22
  def row_pat
10
- "(#{number_pat} )*#{number_pat}"
23
+ /(?<row>(#{number_pat} )*#{number_pat})\n?/
11
24
  end
12
- def name_pat
13
- '(>\s*)?(?<name>\S+)'
25
+
26
+ def scan_row
27
+ match = scanner.advanced_scan(row_pat)
28
+ match && match[:row]
14
29
  end
15
- def matrix_pat
16
- "(?<matrix>(#{row_pat}\n)*#{row_pat})"
30
+
31
+ def split_row(row_string)
32
+ row_string.split.map(&:to_f)
17
33
  end
18
- def header_pat
19
- "(#{name_pat}\n)?"
34
+
35
+ def scan_any_spaces
36
+ scanner.scan(/\s+/)
20
37
  end
21
- def pattern
22
- /\A#{header_pat}#{matrix_pat}\z/
38
+
39
+ def parse_name
40
+ match = scanner.advanced_scan(header_pat)
41
+ match && match[:name]
23
42
  end
24
43
 
25
- # when matrix is extracted from the string it should be transformed to a matrix of numerics
26
- def matrix_preprocess(matrix)
27
- matrix.split("\n").map{|line| line.split.map(&:to_f)}
44
+ def parse_matrix
45
+ matrix = []
46
+ while row_string = scan_row
47
+ matrix << split_row(row_string)
48
+ end
49
+ matrix
28
50
  end
29
51
 
30
- def parse
31
- case input
32
- when String
33
- match = input.multiline_squish.match(pattern)
34
- raise ArgumentError unless match
35
- matrix = matrix_preprocess( match[:matrix] )
36
- raise ArgumentError unless matrix
37
- Parser.new(matrix).parse.merge(name: match[:name])
52
+ def parse!
53
+ scan_any_spaces
54
+ name = parse_name
55
+ matrix = parse_matrix
56
+ Parser.parse!(matrix).merge(name: name)
57
+ end
58
+
59
+ def scanner_reset
60
+ scanner.reset
61
+ end
62
+
63
+ def each
64
+ if block_given?
65
+ scanner_reset
66
+ while result = parse
67
+ yield result
68
+ end
38
69
  else
39
- raise ArgumentError
70
+ Enumerator.new(self, :each)
40
71
  end
41
- rescue
42
- {}
43
72
  end
73
+ include Enumerable
74
+
75
+ alias_method :split, :to_a
76
+ def self.split(input)
77
+ self.new(input).split
78
+ end
79
+
80
+ def self.split_on_motifs(input, pm_klass = PM)
81
+ split(input).map{|el| pm_klass.new(el)}
82
+ end
83
+
44
84
  end
45
85
  end
@@ -0,0 +1,17 @@
1
+ require 'bioinform/support'
2
+ require 'bioinform/parsers/parser'
3
+
4
+ module Bioinform
5
+ # TrivialParser can be used to parse hashes returned by #parse method of other parsers:
6
+ # PM.new({matrix:[[1,2,3,4],[5,6,7,8]], name: 'Name'}, TrivialParser)
7
+ # PM.new(StringParser.new("1 2 3 4\n5 6 7 8").parse)
8
+ # StringParser.new("First\n1 2 3 4\n5 6 7 8\nSecond\n0 0 0 0").map{|inp| PM.new(inp, TrivialParser)}
9
+ class TrivialParser < Parser
10
+ def initialize(input)
11
+ @input = input
12
+ end
13
+ def parse!
14
+ input
15
+ end
16
+ end
17
+ end
@@ -1,3 +1,4 @@
1
1
  require 'bioinform/parsers/parser'
2
+ require 'bioinform/parsers/trivial_parser'
2
3
  require 'bioinform/parsers/string_parser'
3
4
  require 'bioinform/parsers/string_fantom_parser'
@@ -0,0 +1,8 @@
1
+ require 'strscan'
2
+
3
+ class StringScanner
4
+ def advanced_scan(pat)
5
+ result = scan(pat)
6
+ result && result.match(pat)
7
+ end
8
+ end
@@ -1,6 +1,6 @@
1
1
  require 'active_support/core_ext/string/filters'
2
2
  class String
3
3
  def multiline_squish
4
- split("\n").map(&:squish).drop_while(&:empty?).take_while{|line| !line.empty?}.join("\n")
4
+ split("\n").map(&:squish).join("\n").gsub(/\A\n+/,'').gsub(/\n+\z/,'')
5
5
  end
6
6
  end
@@ -14,4 +14,6 @@ require 'bioinform/support/deep_dup'
14
14
  require 'bioinform/support/partial_sums'
15
15
 
16
16
  require 'bioinform/support/array_zip'
17
- require 'bioinform/support/array_product'
17
+ require 'bioinform/support/array_product'
18
+
19
+ require 'bioinform/support/advanced_scan'
@@ -1,3 +1,3 @@
1
1
  module Bioinform
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.5"
3
3
  end
@@ -5,23 +5,41 @@ module Bioinform
5
5
  describe PCM do
6
6
  describe '#count' do
7
7
  it 'should be equal to sum of elements at position' do
8
- PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).count.should == 7
9
- PCM.new([[1, 2.3, 3.2, 1],[4.4, 1.1, 1, 2]]).count.should == 7.5
8
+ PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).count.should == 7
9
+ PCM.new([[1, 2.3, 3.2, 1],[4.4, 0.1, 1, 2]]).count.should == 7.5
10
10
  end
11
11
  end
12
12
 
13
13
  describe '#to_pwm' do
14
14
  it 'should return PWM' do
15
- PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm.should be_kind_of(PWM)
15
+ PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm.should be_kind_of(PWM)
16
16
  end
17
17
  it 'should make transformation: el --> log( (el + p_i*pseudocount) / (p_i*(count + pseudocount)) )' do
18
- PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm(1).matrix.map{|line|line.map{|el| el.round(3)}}.should == [[-0.47, 0.118,0.486,-0.47],[0.754,-0.47,-0.47,0.118]]
19
- PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm(10).matrix.map{|line|line.map{|el| el.round(3)}}.should == [[-0.194, 0.057,0.258,-0.194],[0.425,-0.194,-0.194,0.057]]
18
+ PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm(1).matrix.map{|line|line.map{|el| el.round(3)}}.should == [[-0.47, 0.118, 0.486, -0.47],[0.754, -2.079, -0.47, 0.118]]
19
+ PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm(10).matrix.map{|line|line.map{|el| el.round(3)}}.should == [[-0.194, 0.057, 0.258, -0.194],[0.425, -0.531, -0.194, 0.057]]
20
20
  end
21
21
  it 'should use default pseudocount equal to log(count)' do
22
- PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm.should == PCM.new([[1, 2, 3, 1],[4, 1, 1, 2]]).to_pwm(Math.log(7))
22
+ PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm.should == PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm(Math.log(7))
23
+ end
24
+ it 'should preserve name' do
25
+ PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: nil).to_pwm.name.should be_nil
26
+ PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: 'Stub name').to_pwm.name.should == 'Stub name'
23
27
  end
24
28
  end
25
29
 
30
+ describe '#to_ppm' do
31
+ it 'should return PPM' do
32
+ PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_ppm.should be_kind_of(PPM)
33
+ end
34
+ it 'should make transformation el --> el / count' do
35
+ PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_ppm.should == PPM.new([[1.0/7, 2.0/7, 3.0/7, 1.0/7],[4.0/7, 0.0/7, 1.0/7, 2.0/7]])
36
+ end
37
+ it 'should preserve name' do
38
+ PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: nil).to_ppm.name.should be_nil
39
+ PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: 'Stub name').to_ppm.name.should == 'Stub name'
40
+ end
41
+ end
42
+
43
+
26
44
  end
27
45
  end
@@ -3,18 +3,17 @@ require 'bioinform/data_models/pm'
3
3
 
4
4
  module Bioinform
5
5
  describe PM do
6
- describe '#valid?' do
6
+
7
+ describe '::valid_matrix?' do
7
8
  it 'should be true iff an argument is an array of arrays of 4 numerics in a column' do
8
-
9
- PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,3,4],[1,4,5,6.5]]; self }.valid?.should be_true
10
- PM.new([[0,0,0,0]]).instance_eval{@matrix = {A: [1,1], C: [2,4], G: [3,5], T: [4, 6.5]}; self }.valid?.should be_false
11
- PM.new([[0,0,0,0]]).instance_eval{@matrix = [{A:1,C:2,G:3,T:4},{A:1,C:4,G:5,T: 6.5}]; self }.valid?.should be_false
12
- PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,3,4],[1,4,6.5]]; self }.valid?.should be_false
13
- PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,3],[1,4,6.5]]; self }.valid?.should be_false
14
- PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,'3','4'],[1,'4','5',6.5]]; self }.valid?.should be_false
15
-
9
+ PM.valid_matrix?( [[1,2,3,4],[1,4,5,6.5]] ).should be_true
10
+ PM.valid_matrix?( {A: [1,1], C: [2,4], G: [3,5], T: [4, 6.5]} ).should be_false
11
+ PM.valid_matrix?( [{A:1,C:2,G:3,T:4},{A:1,C:4,G:5,T: 6.5}] ).should be_false
12
+ PM.valid_matrix?( [[1,2,3,4],[1,4,6.5]] ).should be_false
13
+ PM.valid_matrix?( [[1,2,3],[1,4,6.5]] ).should be_false
14
+ PM.valid_matrix?( [[1,2,'3','4'],[1,'4','5',6.5]] ).should be_false
16
15
  end
17
- end
16
+ end
18
17
 
19
18
  describe '#to_s' do
20
19
  before :each do
@@ -249,6 +248,12 @@ module Bioinform
249
248
  @pm.best_suffix(2).should == (-1.0)
250
249
  @pm.best_suffix(3).should == (0.0)
251
250
  end
251
+ it 'should give right results after left(right)_augment, discrete, reverse_complement etc' do
252
+ pm = PM.new([[1, 2, 3, 4], [10,10.5,11,11.5]])
253
+ pm.best_suffix(1).should == 11.5
254
+ pm.left_augment!(1)
255
+ pm.best_suffix(1).should == 15.5
256
+ end
252
257
  end
253
258
  describe '#worst_suffix' do
254
259
  it 'should return minimal score of suffices from i-th position inclusively i.e. [i..end]' do
@@ -0,0 +1,8 @@
1
+ require 'spec_helper'
2
+ require 'bioinform/data_models/pcm'
3
+
4
+ module Bioinform
5
+ describe PPM do
6
+
7
+ end
8
+ end
@@ -3,6 +3,86 @@ require 'bioinform/parsers/parser'
3
3
 
4
4
  module Bioinform
5
5
  describe Parser do
6
+ context '#initialize' do
7
+ it 'should accept an array correctly' do
8
+ Parser.new([[1,2,3,4],[5,6,7,8]]).parse[:matrix].should == [[1,2,3,4],[5,6,7,8]]
9
+ end
10
+ it 'should treat several arguments as an array composed of them' do
11
+ Parser.new([1,2,3,4],[5,6,7,8]).parse.should == Parser.new([[1,2,3,4],[5,6,7,8]]).parse
12
+ end
13
+ it 'should treat one Array of numbers as an Array(with 1 element) of Arrays' do
14
+ Parser.new([1,2,3,4]).parse.should == Parser.new([[1,2,3,4]]).parse
15
+ end
16
+ end
17
+
18
+ context '::parse!' do
19
+ it 'should behave like Parser.new(input).parse!' do
20
+ Parser.parse!([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse!
21
+ expect{ Parser.parse!([1,2,3],[4,5,6]) }.to raise_error
22
+ end
23
+ end
24
+ context '::parse' do
25
+ it 'should behave like Parser.new(input).parse!' do
26
+ Parser.parse([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse
27
+ Parser.parse([1,2,3],[4,5,6]).should be_nil
28
+ end
29
+ end
30
+
31
+ context '::normalize_hash_keys' do
32
+ it 'should convert both symbolic and string keys, in both upcase and downcase to symbolic upcases' do
33
+ Parser.normalize_hash_keys( {a: 1, C: 2, 'g' => 3, 'T' => 4} ).should == {A: 1, C: 2, G: 3, T: 4}
34
+ end
35
+ end
36
+
37
+ context '::need_transpose?' do
38
+ it 'should point whether matrix have positions(need not be transposed -- false) or letters(true) as first index' do
39
+ Parser.need_tranpose?([[1,3,5,7], [2,4,6,8]]).should be_false
40
+ Parser.need_tranpose?([[1,2],[3,4],[5,6],[7,8]]).should be_true
41
+ end
42
+ end
43
+ context '::array_from_acgt_hash' do
44
+ it 'should convert hash of arrays to a transposed array of arrays' do
45
+ input = {A: [1,2,3], C: [2,3,4], G: [3,4,5], T: [4,5,6]}
46
+ Parser.array_from_acgt_hash(input).should == [[1,2,3], [2,3,4], [3,4,5], [4,5,6]].transpose
47
+ end
48
+ it 'should convert hash of numbers to an array of numbers' do
49
+ input = {A: 1, C: 2, G: 3, T: 4}
50
+ Parser.array_from_acgt_hash(input).should == [1,2,3,4]
51
+ end
52
+ it 'should process both symbolic and string keys, in both upcase and downcase' do
53
+ input_normal_keys = {A: 1, C: 2, G: 3, T: 4}
54
+ input_different_keys = {:A => 1, :c => 2, 'g' => 3, 'T' => 4}
55
+ Parser.array_from_acgt_hash(input_different_keys).should == Parser.array_from_acgt_hash(input_normal_keys)
56
+ end
57
+ end
58
+
59
+ context '::try_convert_to_array' do
60
+ it 'should not change array' do
61
+ inputs = []
62
+ inputs << [[1,2,3,4], [2,3,4,5], [3,4,5,6]]
63
+ inputs << [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}, {A:3, C:4, G:5, T:6}]
64
+ inputs.each do |input|
65
+ Parser.try_convert_to_array( input ).should == input
66
+ end
67
+ end
68
+ it 'should convert ACGT-Hashes to an array of positions (not letters)' do
69
+ Parser.try_convert_to_array( {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} ).should == [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
70
+ end
71
+ end
72
+
73
+ context '#parse' do
74
+ it 'should give the same result as #parse!' do
75
+ parser = Parser.new('stub parser')
76
+ parser.stub(:parse!).and_return('stub result')
77
+ parser.parse.should == 'stub result'
78
+ end
79
+ it 'should return nil if #parse! raised an exception' do
80
+ parser = Parser.new('stub parser')
81
+ parser.stub(:parse!).and_raise
82
+ parser.parse.should be_nil
83
+ end
84
+ end
85
+
6
86
  good_cases = {
7
87
  'Array Nx4' => {input: [[0,1,2,3],[10,11,12,13]],
8
88
  matrix: [[0,1,2,3],[10,11,12,13]] },
@@ -30,6 +110,10 @@ module Bioinform
30
110
  }
31
111
 
32
112
  bad_cases = {
113
+ 'Nil object on input' => {input: nil},
114
+
115
+ 'Empty array on input' => {input: []},
116
+
33
117
  'Different sizes of row arrays' => {input: [[1,2,3,4],[5,6,7,8,9]] },
34
118
 
35
119
  'Different sizes of column arrays' => {input: [[0,10],[1,11],[2,12],[3]] },
@@ -54,5 +138,10 @@ module Bioinform
54
138
  }
55
139
 
56
140
  parser_specs(Parser, good_cases, bad_cases)
141
+ context '#parser!' do
142
+ it "should raise an exception on parsing empty list to parser" do
143
+ expect{ Parser.new().parse! }.to raise_error
144
+ end
145
+ end
57
146
  end
58
147
  end
@@ -5,20 +5,22 @@ module Bioinform
5
5
  describe StringFantomParser do
6
6
  good_cases = {
7
7
  'string in Fantom-format' => {input: "
8
- NA motif_CTNCAG
9
- P0 A C G T
10
- P1 0 1878368 0 0
11
- P2 0 0 0 1878368
12
- P3 469592 469592 469592 469592
13
- P4 0 1878368 0 0
14
- P5 1878368 0 0 0
15
- P6 0 0 1878368 0",
16
- matrix: [[0.0, 1878368.0, 0.0, 0.0],
17
- [0.0, 0.0, 0.0, 1878368.0],
18
- [469592.0, 469592.0, 469592.0, 469592.0],
19
- [0.0, 1878368.0, 0.0, 0.0],
20
- [1878368.0, 0.0, 0.0, 0.0],
21
- [0.0, 0.0, 1878368.0, 0.0]] }
8
+ NA motif_CTNCAG
9
+ P0 A C G T
10
+ P1 0 1878368 0 0
11
+ P2 0 0 0 1878368
12
+ P3 469592 469592 469592 469592
13
+ P4 0 1878368 0 0
14
+ P5 1878368 0 0 0
15
+ P6 0 0 1878368 0",
16
+ matrix: [ [0.0, 1878368.0, 0.0, 0.0],
17
+ [0.0, 0.0, 0.0, 1878368.0],
18
+ [469592.0, 469592.0, 469592.0, 469592.0],
19
+ [0.0, 1878368.0, 0.0, 0.0],
20
+ [1878368.0, 0.0, 0.0, 0.0],
21
+ [0.0, 0.0, 1878368.0, 0.0]],
22
+ name: 'motif_CTNCAG'
23
+ }
22
24
  }
23
25
 
24
26
  bad_cases = { }
@@ -3,6 +3,52 @@ require 'bioinform/parsers/string_parser'
3
3
 
4
4
  module Bioinform
5
5
  describe StringParser do
6
+
7
+ describe '#each' do
8
+ it 'should yield consequent results of #parse! while it returns result' do
9
+ parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
10
+ expect{|b| parser.each(&b)}.to yield_successive_args({matrix:[[1,2,3,4],[5,6,7,8]], name:nil}, {matrix:[[1,2,3,4],[1,2,3,4]], name:nil}, {matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name'} )
11
+ end
12
+ it 'should restart parser from the beginning each time' do
13
+ parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
14
+ 3.times do
15
+ expect{|b| parser.each(&b)}.to yield_successive_args({matrix:[[1,2,3,4],[5,6,7,8]], name:nil}, {matrix:[[1,2,3,4],[1,2,3,4]], name:nil}, {matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name'} )
16
+ end
17
+ end
18
+ end
19
+
20
+ context '::split' do
21
+ it 'should be able to get a single PM' do
22
+ StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12").should == [ {matrix: [[1,2,3,4],[5,6,7,8],[9,10,11,12]], name:nil} ]
23
+ end
24
+
25
+ it 'should be able to split several PMs separated with an empty line' do
26
+ StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:nil} ]
27
+ end
28
+
29
+ it 'should be able to split several PMs separated with name' do
30
+ StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
31
+
32
+ StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8\n\n\n").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
33
+ end
34
+ end
35
+
36
+ context '::split_on_motifs' do
37
+ it 'should be able to split string into PMs' do
38
+ result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
39
+ result.map{|pm| pm.matrix}.should == [ [[1,2,3,4],[5,6,7,8],[9,10,11,12]], [[9,10,11,12],[1,2,3,4],[5,6,7,8]] ]
40
+ result.map{|pm| pm.name}.should == [nil, 'Name']
41
+ end
42
+ it 'should create PMs by default' do
43
+ result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
44
+ result.each{|pm| pm.class.should == PM}
45
+ end
46
+ it 'should create PM subclass when it\'s specified' do
47
+ result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8", PWM)
48
+ result.each{|pm| pm.class.should == PWM}
49
+ end
50
+ end
51
+
6
52
  good_cases = {
7
53
  'Nx4 string' => {input: "1 2 3 4\n5 6 7 8",
8
54
  matrix: [[1,2,3,4],[5,6,7,8]] },
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+ require 'bioinform/parsers/parser'
3
+
4
+ module Bioinform
5
+ describe TrivialParser do
6
+ context '#initialize' do
7
+ it 'should take the only input argument' do
8
+ TrivialParser.instance_method(:initialize).arity.should == 1
9
+ end
10
+ end
11
+ context '#parser!' do
12
+ it 'should return input of that was passed to initialize' do
13
+ TrivialParser.new('stub input').parse!.should == 'stub input'
14
+ end
15
+ end
16
+ it 'can be used to create PM with {matrix: ..., name: ...} form' do
17
+ pm = PM.new({matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}, TrivialParser)
18
+ pm.matrix.should == [[1,2,3,4],[5,6,7,8]]
19
+ pm.name.should == 'Name'
20
+ end
21
+ end
22
+ end
data/spec/spec_helper.rb CHANGED
@@ -4,17 +4,23 @@ $LOAD_PATH.unshift File.dirname(__FILE__)
4
4
  require 'rspec'
5
5
 
6
6
  def parser_specs(parser_klass, good_cases, bad_cases)
7
- good_cases.each do |case_description, input_and_result|
8
- it "should be able to parse #{case_description}" do
9
- result = parser_klass.new(input_and_result[:input]).parse
10
- result[:matrix].should == input_and_result[:matrix]
11
- result[:name].should == input_and_result[:name] if input_and_result.has_key?(:name)
7
+ context '#parse!' do
8
+ good_cases.each do |case_description, input_and_result|
9
+ it "should be able to parse #{case_description}" do
10
+ result = parser_klass.new(input_and_result[:input]).parse
11
+ result[:matrix].should == input_and_result[:matrix]
12
+ if input_and_result.has_key?(:name)
13
+ result[:name].should == input_and_result[:name]
14
+ else
15
+ result[:name].should be_nil
16
+ end
17
+ end
12
18
  end
13
- end
14
-
15
- bad_cases.each do |case_description, input|
16
- it "should fail silently returning {} on parsing #{case_description}" do
17
- parser_klass.new(input[:input]).parse.should == {}
19
+
20
+ bad_cases.each do |case_description, input|
21
+ it "should raise an exception on parsing #{case_description}" do
22
+ expect{ parser_klass.new(input[:input]).parse! }.to raise_error
23
+ end
18
24
  end
19
25
  end
20
26
  end
@@ -0,0 +1,32 @@
1
+ require 'spec_helper'
2
+ require 'bioinform/support/advanced_scan'
3
+
4
+ describe StringScanner do
5
+ context '#advanced_scan' do
6
+ before do
7
+ @scanner = StringScanner.new('abcde fghIJKLmnop')
8
+ end
9
+ it 'should return nil if text doesn\'t match. Pointer should not move' do
10
+ @scanner.advanced_scan(/\s\s\s/).should be_nil
11
+ @scanner.pos.should == 0
12
+ end
13
+ it 'should return MatchData if string Matches. Pointer should move' do
14
+ @scanner.advanced_scan(/\w\w\w/).should be_kind_of MatchData
15
+ @scanner.pos.should == 3
16
+ end
17
+ it 'should return have the same groups as regexp has' do
18
+ result = @scanner.advanced_scan(/(\w+)(\s+)([a-z]+)([A-Z]+)/)
19
+ result[0].should == 'abcde fghIJKL'
20
+ result[1].should == 'abcde'
21
+ result[2].should == ' '
22
+ result[3].should == 'fgh'
23
+ result[4].should == 'IJKL'
24
+ end
25
+ it 'should return have the same named groups as regexp has' do
26
+ result = @scanner.advanced_scan(/(\w+)(\s+)(?<word_downcase>[a-z]+)(?<word_upcase>[A-Z]+)/)
27
+ result[0].should == 'abcde fghIJKL'
28
+ result[:word_downcase].should == 'fgh'
29
+ result[:word_upcase].should == 'IJKL'
30
+ end
31
+ end
32
+ end
@@ -15,5 +15,11 @@ describe String do
15
15
  it 'should preserve rows pagination' do
16
16
  "abc def ghi\njk lmn".multiline_squish.should == "abc def ghi\njk lmn"
17
17
  end
18
+ it 'should preserve empty lines in the middle of text' do
19
+ "abc def\n\nghi\n \t \njk lmn \n\n\n zzz".multiline_squish.should == "abc def\n\nghi\n\njk lmn\n\n\nzzz"
20
+ end
21
+ it 'should drop empty lines at begin and at end of string' do
22
+ "\n \t\n\nabc def\n\nghi\n \t \njk lmn \n\n\n zzz\n\n \t \n".multiline_squish.should == "abc def\n\nghi\n\njk lmn\n\n\nzzz"
23
+ end
18
24
  end
19
25
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bioinform
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-31 00:00:00.000000000 Z
12
+ date: 2012-09-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activesupport
@@ -67,7 +67,9 @@ files:
67
67
  - lib/bioinform/parsers/parser.rb
68
68
  - lib/bioinform/parsers/string_fantom_parser.rb
69
69
  - lib/bioinform/parsers/string_parser.rb
70
+ - lib/bioinform/parsers/trivial_parser.rb
70
71
  - lib/bioinform/support.rb
72
+ - lib/bioinform/support/advanced_scan.rb
71
73
  - lib/bioinform/support/array_product.rb
72
74
  - lib/bioinform/support/array_zip.rb
73
75
  - lib/bioinform/support/callable_symbol.rb
@@ -83,11 +85,14 @@ files:
83
85
  - lib/bioinform/version.rb
84
86
  - spec/data_models/pcm_spec.rb
85
87
  - spec/data_models/pm_spec.rb
88
+ - spec/data_models/ppm_spec.rb
86
89
  - spec/data_models/pwm_spec.rb
87
90
  - spec/parsers/parser_spec.rb
88
91
  - spec/parsers/string_fantom_parser_spec.rb
89
92
  - spec/parsers/string_parser_spec.rb
93
+ - spec/parsers/trivial_parser_spec.rb
90
94
  - spec/spec_helper.rb
95
+ - spec/support/advanced_scan_spec.rb
91
96
  - spec/support/array_product_spec.rb
92
97
  - spec/support/array_zip_spec.rb
93
98
  - spec/support/callable_symbol_spec.rb
@@ -128,11 +133,14 @@ summary: Classes for work with different input formats of positional matrices an
128
133
  test_files:
129
134
  - spec/data_models/pcm_spec.rb
130
135
  - spec/data_models/pm_spec.rb
136
+ - spec/data_models/ppm_spec.rb
131
137
  - spec/data_models/pwm_spec.rb
132
138
  - spec/parsers/parser_spec.rb
133
139
  - spec/parsers/string_fantom_parser_spec.rb
134
140
  - spec/parsers/string_parser_spec.rb
141
+ - spec/parsers/trivial_parser_spec.rb
135
142
  - spec/spec_helper.rb
143
+ - spec/support/advanced_scan_spec.rb
136
144
  - spec/support/array_product_spec.rb
137
145
  - spec/support/array_zip_spec.rb
138
146
  - spec/support/callable_symbol_spec.rb