bioinform 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/bioinform/data_models/pcm.rb +8 -1
- data/lib/bioinform/data_models/pm.rb +20 -15
- data/lib/bioinform/data_models/ppm.rb +3 -1
- data/lib/bioinform/data_models/pwm.rb +14 -2
- data/lib/bioinform/parsers/parser.rb +67 -28
- data/lib/bioinform/parsers/string_fantom_parser.rb +3 -9
- data/lib/bioinform/parsers/string_parser.rb +64 -24
- data/lib/bioinform/parsers/trivial_parser.rb +17 -0
- data/lib/bioinform/parsers.rb +1 -0
- data/lib/bioinform/support/advanced_scan.rb +8 -0
- data/lib/bioinform/support/multiline_squish.rb +1 -1
- data/lib/bioinform/support.rb +3 -1
- data/lib/bioinform/version.rb +1 -1
- data/spec/data_models/pcm_spec.rb +24 -6
- data/spec/data_models/pm_spec.rb +15 -10
- data/spec/data_models/ppm_spec.rb +8 -0
- data/spec/parsers/parser_spec.rb +89 -0
- data/spec/parsers/string_fantom_parser_spec.rb +16 -14
- data/spec/parsers/string_parser_spec.rb +46 -0
- data/spec/parsers/trivial_parser_spec.rb +22 -0
- data/spec/spec_helper.rb +16 -10
- data/spec/support/advanced_scan_spec.rb +32 -0
- data/spec/support/multiline_squish_spec.rb +6 -0
- metadata +10 -2
@@ -1,5 +1,7 @@
|
|
1
1
|
require 'bioinform/support'
|
2
2
|
require 'bioinform/data_models/pm'
|
3
|
+
require 'bioinform/data_models/ppm'
|
4
|
+
require 'bioinform/data_models/pwm'
|
3
5
|
module Bioinform
|
4
6
|
class PCM < PM
|
5
7
|
def count
|
@@ -12,7 +14,12 @@ module Bioinform
|
|
12
14
|
Math.log((pos[ind] + probability[ind] * pseudocount) / (probability[ind]*(count + pseudocount)) )
|
13
15
|
end
|
14
16
|
end
|
15
|
-
PWM.new(mat)
|
17
|
+
PWM.new(matrix: mat, name: name)
|
18
|
+
end
|
19
|
+
|
20
|
+
def to_ppm
|
21
|
+
mat = each_position.map{|pos| pos.map{|el| el.to_f / count }}
|
22
|
+
PPM.new(matrix: mat, name: name)
|
16
23
|
end
|
17
24
|
|
18
25
|
end
|
@@ -2,23 +2,23 @@ require 'bioinform/support'
|
|
2
2
|
require 'bioinform/parsers'
|
3
3
|
|
4
4
|
module Bioinform
|
5
|
-
IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3}
|
6
|
-
LetterByIndex = {0 =>
|
7
|
-
|
5
|
+
IndexByLetter = {'A' => 0, 'C' => 1, 'G' => 2, 'T' => 3, A: 0, C: 1, G: 2, T: 3}
|
6
|
+
LetterByIndex = {0 => :A, 1 => :C, 2 => :G, 3 => :T}
|
7
|
+
|
8
8
|
class PM
|
9
9
|
attr_reader :matrix
|
10
10
|
attr_accessor :background, :name
|
11
11
|
|
12
|
-
def choose_parser(input)
|
13
|
-
|
14
|
-
|
15
|
-
self.class.new(input, parser) rescue nil
|
12
|
+
def self.choose_parser(input)
|
13
|
+
[TrivialParser, Parser, StringParser, StringFantomParser].find do |parser|
|
14
|
+
self.new(input, parser) rescue nil
|
16
15
|
end
|
17
16
|
end
|
18
17
|
|
19
18
|
def initialize(input, parser = nil)
|
20
|
-
parser ||= choose_parser(input)
|
21
|
-
|
19
|
+
parser ||= self.class.choose_parser(input)
|
20
|
+
raise 'No one parser can process input' unless parser
|
21
|
+
result = parser.new(input).parse
|
22
22
|
@matrix = result[:matrix]
|
23
23
|
@name = result[:name]
|
24
24
|
@background = [1, 1, 1, 1]
|
@@ -29,15 +29,20 @@ module Bioinform
|
|
29
29
|
@matrix == other.matrix && @background == other.background
|
30
30
|
end
|
31
31
|
|
32
|
-
def
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
32
|
+
def self.valid_matrix?(matrix)
|
33
|
+
matrix.is_a?(Array) &&
|
34
|
+
! matrix.empty? &&
|
35
|
+
matrix.all?(&:is_a?.(Array)) &&
|
36
|
+
matrix.all?{|pos| pos.size == 4} &&
|
37
|
+
matrix.all?(&:all?.(&:is_a?.(Numeric)))
|
37
38
|
rescue
|
38
39
|
false
|
39
40
|
end
|
40
41
|
|
42
|
+
def valid?
|
43
|
+
self.class.valid_matrix?(@matrix)
|
44
|
+
end
|
45
|
+
|
41
46
|
def each_position
|
42
47
|
if block_given?
|
43
48
|
matrix.each{|pos| yield pos}
|
@@ -54,7 +59,7 @@ module Bioinform
|
|
54
59
|
def to_s(with_name = true)
|
55
60
|
matrix_str = each_position.map(&:join.("\t")).join("\n")
|
56
61
|
if with_name && @name
|
57
|
-
|
62
|
+
@name + "\n" + matrix_str
|
58
63
|
else
|
59
64
|
matrix_str
|
60
65
|
end
|
@@ -21,8 +21,20 @@ module Bioinform
|
|
21
21
|
def score(word)
|
22
22
|
word = word.upcase
|
23
23
|
raise ArgumentError, 'word in PWM#score(word) should have the same length as matrix' unless word.length == length
|
24
|
-
raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters' unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
|
25
|
-
|
24
|
+
#raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters' unless word.each_char.all?{|letter| %w{A C G T}.include? letter}
|
25
|
+
(0...length).map do |pos|
|
26
|
+
begin
|
27
|
+
# Need support of N-letters and other IUPAC
|
28
|
+
letter = word[pos]
|
29
|
+
matrix[pos][IndexByLetter[letter]]
|
30
|
+
rescue
|
31
|
+
raise ArgumentError, 'word in PWM#score(word) should have only ACGT-letters'
|
32
|
+
end
|
33
|
+
end.inject(&:+)
|
34
|
+
end
|
35
|
+
|
36
|
+
def to_pwm
|
37
|
+
self
|
26
38
|
end
|
27
39
|
end
|
28
40
|
end
|
@@ -1,40 +1,79 @@
|
|
1
1
|
require 'bioinform/support'
|
2
|
+
require 'bioinform/data_models/pm'
|
2
3
|
|
3
4
|
module Bioinform
|
4
5
|
class Parser
|
5
|
-
attr_reader :input
|
6
|
+
attr_reader :input
|
6
7
|
|
7
|
-
def initialize(input)
|
8
|
-
|
8
|
+
def initialize(*input)
|
9
|
+
if input.size == 1 # [ [1,2,3,4] ], [ [[1,2,3,4],[5,6,7,8]] ]
|
10
|
+
if input.first.is_a?(Array) && input.first.all?{|el| el.is_a? Numeric} # [ [1,2,3,4] ]
|
11
|
+
@input = input
|
12
|
+
else # [ [[1,2,3,4],[5,6,7,8]] ]
|
13
|
+
@input = input.first
|
14
|
+
end
|
15
|
+
else #[ [1,2,3,4], [5,6,7,8] ], [ ]
|
16
|
+
@input = input
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def parse!
|
21
|
+
matrix = self.class.transform_input(input)
|
22
|
+
raise 'Parsing error' unless self.class.valid_matrix?(matrix)
|
23
|
+
{matrix: matrix}
|
9
24
|
end
|
10
25
|
|
11
26
|
def parse
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
27
|
+
parse! rescue nil
|
28
|
+
end
|
29
|
+
|
30
|
+
def self.parse!(*input)
|
31
|
+
self.new(*input).parse!
|
32
|
+
end
|
33
|
+
def self.parse(*input)
|
34
|
+
self.new(*input).parse
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.valid_matrix?(matrix)
|
38
|
+
PM.valid_matrix?(matrix)
|
39
|
+
end
|
40
|
+
|
41
|
+
# {A: 1, C: 2, G: 3, T: 4} --> [1,2,3,4]
|
42
|
+
# {A: [1,2], C: [3,4], G: [5,6], T: [7,8]} --> [[1,3,5,7],[2,4,6,8]] ( == [[1,2], [3,4], [5,6], [7,8]].transpose)
|
43
|
+
def self.array_from_acgt_hash(hsh)
|
44
|
+
hsh = normalize_hash_keys(hsh)
|
45
|
+
raise 'some of hash keys A,C,G,T are missing or hash has excess keys' unless hsh.keys.sort == [:A,:C,:G,:T]
|
46
|
+
result = [:A,:C,:G,:T].collect{|letter| hsh[letter] }
|
47
|
+
result.all?{|el| el.is_a?(Array)} ? result.transpose : result
|
48
|
+
end
|
49
|
+
|
50
|
+
# {a: 1, C: 2, 'g' => 3, 'T' => 4} --> {A: 1, C: 2, G: 3, T: 4}
|
51
|
+
def self.normalize_hash_keys(hsh)
|
52
|
+
hsh.collect_hash{|key,value| [key.to_s.upcase.to_sym, value] }
|
53
|
+
end
|
54
|
+
|
55
|
+
# [[1,2,3,4], [2,3,4,5]] --> [[1,2,3,4], [2,3,4,5]]
|
56
|
+
# [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}] --> [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}]
|
57
|
+
# {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} --> [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
|
58
|
+
def self.try_convert_to_array(input)
|
59
|
+
case input
|
60
|
+
when Array then input
|
61
|
+
when Hash then array_from_acgt_hash(input)
|
62
|
+
else raise TypeError, 'input of Bioinform::Parser::array_from_acgt_hash should be Array or Hash'
|
37
63
|
end
|
38
64
|
end
|
65
|
+
|
66
|
+
def self.transform_input(input)
|
67
|
+
result = try_convert_to_array(input).map{|el| try_convert_to_array(el)}
|
68
|
+
need_tranpose?(result) ? result.transpose : result
|
69
|
+
end
|
70
|
+
|
71
|
+
# Tells whether the first index of the input matrix runs over positions (false -- no transpose needed) or over letters (true -- transpose needed)
|
72
|
+
# [[1,3,5,7], [2,4,6,8]] --> false
|
73
|
+
# [[1,2],[3,4],[5,6],[7,8]] --> true
|
74
|
+
def self.need_tranpose?(input)
|
75
|
+
(input.size == 4) && input.any?{|x| x.size != 4}
|
76
|
+
end
|
77
|
+
|
39
78
|
end
|
40
79
|
end
|
@@ -3,18 +3,12 @@ require 'bioinform/parsers/string_parser'
|
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
class StringFantomParser < StringParser
|
6
|
-
def row_pat
|
7
|
-
'[\w\d]+ ' + "(#{number_pat} )*#{number_pat}"
|
8
|
-
end
|
9
|
-
def name_pat
|
10
|
-
'NA (?<name>[\w.+:-]+)'
|
11
|
-
end
|
12
6
|
def header_pat
|
13
|
-
|
7
|
+
/NA (?<name>[\w.+:-]+)\n[\w\d]+ A C G T\n/
|
14
8
|
end
|
15
9
|
|
16
|
-
def
|
17
|
-
|
10
|
+
def row_pat
|
11
|
+
/[\w\d]+ (?<row>(#{number_pat} )*#{number_pat})\n?/
|
18
12
|
end
|
19
13
|
end
|
20
14
|
end
|
@@ -1,45 +1,85 @@
|
|
1
|
+
require 'strscan'
|
1
2
|
require 'bioinform/support'
|
2
3
|
require 'bioinform/parsers/parser'
|
3
4
|
|
4
5
|
module Bioinform
|
5
6
|
class StringParser < Parser
|
7
|
+
attr_reader :scanner
|
8
|
+
def initialize(input)
|
9
|
+
raise ArgumentError unless input.is_a?(String)
|
10
|
+
super
|
11
|
+
@scanner = StringScanner.new(input.multiline_squish)
|
12
|
+
end
|
13
|
+
|
6
14
|
def number_pat
|
7
|
-
|
15
|
+
/[+-]?\d+(\.\d+)?([eE][+-]?\d{1,3})?/
|
8
16
|
end
|
17
|
+
|
18
|
+
def header_pat
|
19
|
+
/>?\s*(?<name>\S+)\n/
|
20
|
+
end
|
21
|
+
|
9
22
|
def row_pat
|
10
|
-
|
23
|
+
/(?<row>(#{number_pat} )*#{number_pat})\n?/
|
11
24
|
end
|
12
|
-
|
13
|
-
|
25
|
+
|
26
|
+
def scan_row
|
27
|
+
match = scanner.advanced_scan(row_pat)
|
28
|
+
match && match[:row]
|
14
29
|
end
|
15
|
-
|
16
|
-
|
30
|
+
|
31
|
+
def split_row(row_string)
|
32
|
+
row_string.split.map(&:to_f)
|
17
33
|
end
|
18
|
-
|
19
|
-
|
34
|
+
|
35
|
+
def scan_any_spaces
|
36
|
+
scanner.scan(/\s+/)
|
20
37
|
end
|
21
|
-
|
22
|
-
|
38
|
+
|
39
|
+
def parse_name
|
40
|
+
match = scanner.advanced_scan(header_pat)
|
41
|
+
match && match[:name]
|
23
42
|
end
|
24
43
|
|
25
|
-
|
26
|
-
|
27
|
-
|
44
|
+
def parse_matrix
|
45
|
+
matrix = []
|
46
|
+
while row_string = scan_row
|
47
|
+
matrix << split_row(row_string)
|
48
|
+
end
|
49
|
+
matrix
|
28
50
|
end
|
29
51
|
|
30
|
-
def parse
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
52
|
+
def parse!
|
53
|
+
scan_any_spaces
|
54
|
+
name = parse_name
|
55
|
+
matrix = parse_matrix
|
56
|
+
Parser.parse!(matrix).merge(name: name)
|
57
|
+
end
|
58
|
+
|
59
|
+
def scanner_reset
|
60
|
+
scanner.reset
|
61
|
+
end
|
62
|
+
|
63
|
+
def each
|
64
|
+
if block_given?
|
65
|
+
scanner_reset
|
66
|
+
while result = parse
|
67
|
+
yield result
|
68
|
+
end
|
38
69
|
else
|
39
|
-
|
70
|
+
Enumerator.new(self, :each)
|
40
71
|
end
|
41
|
-
rescue
|
42
|
-
{}
|
43
72
|
end
|
73
|
+
include Enumerable
|
74
|
+
|
75
|
+
alias_method :split, :to_a
|
76
|
+
def self.split(input)
|
77
|
+
self.new(input).split
|
78
|
+
end
|
79
|
+
|
80
|
+
def self.split_on_motifs(input, pm_klass = PM)
|
81
|
+
split(input).map{|el| pm_klass.new(el)}
|
82
|
+
end
|
83
|
+
|
44
84
|
end
|
45
85
|
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'bioinform/support'
|
2
|
+
require 'bioinform/parsers/parser'
|
3
|
+
|
4
|
+
module Bioinform
|
5
|
+
# TrivialParser can be used to parse hashes returned by #parse method of other parsers:
|
6
|
+
# PM.new({matrix:[[1,2,3,4],[5,6,7,8]], name: 'Name'}, TrivialParser)
|
7
|
+
# PM.new(StringParser.new("1 2 3 4\n5 6 7 8").parse)
|
8
|
+
# StringParser.new("First\n1 2 3 4\n5 6 7 8\nSecond\n0 0 0 0").map{|inp| PM.new(inp, TrivialParser)}
|
9
|
+
class TrivialParser < Parser
|
10
|
+
def initialize(input)
|
11
|
+
@input = input
|
12
|
+
end
|
13
|
+
def parse!
|
14
|
+
input
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/bioinform/parsers.rb
CHANGED
data/lib/bioinform/support.rb
CHANGED
data/lib/bioinform/version.rb
CHANGED
@@ -5,23 +5,41 @@ module Bioinform
|
|
5
5
|
describe PCM do
|
6
6
|
describe '#count' do
|
7
7
|
it 'should be equal to sum of elements at position' do
|
8
|
-
PCM.new([[1, 2, 3, 1],[4,
|
9
|
-
PCM.new([[1, 2.3, 3.2, 1],[4.4,
|
8
|
+
PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).count.should == 7
|
9
|
+
PCM.new([[1, 2.3, 3.2, 1],[4.4, 0.1, 1, 2]]).count.should == 7.5
|
10
10
|
end
|
11
11
|
end
|
12
12
|
|
13
13
|
describe '#to_pwm' do
|
14
14
|
it 'should return PWM' do
|
15
|
-
PCM.new([[1, 2, 3, 1],[4,
|
15
|
+
PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm.should be_kind_of(PWM)
|
16
16
|
end
|
17
17
|
it 'should make transformation: el --> log( (el + p_i*pseudocount) / (p_i*(count + pseudocount)) )' do
|
18
|
-
PCM.new([[1, 2, 3, 1],[4,
|
19
|
-
PCM.new([[1, 2, 3, 1],[4,
|
18
|
+
PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm(1).matrix.map{|line|line.map{|el| el.round(3)}}.should == [[-0.47, 0.118, 0.486, -0.47],[0.754, -2.079, -0.47, 0.118]]
|
19
|
+
PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm(10).matrix.map{|line|line.map{|el| el.round(3)}}.should == [[-0.194, 0.057, 0.258, -0.194],[0.425, -0.531, -0.194, 0.057]]
|
20
20
|
end
|
21
21
|
it 'should use default pseudocount equal to log(count)' do
|
22
|
-
PCM.new([[1, 2, 3, 1],[4,
|
22
|
+
PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm.should == PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_pwm(Math.log(7))
|
23
|
+
end
|
24
|
+
it 'should preserve name' do
|
25
|
+
PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: nil).to_pwm.name.should be_nil
|
26
|
+
PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: 'Stub name').to_pwm.name.should == 'Stub name'
|
23
27
|
end
|
24
28
|
end
|
25
29
|
|
30
|
+
describe '#to_ppm' do
|
31
|
+
it 'should return PPM' do
|
32
|
+
PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_ppm.should be_kind_of(PPM)
|
33
|
+
end
|
34
|
+
it 'should make transformation el --> el / count' do
|
35
|
+
PCM.new([[1, 2, 3, 1],[4, 0, 1, 2]]).to_ppm.should == PPM.new([[1.0/7, 2.0/7, 3.0/7, 1.0/7],[4.0/7, 0.0/7, 1.0/7, 2.0/7]])
|
36
|
+
end
|
37
|
+
it 'should preserve name' do
|
38
|
+
PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: nil).to_ppm.name.should be_nil
|
39
|
+
PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: 'Stub name').to_ppm.name.should == 'Stub name'
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
|
26
44
|
end
|
27
45
|
end
|
data/spec/data_models/pm_spec.rb
CHANGED
@@ -3,18 +3,17 @@ require 'bioinform/data_models/pm'
|
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
describe PM do
|
6
|
-
|
6
|
+
|
7
|
+
describe '::valid_matrix?' do
|
7
8
|
it 'should be true iff an argument is an array of arrays of 4 numerics in a column' do
|
8
|
-
|
9
|
-
PM.
|
10
|
-
PM.
|
11
|
-
PM.
|
12
|
-
PM.
|
13
|
-
PM.
|
14
|
-
PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,'3','4'],[1,'4','5',6.5]]; self }.valid?.should be_false
|
15
|
-
|
9
|
+
PM.valid_matrix?( [[1,2,3,4],[1,4,5,6.5]] ).should be_true
|
10
|
+
PM.valid_matrix?( {A: [1,1], C: [2,4], G: [3,5], T: [4, 6.5]} ).should be_false
|
11
|
+
PM.valid_matrix?( [{A:1,C:2,G:3,T:4},{A:1,C:4,G:5,T: 6.5}] ).should be_false
|
12
|
+
PM.valid_matrix?( [[1,2,3,4],[1,4,6.5]] ).should be_false
|
13
|
+
PM.valid_matrix?( [[1,2,3],[1,4,6.5]] ).should be_false
|
14
|
+
PM.valid_matrix?( [[1,2,'3','4'],[1,'4','5',6.5]] ).should be_false
|
16
15
|
end
|
17
|
-
end
|
16
|
+
end
|
18
17
|
|
19
18
|
describe '#to_s' do
|
20
19
|
before :each do
|
@@ -249,6 +248,12 @@ module Bioinform
|
|
249
248
|
@pm.best_suffix(2).should == (-1.0)
|
250
249
|
@pm.best_suffix(3).should == (0.0)
|
251
250
|
end
|
251
|
+
it 'should give right results after left(right)_augment, discrete, reverse_complement etc' do
|
252
|
+
pm = PM.new([[1, 2, 3, 4], [10,10.5,11,11.5]])
|
253
|
+
pm.best_suffix(1).should == 11.5
|
254
|
+
pm.left_augment!(1)
|
255
|
+
pm.best_suffix(1).should == 15.5
|
256
|
+
end
|
252
257
|
end
|
253
258
|
describe '#worst_suffix' do
|
254
259
|
it 'should return minimal score of suffices from i-th position inclusively i.e. [i..end]' do
|
data/spec/parsers/parser_spec.rb
CHANGED
@@ -3,6 +3,86 @@ require 'bioinform/parsers/parser'
|
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
describe Parser do
|
6
|
+
context '#initialize' do
|
7
|
+
it 'should accept an array correctly' do
|
8
|
+
Parser.new([[1,2,3,4],[5,6,7,8]]).parse[:matrix].should == [[1,2,3,4],[5,6,7,8]]
|
9
|
+
end
|
10
|
+
it 'should treat several arguments as an array composed of them' do
|
11
|
+
Parser.new([1,2,3,4],[5,6,7,8]).parse.should == Parser.new([[1,2,3,4],[5,6,7,8]]).parse
|
12
|
+
end
|
13
|
+
it 'should treat one Array of numbers as an Array(with 1 element) of Arrays' do
|
14
|
+
Parser.new([1,2,3,4]).parse.should == Parser.new([[1,2,3,4]]).parse
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
context '::parse!' do
|
19
|
+
it 'should behave like Parser.new(input).parse!' do
|
20
|
+
Parser.parse!([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse!
|
21
|
+
expect{ Parser.parse!([1,2,3],[4,5,6]) }.to raise_error
|
22
|
+
end
|
23
|
+
end
|
24
|
+
context '::parse' do
|
25
|
+
it 'should behave like Parser.new(input).parse!' do
|
26
|
+
Parser.parse([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse
|
27
|
+
Parser.parse([1,2,3],[4,5,6]).should be_nil
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
context '::normalize_hash_keys' do
|
32
|
+
it 'should convert both symbolic and string keys, in both upcase and downcase to symbolic upcases' do
|
33
|
+
Parser.normalize_hash_keys( {a: 1, C: 2, 'g' => 3, 'T' => 4} ).should == {A: 1, C: 2, G: 3, T: 4}
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
context '::need_transpose?' do
|
38
|
+
it 'should point whether matrix have positions(need not be transposed -- false) or letters(true) as first index' do
|
39
|
+
Parser.need_tranpose?([[1,3,5,7], [2,4,6,8]]).should be_false
|
40
|
+
Parser.need_tranpose?([[1,2],[3,4],[5,6],[7,8]]).should be_true
|
41
|
+
end
|
42
|
+
end
|
43
|
+
context '::array_from_acgt_hash' do
|
44
|
+
it 'should convert hash of arrays to a transposed array of arrays' do
|
45
|
+
input = {A: [1,2,3], C: [2,3,4], G: [3,4,5], T: [4,5,6]}
|
46
|
+
Parser.array_from_acgt_hash(input).should == [[1,2,3], [2,3,4], [3,4,5], [4,5,6]].transpose
|
47
|
+
end
|
48
|
+
it 'should convert hash of numbers to an array of numbers' do
|
49
|
+
input = {A: 1, C: 2, G: 3, T: 4}
|
50
|
+
Parser.array_from_acgt_hash(input).should == [1,2,3,4]
|
51
|
+
end
|
52
|
+
it 'should process both symbolic and string keys, in both upcase and downcase' do
|
53
|
+
input_normal_keys = {A: 1, C: 2, G: 3, T: 4}
|
54
|
+
input_different_keys = {:A => 1, :c => 2, 'g' => 3, 'T' => 4}
|
55
|
+
Parser.array_from_acgt_hash(input_different_keys).should == Parser.array_from_acgt_hash(input_normal_keys)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
context '::try_convert_to_array' do
|
60
|
+
it 'should not change array' do
|
61
|
+
inputs = []
|
62
|
+
inputs << [[1,2,3,4], [2,3,4,5], [3,4,5,6]]
|
63
|
+
inputs << [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}, {A:3, C:4, G:5, T:6}]
|
64
|
+
inputs.each do |input|
|
65
|
+
Parser.try_convert_to_array( input ).should == input
|
66
|
+
end
|
67
|
+
end
|
68
|
+
it 'should convert ACGT-Hashes to an array of positions (not letters)' do
|
69
|
+
Parser.try_convert_to_array( {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} ).should == [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
context '#parse' do
|
74
|
+
it 'should give the same result as #parse!' do
|
75
|
+
parser = Parser.new('stub parser')
|
76
|
+
parser.stub(:parse!).and_return('stub result')
|
77
|
+
parser.parse.should == 'stub result'
|
78
|
+
end
|
79
|
+
it 'should return nil if #parse! raised an exception' do
|
80
|
+
parser = Parser.new('stub parser')
|
81
|
+
parser.stub(:parse!).and_raise
|
82
|
+
parser.parse.should be_nil
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
6
86
|
good_cases = {
|
7
87
|
'Array Nx4' => {input: [[0,1,2,3],[10,11,12,13]],
|
8
88
|
matrix: [[0,1,2,3],[10,11,12,13]] },
|
@@ -30,6 +110,10 @@ module Bioinform
|
|
30
110
|
}
|
31
111
|
|
32
112
|
bad_cases = {
|
113
|
+
'Nil object on input' => {input: nil},
|
114
|
+
|
115
|
+
'Empty array on input' => {input: []},
|
116
|
+
|
33
117
|
'Different sizes of row arrays' => {input: [[1,2,3,4],[5,6,7,8,9]] },
|
34
118
|
|
35
119
|
'Different sizes of column arrays' => {input: [[0,10],[1,11],[2,12],[3]] },
|
@@ -54,5 +138,10 @@ module Bioinform
|
|
54
138
|
}
|
55
139
|
|
56
140
|
parser_specs(Parser, good_cases, bad_cases)
|
141
|
+
context '#parser!' do
|
142
|
+
it "should raise an exception on parsing empty list to parser" do
|
143
|
+
expect{ Parser.new().parse! }.to raise_error
|
144
|
+
end
|
145
|
+
end
|
57
146
|
end
|
58
147
|
end
|
@@ -5,20 +5,22 @@ module Bioinform
|
|
5
5
|
describe StringFantomParser do
|
6
6
|
good_cases = {
|
7
7
|
'string in Fantom-format' => {input: "
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
8
|
+
NA motif_CTNCAG
|
9
|
+
P0 A C G T
|
10
|
+
P1 0 1878368 0 0
|
11
|
+
P2 0 0 0 1878368
|
12
|
+
P3 469592 469592 469592 469592
|
13
|
+
P4 0 1878368 0 0
|
14
|
+
P5 1878368 0 0 0
|
15
|
+
P6 0 0 1878368 0",
|
16
|
+
matrix: [ [0.0, 1878368.0, 0.0, 0.0],
|
17
|
+
[0.0, 0.0, 0.0, 1878368.0],
|
18
|
+
[469592.0, 469592.0, 469592.0, 469592.0],
|
19
|
+
[0.0, 1878368.0, 0.0, 0.0],
|
20
|
+
[1878368.0, 0.0, 0.0, 0.0],
|
21
|
+
[0.0, 0.0, 1878368.0, 0.0]],
|
22
|
+
name: 'motif_CTNCAG'
|
23
|
+
}
|
22
24
|
}
|
23
25
|
|
24
26
|
bad_cases = { }
|
@@ -3,6 +3,52 @@ require 'bioinform/parsers/string_parser'
|
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
describe StringParser do
|
6
|
+
|
7
|
+
describe '#each' do
|
8
|
+
it 'should yield consequent results of #parse! while it returns result' do
|
9
|
+
parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
|
10
|
+
expect{|b| parser.each(&b)}.to yield_successive_args({matrix:[[1,2,3,4],[5,6,7,8]], name:nil}, {matrix:[[1,2,3,4],[1,2,3,4]], name:nil}, {matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name'} )
|
11
|
+
end
|
12
|
+
it 'should restart parser from the beginning each time' do
|
13
|
+
parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
|
14
|
+
3.times do
|
15
|
+
expect{|b| parser.each(&b)}.to yield_successive_args({matrix:[[1,2,3,4],[5,6,7,8]], name:nil}, {matrix:[[1,2,3,4],[1,2,3,4]], name:nil}, {matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name'} )
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
context '::split' do
|
21
|
+
it 'should be able to get a single PM' do
|
22
|
+
StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12").should == [ {matrix: [[1,2,3,4],[5,6,7,8],[9,10,11,12]], name:nil} ]
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'should be able to split several PMs separated with an empty line' do
|
26
|
+
StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:nil} ]
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'should be able to split several PMs separated with name' do
|
30
|
+
StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
|
31
|
+
|
32
|
+
StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8\n\n\n").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
context '::split_on_motifs' do
|
37
|
+
it 'should be able to split string into PMs' do
|
38
|
+
result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
|
39
|
+
result.map{|pm| pm.matrix}.should == [ [[1,2,3,4],[5,6,7,8],[9,10,11,12]], [[9,10,11,12],[1,2,3,4],[5,6,7,8]] ]
|
40
|
+
result.map{|pm| pm.name}.should == [nil, 'Name']
|
41
|
+
end
|
42
|
+
it 'should create PMs by default' do
|
43
|
+
result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
|
44
|
+
result.each{|pm| pm.class.should == PM}
|
45
|
+
end
|
46
|
+
it 'should create PM subclass when it\'s specified' do
|
47
|
+
result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8", PWM)
|
48
|
+
result.each{|pm| pm.class.should == PWM}
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
6
52
|
good_cases = {
|
7
53
|
'Nx4 string' => {input: "1 2 3 4\n5 6 7 8",
|
8
54
|
matrix: [[1,2,3,4],[5,6,7,8]] },
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'bioinform/parsers/parser'
|
3
|
+
|
4
|
+
module Bioinform
|
5
|
+
describe TrivialParser do
|
6
|
+
context '#initialize' do
|
7
|
+
it 'should take the only input argument' do
|
8
|
+
TrivialParser.instance_method(:initialize).arity.should == 1
|
9
|
+
end
|
10
|
+
end
|
11
|
+
context '#parser!' do
|
12
|
+
it 'should return input of that was passed to initialize' do
|
13
|
+
TrivialParser.new('stub input').parse!.should == 'stub input'
|
14
|
+
end
|
15
|
+
end
|
16
|
+
it 'can be used to create PM with {matrix: ..., name: ...} form' do
|
17
|
+
pm = PM.new({matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}, TrivialParser)
|
18
|
+
pm.matrix.should == [[1,2,3,4],[5,6,7,8]]
|
19
|
+
pm.name.should == 'Name'
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -4,17 +4,23 @@ $LOAD_PATH.unshift File.dirname(__FILE__)
|
|
4
4
|
require 'rspec'
|
5
5
|
|
6
6
|
def parser_specs(parser_klass, good_cases, bad_cases)
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
7
|
+
context '#parse!' do
|
8
|
+
good_cases.each do |case_description, input_and_result|
|
9
|
+
it "should be able to parse #{case_description}" do
|
10
|
+
result = parser_klass.new(input_and_result[:input]).parse
|
11
|
+
result[:matrix].should == input_and_result[:matrix]
|
12
|
+
if input_and_result.has_key?(:name)
|
13
|
+
result[:name].should == input_and_result[:name]
|
14
|
+
else
|
15
|
+
result[:name].should be_nil
|
16
|
+
end
|
17
|
+
end
|
12
18
|
end
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
19
|
+
|
20
|
+
bad_cases.each do |case_description, input|
|
21
|
+
it "should raise an exception on parsing #{case_description}" do
|
22
|
+
expect{ parser_klass.new(input[:input]).parse! }.to raise_error
|
23
|
+
end
|
18
24
|
end
|
19
25
|
end
|
20
26
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'bioinform/support/advanced_scan'
|
3
|
+
|
4
|
+
describe StringScanner do
|
5
|
+
context '#advanced_scan' do
|
6
|
+
before do
|
7
|
+
@scanner = StringScanner.new('abcde fghIJKLmnop')
|
8
|
+
end
|
9
|
+
it 'should return nil if text doesn\'t match. Pointer should not move' do
|
10
|
+
@scanner.advanced_scan(/\s\s\s/).should be_nil
|
11
|
+
@scanner.pos.should == 0
|
12
|
+
end
|
13
|
+
it 'should return MatchData if string Matches. Pointer should move' do
|
14
|
+
@scanner.advanced_scan(/\w\w\w/).should be_kind_of MatchData
|
15
|
+
@scanner.pos.should == 3
|
16
|
+
end
|
17
|
+
it 'should return have the same groups as regexp has' do
|
18
|
+
result = @scanner.advanced_scan(/(\w+)(\s+)([a-z]+)([A-Z]+)/)
|
19
|
+
result[0].should == 'abcde fghIJKL'
|
20
|
+
result[1].should == 'abcde'
|
21
|
+
result[2].should == ' '
|
22
|
+
result[3].should == 'fgh'
|
23
|
+
result[4].should == 'IJKL'
|
24
|
+
end
|
25
|
+
it 'should return have the same named groups as regexp has' do
|
26
|
+
result = @scanner.advanced_scan(/(\w+)(\s+)(?<word_downcase>[a-z]+)(?<word_upcase>[A-Z]+)/)
|
27
|
+
result[0].should == 'abcde fghIJKL'
|
28
|
+
result[:word_downcase].should == 'fgh'
|
29
|
+
result[:word_upcase].should == 'IJKL'
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -15,5 +15,11 @@ describe String do
|
|
15
15
|
it 'should preserve rows pagination' do
|
16
16
|
"abc def ghi\njk lmn".multiline_squish.should == "abc def ghi\njk lmn"
|
17
17
|
end
|
18
|
+
it 'should preserve empty lines in the middle of text' do
|
19
|
+
"abc def\n\nghi\n \t \njk lmn \n\n\n zzz".multiline_squish.should == "abc def\n\nghi\n\njk lmn\n\n\nzzz"
|
20
|
+
end
|
21
|
+
it 'should drop empty lines at begin and at end of string' do
|
22
|
+
"\n \t\n\nabc def\n\nghi\n \t \njk lmn \n\n\n zzz\n\n \t \n".multiline_squish.should == "abc def\n\nghi\n\njk lmn\n\n\nzzz"
|
23
|
+
end
|
18
24
|
end
|
19
25
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bioinform
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-09-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -67,7 +67,9 @@ files:
|
|
67
67
|
- lib/bioinform/parsers/parser.rb
|
68
68
|
- lib/bioinform/parsers/string_fantom_parser.rb
|
69
69
|
- lib/bioinform/parsers/string_parser.rb
|
70
|
+
- lib/bioinform/parsers/trivial_parser.rb
|
70
71
|
- lib/bioinform/support.rb
|
72
|
+
- lib/bioinform/support/advanced_scan.rb
|
71
73
|
- lib/bioinform/support/array_product.rb
|
72
74
|
- lib/bioinform/support/array_zip.rb
|
73
75
|
- lib/bioinform/support/callable_symbol.rb
|
@@ -83,11 +85,14 @@ files:
|
|
83
85
|
- lib/bioinform/version.rb
|
84
86
|
- spec/data_models/pcm_spec.rb
|
85
87
|
- spec/data_models/pm_spec.rb
|
88
|
+
- spec/data_models/ppm_spec.rb
|
86
89
|
- spec/data_models/pwm_spec.rb
|
87
90
|
- spec/parsers/parser_spec.rb
|
88
91
|
- spec/parsers/string_fantom_parser_spec.rb
|
89
92
|
- spec/parsers/string_parser_spec.rb
|
93
|
+
- spec/parsers/trivial_parser_spec.rb
|
90
94
|
- spec/spec_helper.rb
|
95
|
+
- spec/support/advanced_scan_spec.rb
|
91
96
|
- spec/support/array_product_spec.rb
|
92
97
|
- spec/support/array_zip_spec.rb
|
93
98
|
- spec/support/callable_symbol_spec.rb
|
@@ -128,11 +133,14 @@ summary: Classes for work with different input formats of positional matrices an
|
|
128
133
|
test_files:
|
129
134
|
- spec/data_models/pcm_spec.rb
|
130
135
|
- spec/data_models/pm_spec.rb
|
136
|
+
- spec/data_models/ppm_spec.rb
|
131
137
|
- spec/data_models/pwm_spec.rb
|
132
138
|
- spec/parsers/parser_spec.rb
|
133
139
|
- spec/parsers/string_fantom_parser_spec.rb
|
134
140
|
- spec/parsers/string_parser_spec.rb
|
141
|
+
- spec/parsers/trivial_parser_spec.rb
|
135
142
|
- spec/spec_helper.rb
|
143
|
+
- spec/support/advanced_scan_spec.rb
|
136
144
|
- spec/support/array_product_spec.rb
|
137
145
|
- spec/support/array_zip_spec.rb
|
138
146
|
- spec/support/callable_symbol_spec.rb
|