bioinform 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- data/TODO.txt +7 -2
- data/bin/merge_into_collection +4 -0
- data/bin/pcm2pwm +1 -1
- data/bin/split_motifs +1 -1
- data/bioinform.gemspec +2 -0
- data/lib/bioinform/cli/merge_into_collection.rb +76 -0
- data/lib/bioinform/cli/pcm2pwm.rb +20 -20
- data/lib/bioinform/cli/split_motifs.rb +21 -20
- data/lib/bioinform/cli.rb +16 -2
- data/lib/bioinform/data_models/collection.rb +13 -10
- data/lib/bioinform/data_models/pcm.rb +2 -2
- data/lib/bioinform/data_models/pm.rb +24 -37
- data/lib/bioinform/data_models/ppm.rb +2 -2
- data/lib/bioinform/data_models/pwm.rb +2 -2
- data/lib/bioinform/data_models.rb +8 -8
- data/lib/bioinform/parsers/parser.rb +10 -5
- data/lib/bioinform/parsers/splittable_parser.rb +57 -0
- data/lib/bioinform/parsers/string_fantom_parser.rb +3 -3
- data/lib/bioinform/parsers/string_parser.rb +5 -24
- data/lib/bioinform/parsers/trivial_parser.rb +19 -3
- data/lib/bioinform/parsers/yaml_parser.rb +35 -0
- data/lib/bioinform/parsers.rb +6 -4
- data/lib/bioinform/support/parameters.rb +19 -0
- data/lib/bioinform/support/partial_sums.rb +1 -1
- data/lib/bioinform/support.rb +11 -10
- data/lib/bioinform/version.rb +1 -1
- data/lib/bioinform.rb +5 -5
- data/spec/cli/cli_spec.rb +8 -7
- data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +14 -0
- data/spec/cli/data/{KLF4_f2.pwm.result → merge_into_collection/KLF4_f2.pwm} +0 -0
- data/spec/cli/data/{SP1_f1.pwm.result → merge_into_collection/SP1_f1.pwm} +0 -0
- data/spec/cli/data/merge_into_collection/collection.txt.result +40 -0
- data/spec/cli/data/merge_into_collection/collection.yaml.result +185 -0
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +185 -0
- data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +14 -0
- data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +11 -0
- data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +12 -0
- data/spec/cli/data/{KLF4 f2 spaced name.pcm → pcm2pwm/KLF4 f2 spaced name.pcm} +0 -0
- data/spec/cli/data/{KLF4_f2.pcm → pcm2pwm/KLF4_f2.pcm} +0 -0
- data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +11 -0
- data/spec/cli/data/{SP1_f1.pcm → pcm2pwm/SP1_f1.pcm} +0 -0
- data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +12 -0
- data/spec/cli/data/split_motifs/GABPA_f1.mat.result +14 -0
- data/spec/cli/data/split_motifs/KLF4_f2.mat.result +11 -0
- data/spec/cli/data/split_motifs/SP1_f1.mat.result +12 -0
- data/spec/cli/data/split_motifs/collection.yaml +197 -0
- data/spec/cli/data/split_motifs/plain_collection.txt +38 -0
- data/spec/cli/merge_into_collection_spec.rb +100 -0
- data/spec/cli/pcm2pwm_spec.rb +3 -3
- data/spec/cli/split_motifs_spec.rb +74 -3
- data/spec/data_models/collection_spec.rb +2 -2
- data/spec/data_models/pcm_spec.rb +2 -2
- data/spec/data_models/pm_spec.rb +10 -27
- data/spec/data_models/ppm_spec.rb +2 -2
- data/spec/data_models/pwm_spec.rb +3 -3
- data/spec/fabricators/collection_fabricator.rb +8 -0
- data/spec/fabricators/pm_fabricator.rb +43 -0
- data/spec/parsers/parser_spec.rb +29 -37
- data/spec/parsers/string_fantom_parser_spec.rb +38 -35
- data/spec/parsers/string_parser_spec.rb +33 -66
- data/spec/parsers/trivial_parser_spec.rb +48 -6
- data/spec/parsers/yaml_parser_spec.rb +50 -0
- data/spec/spec_helper.rb +2 -6
- data/spec/support/advanced_scan_spec.rb +2 -2
- data/spec/support/array_product_spec.rb +2 -2
- data/spec/support/array_zip_spec.rb +2 -2
- data/spec/support/collect_hash_spec.rb +2 -2
- data/spec/support/delete_many_spec.rb +2 -2
- data/spec/support/inverf_spec.rb +2 -2
- data/spec/support/multiline_squish_spec.rb +2 -2
- data/spec/support/partial_sums_spec.rb +2 -2
- data/spec/support/same_by_spec.rb +2 -2
- metadata +86 -12
data/spec/parsers/parser_spec.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
require_relative '../../lib/bioinform/parsers/parser'
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
describe Parser do
|
6
6
|
context '#initialize' do
|
7
7
|
it 'should accept an array correctly' do
|
8
|
-
Parser.new([[1,2,3,4],[5,6,7,8]]).parse
|
8
|
+
Parser.new([[1,2,3,4],[5,6,7,8]]).parse.matrix.should == [[1,2,3,4],[5,6,7,8]]
|
9
9
|
end
|
10
10
|
it 'should treat several arguments as an array composed of them' do
|
11
11
|
Parser.new([1,2,3,4],[5,6,7,8]).parse.should == Parser.new([[1,2,3,4],[5,6,7,8]]).parse
|
@@ -21,6 +21,7 @@ module Bioinform
|
|
21
21
|
expect{ Parser.parse!([1,2,3],[4,5,6]) }.to raise_error
|
22
22
|
end
|
23
23
|
end
|
24
|
+
|
24
25
|
context '::parse' do
|
25
26
|
it 'should behave like Parser.new(input).parse!' do
|
26
27
|
Parser.parse([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse
|
@@ -37,9 +38,13 @@ module Bioinform
|
|
37
38
|
Parser.choose("1 2 3 4\n5 6 7 8").should be_kind_of(StringParser)
|
38
39
|
Parser.choose("1 2 3 4\n5 6 7 8").input.should == "1 2 3 4\n5 6 7 8"
|
39
40
|
end
|
40
|
-
|
41
41
|
end
|
42
42
|
|
43
|
+
context '::split_on_motifs' do
|
44
|
+
it 'should be able to get a single PM' do
|
45
|
+
Parser.split_on_motifs([[1,2,3,4],[5,6,7,8]], PM).should == [ PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name:nil) ]
|
46
|
+
end
|
47
|
+
end
|
43
48
|
|
44
49
|
context '::normalize_hash_keys' do
|
45
50
|
it 'should convert both symbolic and string keys, in both upcase and downcase to symbolic upcases' do
|
@@ -97,56 +102,43 @@ module Bioinform
|
|
97
102
|
end
|
98
103
|
|
99
104
|
good_cases = {
|
100
|
-
'Array Nx4' => {input: [[
|
101
|
-
|
105
|
+
'Array Nx4' => {input: [[1,2,3,4],[5,6,7,8]],
|
106
|
+
result: Fabricate(:pm_unnamed) },
|
102
107
|
|
103
|
-
'Array 4xN' => {input: [[
|
104
|
-
|
108
|
+
'Array 4xN' => {input: [[1,5],[2,6],[3,7],[4,8]],
|
109
|
+
result: Fabricate(:pm_unnamed) },
|
105
110
|
|
106
|
-
'Hash A,C,G,T => Arrays' => { input: {:A => [
|
107
|
-
|
111
|
+
'Hash A,C,G,T => Arrays' => { input: {:A => [1,5], :c => [2,6],'g' => [3,7],'T' => [4,8]},
|
112
|
+
result: Fabricate(:pm_unnamed) },
|
108
113
|
|
109
|
-
'Hash array of hashes' => { input: [{:A =>
|
110
|
-
|
114
|
+
'Hash array of hashes' => { input: [{:A => 1,:c => 2,'g' => 3,'T' => 4}, {:A => 5,:c => 6,'g' => 7,'T' => 8}],
|
115
|
+
result: Fabricate(:pm_unnamed) },
|
111
116
|
|
112
|
-
'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[
|
113
|
-
|
117
|
+
'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]],
|
118
|
+
result: Fabricate(:pm_4x4_unnamed) },
|
114
119
|
|
115
|
-
'Hash A,C,G,T => 4-Arrays' => { input: {:A => [
|
116
|
-
|
120
|
+
'Hash A,C,G,T => 4-Arrays' => { input: {:A => [1,5,9,13], :c => [2,6,10,14],'g' => [3,7,11,15],'T' => [4,8,12,16]},
|
121
|
+
result: Fabricate(:pm_4x4_unnamed) },
|
117
122
|
|
118
|
-
'4-Arrays of A,C,G,T hashes' => { input: [{:A =>
|
119
|
-
{:A =>
|
120
|
-
{:A =>
|
121
|
-
{:A =>
|
122
|
-
|
123
|
+
'4-Arrays of A,C,G,T hashes' => { input: [{:A => 1, :c => 2, 'g' => 3, 'T' => 4},
|
124
|
+
{:A => 5, :c => 6, 'g' => 7, 'T' => 8},
|
125
|
+
{:A => 9, :c => 10, 'g' => 11, 'T' => 12},
|
126
|
+
{:A => 13, :c => 14, 'g' => 15, 'T' => 16}],
|
127
|
+
result: Fabricate(:pm_4x4_unnamed) }
|
123
128
|
}
|
124
129
|
|
125
130
|
bad_cases = {
|
126
131
|
'Nil object on input' => {input: nil},
|
127
|
-
|
128
132
|
'Empty array on input' => {input: []},
|
129
|
-
|
130
133
|
'Different sizes of row arrays' => {input: [[1,2,3,4],[5,6,7,8,9]] },
|
131
|
-
|
132
134
|
'Different sizes of column arrays' => {input: [[0,10],[1,11],[2,12],[3]] },
|
133
|
-
|
134
135
|
'No one dimension have size 4' => {input: [[0,1,2,3,4],[10,11,12,13,14], [0,1,2,3,4]] },
|
135
|
-
|
136
|
-
'
|
137
|
-
|
138
|
-
'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3},
|
139
|
-
{:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
|
140
|
-
|
141
|
-
'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3},
|
142
|
-
{:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
|
143
|
-
|
136
|
+
'Missing keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12}] },
|
137
|
+
'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
|
138
|
+
'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3}, {:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
|
144
139
|
'Different sizes of row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13,14]} },
|
145
|
-
|
146
140
|
'Missing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12]} },
|
147
|
-
|
148
141
|
'Wrong keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'X' => [2,12]} },
|
149
|
-
|
150
142
|
'Excessing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12], 'T' => [3,12], :X => [4,14]} }
|
151
143
|
}
|
152
144
|
|
@@ -1,16 +1,16 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
require_relative '../../lib/bioinform/parsers/string_fantom_parser'
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
describe StringFantomParser do
|
6
|
-
describe '#
|
6
|
+
describe '#split_on_motifs' do
|
7
7
|
it 'should be able to parse several motifs' do
|
8
|
-
input =
|
8
|
+
input = "
|
9
9
|
//
|
10
10
|
NA motif_1
|
11
11
|
P0 A C G T
|
12
|
-
P1 0
|
13
|
-
P2 4
|
12
|
+
P1 0 1 2 3
|
13
|
+
P2 4 5 6 7
|
14
14
|
//
|
15
15
|
//
|
16
16
|
NA motif_2
|
@@ -22,41 +22,44 @@ P3 9 10 11 12
|
|
22
22
|
NA motif_3
|
23
23
|
P0 A C G T
|
24
24
|
P1 2 3 4 5
|
25
|
-
P2 6 7 8 9
|
26
|
-
|
27
|
-
StringFantomParser.split(input).should == [ {matrix: [[0,1,2,3],[4,5,6,7]], name: 'motif_1'},
|
28
|
-
{matrix: [[1,2,3,4],[5,6,7,8],[9,10,11,12]], name: 'motif_2'},
|
29
|
-
{matrix: [[2,3,4,5],[6,7,8,9]], name: 'motif_3'} ]
|
30
|
-
end
|
31
|
-
|
32
|
-
it 'should be able to parse motif with additional rows' do
|
33
|
-
input = <<-EOS
|
34
|
-
NA motif_1
|
35
|
-
P0 A C G T S P
|
36
|
-
P1 0 1 2 3 5 10
|
37
|
-
P2 4 5 6 7 5 11
|
38
|
-
EOS
|
39
|
-
StringFantomParser.split(input).should == [ {matrix: [[0,1,2,3],[4,5,6,7]], name: 'motif_1'} ]
|
25
|
+
P2 6 7 8 9"
|
26
|
+
StringFantomParser.split_on_motifs(input).should == [ Fabricate(:pm_1), Fabricate(:pm_2), Fabricate(:pm_3) ]
|
40
27
|
end
|
41
28
|
end
|
42
29
|
|
43
30
|
good_cases = {
|
44
31
|
'string in Fantom-format' => {input: "
|
45
|
-
NA
|
32
|
+
NA PM_name
|
33
|
+
P0 A C G T
|
34
|
+
P1 1 2 3 4
|
35
|
+
P2 5 6 7 8",
|
36
|
+
result: Fabricate(:pm)
|
37
|
+
},
|
38
|
+
|
39
|
+
'motif with additional rows' => {input: "
|
40
|
+
NA PM_name
|
41
|
+
P0 A C G T S P
|
42
|
+
P1 1 2 3 4 5 10
|
43
|
+
P2 5 6 7 8 5 11",
|
44
|
+
result: Fabricate(:pm)
|
45
|
+
},
|
46
|
+
|
47
|
+
'string with more than 10 positions(2-digit row numbers)' => {input: "
|
48
|
+
NA PM_name
|
46
49
|
P0 A C G T
|
47
|
-
P1
|
48
|
-
P2
|
49
|
-
P3
|
50
|
-
P4
|
51
|
-
P5
|
52
|
-
P6
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
50
|
+
P1 1 2 3 4
|
51
|
+
P2 5 6 7 8
|
52
|
+
P3 1 2 3 4
|
53
|
+
P4 5 6 7 8
|
54
|
+
P5 1 2 3 4
|
55
|
+
P6 5 6 7 8
|
56
|
+
P7 1 2 3 4
|
57
|
+
P8 5 6 7 8
|
58
|
+
P9 1 2 3 4
|
59
|
+
P10 5 6 7 8
|
60
|
+
P11 1 2 3 4
|
61
|
+
P12 5 6 7 8",
|
62
|
+
result: Fabricate(:pm, matrix: [[1,2,3,4],[5,6,7,8]] * 6 )
|
60
63
|
}
|
61
64
|
}
|
62
65
|
|
@@ -1,5 +1,5 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
require_relative '../../lib/bioinform/parsers/string_parser'
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
describe StringParser do
|
@@ -7,37 +7,35 @@ module Bioinform
|
|
7
7
|
describe '#each' do
|
8
8
|
it 'should yield consequent results of #parse! while it returns result' do
|
9
9
|
parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
|
10
|
-
expect{|b| parser.each(&b)}.to yield_successive_args(
|
10
|
+
expect{|b| parser.each(&b)}.to yield_successive_args(OpenStruct.new(matrix:[[1,2,3,4],[5,6,7,8]], name:nil),
|
11
|
+
OpenStruct.new(matrix:[[1,2,3,4],[1,2,3,4]], name:nil),
|
12
|
+
OpenStruct.new(matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name') )
|
11
13
|
end
|
12
14
|
it 'should restart parser from the beginning each time' do
|
13
15
|
parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
|
14
16
|
3.times do
|
15
|
-
expect{|b| parser.each(&b)}.to yield_successive_args(
|
17
|
+
expect{|b| parser.each(&b)}.to yield_successive_args(OpenStruct.new(matrix:[[1,2,3,4],[5,6,7,8]], name:nil),
|
18
|
+
OpenStruct.new(matrix:[[1,2,3,4],[1,2,3,4]], name:nil),
|
19
|
+
OpenStruct.new(matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name') )
|
16
20
|
end
|
17
21
|
end
|
18
22
|
end
|
19
23
|
|
20
|
-
context '::
|
24
|
+
context '::split_on_motifs' do
|
21
25
|
it 'should be able to get a single PM' do
|
22
|
-
StringParser.
|
26
|
+
StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8").should == [ Fabricate(:pm_unnamed) ]
|
23
27
|
end
|
24
|
-
|
25
28
|
it 'should be able to split several PMs separated with an empty line' do
|
26
|
-
StringParser.
|
29
|
+
StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n\n 15 16 17 18 \n 11 21 31 41").should ==
|
30
|
+
[ Fabricate(:pm_first, name: nil), Fabricate(:pm_second, name: nil) ]
|
27
31
|
end
|
28
|
-
|
29
32
|
it 'should be able to split several PMs separated with name' do
|
30
|
-
StringParser.
|
31
|
-
|
32
|
-
StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8\n\n\n").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
|
33
|
+
StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \nPM_second\n 15 16 17 18 \n 11 21 31 41").should ==
|
34
|
+
[ Fabricate(:pm_first, name: nil), Fabricate(:pm_second) ]
|
33
35
|
end
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
it 'should be able to split string into PMs' do
|
38
|
-
result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
|
39
|
-
result.map{|pm| pm.matrix}.should == [ [[1,2,3,4],[5,6,7,8],[9,10,11,12]], [[9,10,11,12],[1,2,3,4],[5,6,7,8]] ]
|
40
|
-
result.map{|pm| pm.name}.should == [nil, 'Name']
|
36
|
+
it 'should be able to split several PMs separated with both name and empty line' do
|
37
|
+
StringParser.split_on_motifs("PM_first\n1 2 3 4 \n 5 6 7 8 \n\nPM_second\n 15 16 17 18 \n 11 21 31 41\n\n\n").should ==
|
38
|
+
[ Fabricate(:pm_first), Fabricate(:pm_second) ]
|
41
39
|
end
|
42
40
|
it 'should create PMs by default' do
|
43
41
|
result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
|
@@ -50,54 +48,23 @@ module Bioinform
|
|
50
48
|
end
|
51
49
|
|
52
50
|
good_cases = {
|
53
|
-
'Nx4 string' => {input: "1 2 3 4\n5 6 7 8",
|
54
|
-
|
55
|
-
|
56
|
-
'
|
57
|
-
matrix: [[1,2,3,4],[5,6,7,8]] },
|
58
|
-
|
59
|
-
'string with name' => {input: "TestMatrix\n1 5\n2 6\n3 7\n 4 8",
|
60
|
-
matrix: [[1,2,3,4],[5,6,7,8]], name: 'TestMatrix' },
|
61
|
-
|
62
|
-
'string with name (with introduction sign)' => {input: ">\t TestMatrix\n1 5\n2 6\n3 7\n 4 8",
|
63
|
-
matrix: [[1,2,3,4],[5,6,7,8]],
|
64
|
-
name: 'TestMatrix' },
|
65
|
-
|
51
|
+
'Nx4 string' => {input: "1 2 3 4\n5 6 7 8", result: Fabricate(:pm_unnamed) },
|
52
|
+
'4xN string' => {input: "1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm_unnamed) },
|
53
|
+
'string with name' => {input: "PM_name\n1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm) },
|
54
|
+
'string with name (with introduction sign)' => {input: ">\t PM_name\n1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm) },
|
66
55
|
'string with name (with special characters)' => {input: "Testmatrix_first:subname+sub-subname\n1 5\n2 6\n3 7\n 4 8",
|
67
|
-
|
68
|
-
|
69
|
-
'string with
|
70
|
-
|
71
|
-
|
72
|
-
'string with
|
73
|
-
|
74
|
-
|
75
|
-
'string with
|
76
|
-
|
77
|
-
|
78
|
-
'string with
|
79
|
-
matrix: [[1,2,3,4],[5,6,7,8]] },
|
80
|
-
|
81
|
-
'string with windows crlf' => {input: "1 2 3 4\r\n5 6 7 8",
|
82
|
-
matrix: [[1,2,3,4],[5,6,7,8]] },
|
83
|
-
|
84
|
-
'Nx4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8",
|
85
|
-
matrix: [[1,2,3,4],[5,6,7,8]] },
|
86
|
-
|
87
|
-
'Nx4 string with name and acgt-header' => {input: "Name\nA C G T\n1 2 3 4\n5 6 7 8",
|
88
|
-
matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'},
|
89
|
-
|
90
|
-
'Nx4 string with acgt-row-markers' => {input: "A 1 5\nC : 2 6\nG3 7\nT |4 8",
|
91
|
-
matrix: [[1,2,3,4],[5,6,7,8]] },
|
92
|
-
|
93
|
-
'4x4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8\n0 0 0 0\n2 2 2 2",
|
94
|
-
matrix: [[1,2,3,4],[5,6,7,8],[0,0,0,0],[2,2,2,2]] },
|
95
|
-
|
96
|
-
'4x4 string with acgt-row-markers' => {input: "A|1 2 3 4\nC|5 6 7 8\nG|0 0 0 0\nT|2 2 2 2",
|
97
|
-
matrix: [[1,5,0,2],[2,6,0,2],[3,7,0,2],[4,8,0,2]] },
|
98
|
-
|
99
|
-
'4x4 string with name and acgt-row-markers' => {input: "Name\nA:1 2 3 4\nC:5 6 7 8\nG:0 0 0 0\nT:2 2 2 2",
|
100
|
-
matrix: [[1,5,0,2],[2,6,0,2],[3,7,0,2],[4,8,0,2]], name: 'Name' }
|
56
|
+
result: Fabricate(:pm, name: 'Testmatrix_first:subname+sub-subname') },
|
57
|
+
'string with float numerics' => {input: "1.23 4.56 7.8 9.0\n9 -8.7 6.54 -3210", result: Fabricate(:pm_with_floats) },
|
58
|
+
'string with exponents' => {input: "123e-2 0.456e+1 7.8 9.0\n9 -87000000000E-10 6.54 -3.210e3", result: Fabricate(:pm_with_floats) },
|
59
|
+
'string with multiple spaces and tabs' => {input: "1 \t\t 2 3 4\n 5 6 7 8", result: Fabricate(:pm_unnamed) },
|
60
|
+
'string with preceeding and terminating newlines' => {input: "\n\n\t 1 2 3 4\n5 6 7 8 \n\t\n", result: Fabricate(:pm_unnamed) },
|
61
|
+
'string with windows crlf' => {input: "1 2 3 4\r\n5 6 7 8", result: Fabricate(:pm_unnamed) },
|
62
|
+
'Nx4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8", result: Fabricate(:pm_unnamed) },
|
63
|
+
'Nx4 string with name and acgt-header' => {input: "PM_name\nA C G T\n1 2 3 4\n5 6 7 8", result: Fabricate(:pm)},
|
64
|
+
'Nx4 string with acgt-row-markers' => {input: "A 1 5\nC : 2 6\nG3 7\nT |4 8", result: Fabricate(:pm_unnamed) },
|
65
|
+
'4x4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8\n9 10 11 12\n13 14 15 16", result: Fabricate(:pm_4x4_unnamed) },
|
66
|
+
'4x4 string with acgt-row-markers' => {input: "A|1 5 9 13\nC|2 6 10 14\nG|3 7 11 15\nT|4 8 12 16", result: Fabricate(:pm_4x4_unnamed) },
|
67
|
+
'4x4 string with name and acgt-row-markers' => {input: "PM_name\nA:1 5 9 13\nC:2 6 10 14\nG:3 7 11 15\nT:4 8 12 16", result: Fabricate(:pm_4x4) }
|
101
68
|
}
|
102
69
|
|
103
70
|
bad_cases = {
|
@@ -1,5 +1,6 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
require_relative '../../lib/bioinform/parsers/parser'
|
3
|
+
require_relative '../../lib/bioinform/data_models/collection'
|
3
4
|
|
4
5
|
module Bioinform
|
5
6
|
describe TrivialParser do
|
@@ -8,15 +9,56 @@ module Bioinform
|
|
8
9
|
TrivialParser.instance_method(:initialize).arity.should == 1
|
9
10
|
end
|
10
11
|
end
|
11
|
-
|
12
|
-
|
13
|
-
|
12
|
+
|
13
|
+
context '#parse!' do
|
14
|
+
it 'should return OpenStruct based on input of that was passed to initialize when input is a Hash' do
|
15
|
+
TrivialParser.new(matrix: 'stub matrix', name: 'stub name').parse!.should == OpenStruct.new(matrix: 'stub matrix', name: 'stub name')
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'should return OpenStruct based on input of that was passed to initialize when input is a OpenStruct' do
|
19
|
+
TrivialParser.new(OpenStruct.new(matrix: 'stub matrix', name: 'stub name')).parse!.should == OpenStruct.new(matrix: 'stub matrix', name: 'stub name')
|
14
20
|
end
|
15
21
|
end
|
22
|
+
|
23
|
+
context '::split_on_motifs' do
|
24
|
+
it 'should be able to get a single PM' do
|
25
|
+
TrivialParser.split_on_motifs({matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}, PM).should == [ PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name:'Name') ]
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
16
29
|
it 'can be used to create PM with {matrix: ..., name: ...} form' do
|
17
30
|
pm = PM.new({matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}, TrivialParser)
|
18
31
|
pm.matrix.should == [[1,2,3,4],[5,6,7,8]]
|
19
32
|
pm.name.should == 'Name'
|
20
33
|
end
|
34
|
+
|
35
|
+
it 'can be used to create PM from PM (make copy)' do
|
36
|
+
pm = Fabricate(:pm)
|
37
|
+
pm_copy = PM.new(pm, TrivialParser)
|
38
|
+
pm_copy.should == pm
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
describe TrivialCollectionParser do
|
43
|
+
before :each do
|
44
|
+
@pm_1 = Fabricate(:pm_first)
|
45
|
+
@pm_2 = Fabricate(:pm_second)
|
46
|
+
@collection = Fabricate(:two_elements_collection)
|
47
|
+
end
|
48
|
+
|
49
|
+
describe '#parse!' do
|
50
|
+
it 'can be used to obtain PMs from Collection' do
|
51
|
+
@parser = TrivialCollectionParser.new(@collection)
|
52
|
+
@parser.parse!.should == @pm_1
|
53
|
+
@parser.parse!.should == @pm_2
|
54
|
+
expect{ @parser.parse! }.to raise_error
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
describe '::split_on_motifs' do
|
59
|
+
it 'should be able to split collection into PMs' do
|
60
|
+
TrivialCollectionParser.split_on_motifs(@collection).should == [@pm_1, @pm_2]
|
61
|
+
end
|
62
|
+
end
|
21
63
|
end
|
22
|
-
end
|
64
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require_relative '../spec_helper'
|
3
|
+
require_relative '../../lib/bioinform/parsers/yaml_parser'
|
4
|
+
require_relative '../../lib/bioinform/data_models/collection'
|
5
|
+
|
6
|
+
module Bioinform
|
7
|
+
describe YAMLParser do
|
8
|
+
context '#parse!' do
|
9
|
+
it 'should return PM that was encoded in YAML format' do
|
10
|
+
pm = Fabricate(:pm)
|
11
|
+
parser = YAMLParser.new(pm.to_yaml)
|
12
|
+
parser.parse!.should == pm
|
13
|
+
end
|
14
|
+
end
|
15
|
+
it 'can be used to create PM from yaml-string' do
|
16
|
+
pm = Fabricate(:pm)
|
17
|
+
pm_copy = PM.new(pm.to_yaml, YAMLParser)
|
18
|
+
pm_copy.should == pm
|
19
|
+
end
|
20
|
+
|
21
|
+
context '::split_on_motifs' do
|
22
|
+
it 'should be able to get a single PM' do
|
23
|
+
pm = Fabricate(:pm)
|
24
|
+
YAMLParser.split_on_motifs(pm.to_yaml, PM).should == [ pm ]
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
describe YAMLCollectionParser do
|
30
|
+
before :each do
|
31
|
+
@pm_1 = Fabricate(:pm_first)
|
32
|
+
@pm_2 = Fabricate(:pm_second)
|
33
|
+
@collection = Collection.new
|
34
|
+
@collection << @pm_1 << @pm_2
|
35
|
+
end
|
36
|
+
context '::split_on_motifs' do
|
37
|
+
it 'should be able to split collection into PMs' do
|
38
|
+
YAMLCollectionParser.split_on_motifs(@collection.to_yaml).should == [@pm_1, @pm_2]
|
39
|
+
end
|
40
|
+
end
|
41
|
+
context '#parse!' do
|
42
|
+
it 'should return PMs which were in encoded YAML format' do
|
43
|
+
@parser = YAMLCollectionParser.new(@collection.to_yaml)
|
44
|
+
@parser.parse!.should == @pm_1
|
45
|
+
@parser.parse!.should == @pm_2
|
46
|
+
expect{ @parser.parse! }.to raise_error
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -5,6 +5,7 @@ require 'rspec'
|
|
5
5
|
|
6
6
|
require 'fileutils'
|
7
7
|
require 'stringio'
|
8
|
+
require 'fabrication'
|
8
9
|
|
9
10
|
# from minitest
|
10
11
|
def capture_io(&block)
|
@@ -39,12 +40,7 @@ def parser_specs(parser_klass, good_cases, bad_cases)
|
|
39
40
|
good_cases.each do |case_description, input_and_result|
|
40
41
|
it "should be able to parse #{case_description}" do
|
41
42
|
result = parser_klass.new(input_and_result[:input]).parse
|
42
|
-
result
|
43
|
-
if input_and_result.has_key?(:name)
|
44
|
-
result[:name].should == input_and_result[:name]
|
45
|
-
else
|
46
|
-
result[:name].should be_nil
|
47
|
-
end
|
43
|
+
Bioinform::PM.new(result).should == input_and_result[:result]
|
48
44
|
end
|
49
45
|
end
|
50
46
|
|
data/spec/support/inverf_spec.rb
CHANGED