bioinform 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/TODO.txt +7 -2
- data/bin/merge_into_collection +4 -0
- data/bin/pcm2pwm +1 -1
- data/bin/split_motifs +1 -1
- data/bioinform.gemspec +2 -0
- data/lib/bioinform/cli/merge_into_collection.rb +76 -0
- data/lib/bioinform/cli/pcm2pwm.rb +20 -20
- data/lib/bioinform/cli/split_motifs.rb +21 -20
- data/lib/bioinform/cli.rb +16 -2
- data/lib/bioinform/data_models/collection.rb +13 -10
- data/lib/bioinform/data_models/pcm.rb +2 -2
- data/lib/bioinform/data_models/pm.rb +24 -37
- data/lib/bioinform/data_models/ppm.rb +2 -2
- data/lib/bioinform/data_models/pwm.rb +2 -2
- data/lib/bioinform/data_models.rb +8 -8
- data/lib/bioinform/parsers/parser.rb +10 -5
- data/lib/bioinform/parsers/splittable_parser.rb +57 -0
- data/lib/bioinform/parsers/string_fantom_parser.rb +3 -3
- data/lib/bioinform/parsers/string_parser.rb +5 -24
- data/lib/bioinform/parsers/trivial_parser.rb +19 -3
- data/lib/bioinform/parsers/yaml_parser.rb +35 -0
- data/lib/bioinform/parsers.rb +6 -4
- data/lib/bioinform/support/parameters.rb +19 -0
- data/lib/bioinform/support/partial_sums.rb +1 -1
- data/lib/bioinform/support.rb +11 -10
- data/lib/bioinform/version.rb +1 -1
- data/lib/bioinform.rb +5 -5
- data/spec/cli/cli_spec.rb +8 -7
- data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +14 -0
- data/spec/cli/data/{KLF4_f2.pwm.result → merge_into_collection/KLF4_f2.pwm} +0 -0
- data/spec/cli/data/{SP1_f1.pwm.result → merge_into_collection/SP1_f1.pwm} +0 -0
- data/spec/cli/data/merge_into_collection/collection.txt.result +40 -0
- data/spec/cli/data/merge_into_collection/collection.yaml.result +185 -0
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +185 -0
- data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +14 -0
- data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +11 -0
- data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +12 -0
- data/spec/cli/data/{KLF4 f2 spaced name.pcm → pcm2pwm/KLF4 f2 spaced name.pcm} +0 -0
- data/spec/cli/data/{KLF4_f2.pcm → pcm2pwm/KLF4_f2.pcm} +0 -0
- data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +11 -0
- data/spec/cli/data/{SP1_f1.pcm → pcm2pwm/SP1_f1.pcm} +0 -0
- data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +12 -0
- data/spec/cli/data/split_motifs/GABPA_f1.mat.result +14 -0
- data/spec/cli/data/split_motifs/KLF4_f2.mat.result +11 -0
- data/spec/cli/data/split_motifs/SP1_f1.mat.result +12 -0
- data/spec/cli/data/split_motifs/collection.yaml +197 -0
- data/spec/cli/data/split_motifs/plain_collection.txt +38 -0
- data/spec/cli/merge_into_collection_spec.rb +100 -0
- data/spec/cli/pcm2pwm_spec.rb +3 -3
- data/spec/cli/split_motifs_spec.rb +74 -3
- data/spec/data_models/collection_spec.rb +2 -2
- data/spec/data_models/pcm_spec.rb +2 -2
- data/spec/data_models/pm_spec.rb +10 -27
- data/spec/data_models/ppm_spec.rb +2 -2
- data/spec/data_models/pwm_spec.rb +3 -3
- data/spec/fabricators/collection_fabricator.rb +8 -0
- data/spec/fabricators/pm_fabricator.rb +43 -0
- data/spec/parsers/parser_spec.rb +29 -37
- data/spec/parsers/string_fantom_parser_spec.rb +38 -35
- data/spec/parsers/string_parser_spec.rb +33 -66
- data/spec/parsers/trivial_parser_spec.rb +48 -6
- data/spec/parsers/yaml_parser_spec.rb +50 -0
- data/spec/spec_helper.rb +2 -6
- data/spec/support/advanced_scan_spec.rb +2 -2
- data/spec/support/array_product_spec.rb +2 -2
- data/spec/support/array_zip_spec.rb +2 -2
- data/spec/support/collect_hash_spec.rb +2 -2
- data/spec/support/delete_many_spec.rb +2 -2
- data/spec/support/inverf_spec.rb +2 -2
- data/spec/support/multiline_squish_spec.rb +2 -2
- data/spec/support/partial_sums_spec.rb +2 -2
- data/spec/support/same_by_spec.rb +2 -2
- metadata +86 -12
data/spec/parsers/parser_spec.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
require_relative '../../lib/bioinform/parsers/parser'
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
describe Parser do
|
6
6
|
context '#initialize' do
|
7
7
|
it 'should accept an array correctly' do
|
8
|
-
Parser.new([[1,2,3,4],[5,6,7,8]]).parse
|
8
|
+
Parser.new([[1,2,3,4],[5,6,7,8]]).parse.matrix.should == [[1,2,3,4],[5,6,7,8]]
|
9
9
|
end
|
10
10
|
it 'should treat several arguments as an array composed of them' do
|
11
11
|
Parser.new([1,2,3,4],[5,6,7,8]).parse.should == Parser.new([[1,2,3,4],[5,6,7,8]]).parse
|
@@ -21,6 +21,7 @@ module Bioinform
|
|
21
21
|
expect{ Parser.parse!([1,2,3],[4,5,6]) }.to raise_error
|
22
22
|
end
|
23
23
|
end
|
24
|
+
|
24
25
|
context '::parse' do
|
25
26
|
it 'should behave like Parser.new(input).parse!' do
|
26
27
|
Parser.parse([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse
|
@@ -37,9 +38,13 @@ module Bioinform
|
|
37
38
|
Parser.choose("1 2 3 4\n5 6 7 8").should be_kind_of(StringParser)
|
38
39
|
Parser.choose("1 2 3 4\n5 6 7 8").input.should == "1 2 3 4\n5 6 7 8"
|
39
40
|
end
|
40
|
-
|
41
41
|
end
|
42
42
|
|
43
|
+
context '::split_on_motifs' do
|
44
|
+
it 'should be able to get a single PM' do
|
45
|
+
Parser.split_on_motifs([[1,2,3,4],[5,6,7,8]], PM).should == [ PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name:nil) ]
|
46
|
+
end
|
47
|
+
end
|
43
48
|
|
44
49
|
context '::normalize_hash_keys' do
|
45
50
|
it 'should convert both symbolic and string keys, in both upcase and downcase to symbolic upcases' do
|
@@ -97,56 +102,43 @@ module Bioinform
|
|
97
102
|
end
|
98
103
|
|
99
104
|
good_cases = {
|
100
|
-
'Array Nx4' => {input: [[
|
101
|
-
|
105
|
+
'Array Nx4' => {input: [[1,2,3,4],[5,6,7,8]],
|
106
|
+
result: Fabricate(:pm_unnamed) },
|
102
107
|
|
103
|
-
'Array 4xN' => {input: [[
|
104
|
-
|
108
|
+
'Array 4xN' => {input: [[1,5],[2,6],[3,7],[4,8]],
|
109
|
+
result: Fabricate(:pm_unnamed) },
|
105
110
|
|
106
|
-
'Hash A,C,G,T => Arrays' => { input: {:A => [
|
107
|
-
|
111
|
+
'Hash A,C,G,T => Arrays' => { input: {:A => [1,5], :c => [2,6],'g' => [3,7],'T' => [4,8]},
|
112
|
+
result: Fabricate(:pm_unnamed) },
|
108
113
|
|
109
|
-
'Hash array of hashes' => { input: [{:A =>
|
110
|
-
|
114
|
+
'Hash array of hashes' => { input: [{:A => 1,:c => 2,'g' => 3,'T' => 4}, {:A => 5,:c => 6,'g' => 7,'T' => 8}],
|
115
|
+
result: Fabricate(:pm_unnamed) },
|
111
116
|
|
112
|
-
'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[
|
113
|
-
|
117
|
+
'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]],
|
118
|
+
result: Fabricate(:pm_4x4_unnamed) },
|
114
119
|
|
115
|
-
'Hash A,C,G,T => 4-Arrays' => { input: {:A => [
|
116
|
-
|
120
|
+
'Hash A,C,G,T => 4-Arrays' => { input: {:A => [1,5,9,13], :c => [2,6,10,14],'g' => [3,7,11,15],'T' => [4,8,12,16]},
|
121
|
+
result: Fabricate(:pm_4x4_unnamed) },
|
117
122
|
|
118
|
-
'4-Arrays of A,C,G,T hashes' => { input: [{:A =>
|
119
|
-
{:A =>
|
120
|
-
{:A =>
|
121
|
-
{:A =>
|
122
|
-
|
123
|
+
'4-Arrays of A,C,G,T hashes' => { input: [{:A => 1, :c => 2, 'g' => 3, 'T' => 4},
|
124
|
+
{:A => 5, :c => 6, 'g' => 7, 'T' => 8},
|
125
|
+
{:A => 9, :c => 10, 'g' => 11, 'T' => 12},
|
126
|
+
{:A => 13, :c => 14, 'g' => 15, 'T' => 16}],
|
127
|
+
result: Fabricate(:pm_4x4_unnamed) }
|
123
128
|
}
|
124
129
|
|
125
130
|
bad_cases = {
|
126
131
|
'Nil object on input' => {input: nil},
|
127
|
-
|
128
132
|
'Empty array on input' => {input: []},
|
129
|
-
|
130
133
|
'Different sizes of row arrays' => {input: [[1,2,3,4],[5,6,7,8,9]] },
|
131
|
-
|
132
134
|
'Different sizes of column arrays' => {input: [[0,10],[1,11],[2,12],[3]] },
|
133
|
-
|
134
135
|
'No one dimension have size 4' => {input: [[0,1,2,3,4],[10,11,12,13,14], [0,1,2,3,4]] },
|
135
|
-
|
136
|
-
'
|
137
|
-
|
138
|
-
'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3},
|
139
|
-
{:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
|
140
|
-
|
141
|
-
'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3},
|
142
|
-
{:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
|
143
|
-
|
136
|
+
'Missing keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12}] },
|
137
|
+
'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
|
138
|
+
'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3}, {:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
|
144
139
|
'Different sizes of row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13,14]} },
|
145
|
-
|
146
140
|
'Missing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12]} },
|
147
|
-
|
148
141
|
'Wrong keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'X' => [2,12]} },
|
149
|
-
|
150
142
|
'Excessing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12], 'T' => [3,12], :X => [4,14]} }
|
151
143
|
}
|
152
144
|
|
@@ -1,16 +1,16 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
require_relative '../../lib/bioinform/parsers/string_fantom_parser'
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
describe StringFantomParser do
|
6
|
-
describe '#
|
6
|
+
describe '#split_on_motifs' do
|
7
7
|
it 'should be able to parse several motifs' do
|
8
|
-
input =
|
8
|
+
input = "
|
9
9
|
//
|
10
10
|
NA motif_1
|
11
11
|
P0 A C G T
|
12
|
-
P1 0
|
13
|
-
P2 4
|
12
|
+
P1 0 1 2 3
|
13
|
+
P2 4 5 6 7
|
14
14
|
//
|
15
15
|
//
|
16
16
|
NA motif_2
|
@@ -22,41 +22,44 @@ P3 9 10 11 12
|
|
22
22
|
NA motif_3
|
23
23
|
P0 A C G T
|
24
24
|
P1 2 3 4 5
|
25
|
-
P2 6 7 8 9
|
26
|
-
|
27
|
-
StringFantomParser.split(input).should == [ {matrix: [[0,1,2,3],[4,5,6,7]], name: 'motif_1'},
|
28
|
-
{matrix: [[1,2,3,4],[5,6,7,8],[9,10,11,12]], name: 'motif_2'},
|
29
|
-
{matrix: [[2,3,4,5],[6,7,8,9]], name: 'motif_3'} ]
|
30
|
-
end
|
31
|
-
|
32
|
-
it 'should be able to parse motif with additional rows' do
|
33
|
-
input = <<-EOS
|
34
|
-
NA motif_1
|
35
|
-
P0 A C G T S P
|
36
|
-
P1 0 1 2 3 5 10
|
37
|
-
P2 4 5 6 7 5 11
|
38
|
-
EOS
|
39
|
-
StringFantomParser.split(input).should == [ {matrix: [[0,1,2,3],[4,5,6,7]], name: 'motif_1'} ]
|
25
|
+
P2 6 7 8 9"
|
26
|
+
StringFantomParser.split_on_motifs(input).should == [ Fabricate(:pm_1), Fabricate(:pm_2), Fabricate(:pm_3) ]
|
40
27
|
end
|
41
28
|
end
|
42
29
|
|
43
30
|
good_cases = {
|
44
31
|
'string in Fantom-format' => {input: "
|
45
|
-
NA
|
32
|
+
NA PM_name
|
33
|
+
P0 A C G T
|
34
|
+
P1 1 2 3 4
|
35
|
+
P2 5 6 7 8",
|
36
|
+
result: Fabricate(:pm)
|
37
|
+
},
|
38
|
+
|
39
|
+
'motif with additional rows' => {input: "
|
40
|
+
NA PM_name
|
41
|
+
P0 A C G T S P
|
42
|
+
P1 1 2 3 4 5 10
|
43
|
+
P2 5 6 7 8 5 11",
|
44
|
+
result: Fabricate(:pm)
|
45
|
+
},
|
46
|
+
|
47
|
+
'string with more than 10 positions(2-digit row numbers)' => {input: "
|
48
|
+
NA PM_name
|
46
49
|
P0 A C G T
|
47
|
-
P1
|
48
|
-
P2
|
49
|
-
P3
|
50
|
-
P4
|
51
|
-
P5
|
52
|
-
P6
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
50
|
+
P1 1 2 3 4
|
51
|
+
P2 5 6 7 8
|
52
|
+
P3 1 2 3 4
|
53
|
+
P4 5 6 7 8
|
54
|
+
P5 1 2 3 4
|
55
|
+
P6 5 6 7 8
|
56
|
+
P7 1 2 3 4
|
57
|
+
P8 5 6 7 8
|
58
|
+
P9 1 2 3 4
|
59
|
+
P10 5 6 7 8
|
60
|
+
P11 1 2 3 4
|
61
|
+
P12 5 6 7 8",
|
62
|
+
result: Fabricate(:pm, matrix: [[1,2,3,4],[5,6,7,8]] * 6 )
|
60
63
|
}
|
61
64
|
}
|
62
65
|
|
@@ -1,5 +1,5 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
require_relative '../../lib/bioinform/parsers/string_parser'
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
describe StringParser do
|
@@ -7,37 +7,35 @@ module Bioinform
|
|
7
7
|
describe '#each' do
|
8
8
|
it 'should yield consequent results of #parse! while it returns result' do
|
9
9
|
parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
|
10
|
-
expect{|b| parser.each(&b)}.to yield_successive_args(
|
10
|
+
expect{|b| parser.each(&b)}.to yield_successive_args(OpenStruct.new(matrix:[[1,2,3,4],[5,6,7,8]], name:nil),
|
11
|
+
OpenStruct.new(matrix:[[1,2,3,4],[1,2,3,4]], name:nil),
|
12
|
+
OpenStruct.new(matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name') )
|
11
13
|
end
|
12
14
|
it 'should restart parser from the beginning each time' do
|
13
15
|
parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
|
14
16
|
3.times do
|
15
|
-
expect{|b| parser.each(&b)}.to yield_successive_args(
|
17
|
+
expect{|b| parser.each(&b)}.to yield_successive_args(OpenStruct.new(matrix:[[1,2,3,4],[5,6,7,8]], name:nil),
|
18
|
+
OpenStruct.new(matrix:[[1,2,3,4],[1,2,3,4]], name:nil),
|
19
|
+
OpenStruct.new(matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name') )
|
16
20
|
end
|
17
21
|
end
|
18
22
|
end
|
19
23
|
|
20
|
-
context '::
|
24
|
+
context '::split_on_motifs' do
|
21
25
|
it 'should be able to get a single PM' do
|
22
|
-
StringParser.
|
26
|
+
StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8").should == [ Fabricate(:pm_unnamed) ]
|
23
27
|
end
|
24
|
-
|
25
28
|
it 'should be able to split several PMs separated with an empty line' do
|
26
|
-
StringParser.
|
29
|
+
StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n\n 15 16 17 18 \n 11 21 31 41").should ==
|
30
|
+
[ Fabricate(:pm_first, name: nil), Fabricate(:pm_second, name: nil) ]
|
27
31
|
end
|
28
|
-
|
29
32
|
it 'should be able to split several PMs separated with name' do
|
30
|
-
StringParser.
|
31
|
-
|
32
|
-
StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8\n\n\n").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
|
33
|
+
StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \nPM_second\n 15 16 17 18 \n 11 21 31 41").should ==
|
34
|
+
[ Fabricate(:pm_first, name: nil), Fabricate(:pm_second) ]
|
33
35
|
end
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
it 'should be able to split string into PMs' do
|
38
|
-
result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
|
39
|
-
result.map{|pm| pm.matrix}.should == [ [[1,2,3,4],[5,6,7,8],[9,10,11,12]], [[9,10,11,12],[1,2,3,4],[5,6,7,8]] ]
|
40
|
-
result.map{|pm| pm.name}.should == [nil, 'Name']
|
36
|
+
it 'should be able to split several PMs separated with both name and empty line' do
|
37
|
+
StringParser.split_on_motifs("PM_first\n1 2 3 4 \n 5 6 7 8 \n\nPM_second\n 15 16 17 18 \n 11 21 31 41\n\n\n").should ==
|
38
|
+
[ Fabricate(:pm_first), Fabricate(:pm_second) ]
|
41
39
|
end
|
42
40
|
it 'should create PMs by default' do
|
43
41
|
result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
|
@@ -50,54 +48,23 @@ module Bioinform
|
|
50
48
|
end
|
51
49
|
|
52
50
|
good_cases = {
|
53
|
-
'Nx4 string' => {input: "1 2 3 4\n5 6 7 8",
|
54
|
-
|
55
|
-
|
56
|
-
'
|
57
|
-
matrix: [[1,2,3,4],[5,6,7,8]] },
|
58
|
-
|
59
|
-
'string with name' => {input: "TestMatrix\n1 5\n2 6\n3 7\n 4 8",
|
60
|
-
matrix: [[1,2,3,4],[5,6,7,8]], name: 'TestMatrix' },
|
61
|
-
|
62
|
-
'string with name (with introduction sign)' => {input: ">\t TestMatrix\n1 5\n2 6\n3 7\n 4 8",
|
63
|
-
matrix: [[1,2,3,4],[5,6,7,8]],
|
64
|
-
name: 'TestMatrix' },
|
65
|
-
|
51
|
+
'Nx4 string' => {input: "1 2 3 4\n5 6 7 8", result: Fabricate(:pm_unnamed) },
|
52
|
+
'4xN string' => {input: "1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm_unnamed) },
|
53
|
+
'string with name' => {input: "PM_name\n1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm) },
|
54
|
+
'string with name (with introduction sign)' => {input: ">\t PM_name\n1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm) },
|
66
55
|
'string with name (with special characters)' => {input: "Testmatrix_first:subname+sub-subname\n1 5\n2 6\n3 7\n 4 8",
|
67
|
-
|
68
|
-
|
69
|
-
'string with
|
70
|
-
|
71
|
-
|
72
|
-
'string with
|
73
|
-
|
74
|
-
|
75
|
-
'string with
|
76
|
-
|
77
|
-
|
78
|
-
'string with
|
79
|
-
matrix: [[1,2,3,4],[5,6,7,8]] },
|
80
|
-
|
81
|
-
'string with windows crlf' => {input: "1 2 3 4\r\n5 6 7 8",
|
82
|
-
matrix: [[1,2,3,4],[5,6,7,8]] },
|
83
|
-
|
84
|
-
'Nx4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8",
|
85
|
-
matrix: [[1,2,3,4],[5,6,7,8]] },
|
86
|
-
|
87
|
-
'Nx4 string with name and acgt-header' => {input: "Name\nA C G T\n1 2 3 4\n5 6 7 8",
|
88
|
-
matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'},
|
89
|
-
|
90
|
-
'Nx4 string with acgt-row-markers' => {input: "A 1 5\nC : 2 6\nG3 7\nT |4 8",
|
91
|
-
matrix: [[1,2,3,4],[5,6,7,8]] },
|
92
|
-
|
93
|
-
'4x4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8\n0 0 0 0\n2 2 2 2",
|
94
|
-
matrix: [[1,2,3,4],[5,6,7,8],[0,0,0,0],[2,2,2,2]] },
|
95
|
-
|
96
|
-
'4x4 string with acgt-row-markers' => {input: "A|1 2 3 4\nC|5 6 7 8\nG|0 0 0 0\nT|2 2 2 2",
|
97
|
-
matrix: [[1,5,0,2],[2,6,0,2],[3,7,0,2],[4,8,0,2]] },
|
98
|
-
|
99
|
-
'4x4 string with name and acgt-row-markers' => {input: "Name\nA:1 2 3 4\nC:5 6 7 8\nG:0 0 0 0\nT:2 2 2 2",
|
100
|
-
matrix: [[1,5,0,2],[2,6,0,2],[3,7,0,2],[4,8,0,2]], name: 'Name' }
|
56
|
+
result: Fabricate(:pm, name: 'Testmatrix_first:subname+sub-subname') },
|
57
|
+
'string with float numerics' => {input: "1.23 4.56 7.8 9.0\n9 -8.7 6.54 -3210", result: Fabricate(:pm_with_floats) },
|
58
|
+
'string with exponents' => {input: "123e-2 0.456e+1 7.8 9.0\n9 -87000000000E-10 6.54 -3.210e3", result: Fabricate(:pm_with_floats) },
|
59
|
+
'string with multiple spaces and tabs' => {input: "1 \t\t 2 3 4\n 5 6 7 8", result: Fabricate(:pm_unnamed) },
|
60
|
+
'string with preceeding and terminating newlines' => {input: "\n\n\t 1 2 3 4\n5 6 7 8 \n\t\n", result: Fabricate(:pm_unnamed) },
|
61
|
+
'string with windows crlf' => {input: "1 2 3 4\r\n5 6 7 8", result: Fabricate(:pm_unnamed) },
|
62
|
+
'Nx4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8", result: Fabricate(:pm_unnamed) },
|
63
|
+
'Nx4 string with name and acgt-header' => {input: "PM_name\nA C G T\n1 2 3 4\n5 6 7 8", result: Fabricate(:pm)},
|
64
|
+
'Nx4 string with acgt-row-markers' => {input: "A 1 5\nC : 2 6\nG3 7\nT |4 8", result: Fabricate(:pm_unnamed) },
|
65
|
+
'4x4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8\n9 10 11 12\n13 14 15 16", result: Fabricate(:pm_4x4_unnamed) },
|
66
|
+
'4x4 string with acgt-row-markers' => {input: "A|1 5 9 13\nC|2 6 10 14\nG|3 7 11 15\nT|4 8 12 16", result: Fabricate(:pm_4x4_unnamed) },
|
67
|
+
'4x4 string with name and acgt-row-markers' => {input: "PM_name\nA:1 5 9 13\nC:2 6 10 14\nG:3 7 11 15\nT:4 8 12 16", result: Fabricate(:pm_4x4) }
|
101
68
|
}
|
102
69
|
|
103
70
|
bad_cases = {
|
@@ -1,5 +1,6 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
require_relative '../../lib/bioinform/parsers/parser'
|
3
|
+
require_relative '../../lib/bioinform/data_models/collection'
|
3
4
|
|
4
5
|
module Bioinform
|
5
6
|
describe TrivialParser do
|
@@ -8,15 +9,56 @@ module Bioinform
|
|
8
9
|
TrivialParser.instance_method(:initialize).arity.should == 1
|
9
10
|
end
|
10
11
|
end
|
11
|
-
|
12
|
-
|
13
|
-
|
12
|
+
|
13
|
+
context '#parse!' do
|
14
|
+
it 'should return OpenStruct based on input of that was passed to initialize when input is a Hash' do
|
15
|
+
TrivialParser.new(matrix: 'stub matrix', name: 'stub name').parse!.should == OpenStruct.new(matrix: 'stub matrix', name: 'stub name')
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'should return OpenStruct based on input of that was passed to initialize when input is a OpenStruct' do
|
19
|
+
TrivialParser.new(OpenStruct.new(matrix: 'stub matrix', name: 'stub name')).parse!.should == OpenStruct.new(matrix: 'stub matrix', name: 'stub name')
|
14
20
|
end
|
15
21
|
end
|
22
|
+
|
23
|
+
context '::split_on_motifs' do
|
24
|
+
it 'should be able to get a single PM' do
|
25
|
+
TrivialParser.split_on_motifs({matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}, PM).should == [ PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name:'Name') ]
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
16
29
|
it 'can be used to create PM with {matrix: ..., name: ...} form' do
|
17
30
|
pm = PM.new({matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}, TrivialParser)
|
18
31
|
pm.matrix.should == [[1,2,3,4],[5,6,7,8]]
|
19
32
|
pm.name.should == 'Name'
|
20
33
|
end
|
34
|
+
|
35
|
+
it 'can be used to create PM from PM (make copy)' do
|
36
|
+
pm = Fabricate(:pm)
|
37
|
+
pm_copy = PM.new(pm, TrivialParser)
|
38
|
+
pm_copy.should == pm
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
describe TrivialCollectionParser do
|
43
|
+
before :each do
|
44
|
+
@pm_1 = Fabricate(:pm_first)
|
45
|
+
@pm_2 = Fabricate(:pm_second)
|
46
|
+
@collection = Fabricate(:two_elements_collection)
|
47
|
+
end
|
48
|
+
|
49
|
+
describe '#parse!' do
|
50
|
+
it 'can be used to obtain PMs from Collection' do
|
51
|
+
@parser = TrivialCollectionParser.new(@collection)
|
52
|
+
@parser.parse!.should == @pm_1
|
53
|
+
@parser.parse!.should == @pm_2
|
54
|
+
expect{ @parser.parse! }.to raise_error
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
describe '::split_on_motifs' do
|
59
|
+
it 'should be able to split collection into PMs' do
|
60
|
+
TrivialCollectionParser.split_on_motifs(@collection).should == [@pm_1, @pm_2]
|
61
|
+
end
|
62
|
+
end
|
21
63
|
end
|
22
|
-
end
|
64
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require_relative '../spec_helper'
|
3
|
+
require_relative '../../lib/bioinform/parsers/yaml_parser'
|
4
|
+
require_relative '../../lib/bioinform/data_models/collection'
|
5
|
+
|
6
|
+
module Bioinform
|
7
|
+
describe YAMLParser do
|
8
|
+
context '#parse!' do
|
9
|
+
it 'should return PM that was encoded in YAML format' do
|
10
|
+
pm = Fabricate(:pm)
|
11
|
+
parser = YAMLParser.new(pm.to_yaml)
|
12
|
+
parser.parse!.should == pm
|
13
|
+
end
|
14
|
+
end
|
15
|
+
it 'can be used to create PM from yaml-string' do
|
16
|
+
pm = Fabricate(:pm)
|
17
|
+
pm_copy = PM.new(pm.to_yaml, YAMLParser)
|
18
|
+
pm_copy.should == pm
|
19
|
+
end
|
20
|
+
|
21
|
+
context '::split_on_motifs' do
|
22
|
+
it 'should be able to get a single PM' do
|
23
|
+
pm = Fabricate(:pm)
|
24
|
+
YAMLParser.split_on_motifs(pm.to_yaml, PM).should == [ pm ]
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
describe YAMLCollectionParser do
|
30
|
+
before :each do
|
31
|
+
@pm_1 = Fabricate(:pm_first)
|
32
|
+
@pm_2 = Fabricate(:pm_second)
|
33
|
+
@collection = Collection.new
|
34
|
+
@collection << @pm_1 << @pm_2
|
35
|
+
end
|
36
|
+
context '::split_on_motifs' do
|
37
|
+
it 'should be able to split collection into PMs' do
|
38
|
+
YAMLCollectionParser.split_on_motifs(@collection.to_yaml).should == [@pm_1, @pm_2]
|
39
|
+
end
|
40
|
+
end
|
41
|
+
context '#parse!' do
|
42
|
+
it 'should return PMs which were in encoded YAML format' do
|
43
|
+
@parser = YAMLCollectionParser.new(@collection.to_yaml)
|
44
|
+
@parser.parse!.should == @pm_1
|
45
|
+
@parser.parse!.should == @pm_2
|
46
|
+
expect{ @parser.parse! }.to raise_error
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -5,6 +5,7 @@ require 'rspec'
|
|
5
5
|
|
6
6
|
require 'fileutils'
|
7
7
|
require 'stringio'
|
8
|
+
require 'fabrication'
|
8
9
|
|
9
10
|
# from minitest
|
10
11
|
def capture_io(&block)
|
@@ -39,12 +40,7 @@ def parser_specs(parser_klass, good_cases, bad_cases)
|
|
39
40
|
good_cases.each do |case_description, input_and_result|
|
40
41
|
it "should be able to parse #{case_description}" do
|
41
42
|
result = parser_klass.new(input_and_result[:input]).parse
|
42
|
-
result
|
43
|
-
if input_and_result.has_key?(:name)
|
44
|
-
result[:name].should == input_and_result[:name]
|
45
|
-
else
|
46
|
-
result[:name].should be_nil
|
47
|
-
end
|
43
|
+
Bioinform::PM.new(result).should == input_and_result[:result]
|
48
44
|
end
|
49
45
|
end
|
50
46
|
|
data/spec/support/inverf_spec.rb
CHANGED