bioinform 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/bioinform/data_models/pcm.rb +12 -0
- data/lib/bioinform/data_models/pm.rb +38 -54
- data/lib/bioinform/data_models/pwm.rb +7 -7
- data/lib/bioinform/data_models.rb +2 -0
- data/lib/bioinform/parsers/parser.rb +40 -0
- data/lib/bioinform/{data_models/parsers → parsers}/string_fantom_parser.rb +1 -2
- data/lib/bioinform/{data_models/parsers → parsers}/string_parser.rb +7 -7
- data/lib/bioinform/parsers.rb +3 -0
- data/lib/bioinform/support/partial_sums.rb +2 -0
- data/lib/bioinform/version.rb +1 -1
- data/lib/bioinform.rb +0 -1
- data/spec/data_models/pcm_spec.rb +27 -0
- data/spec/data_models/pm_spec.rb +62 -144
- data/spec/data_models/pwm_spec.rb +3 -7
- data/spec/parsers/parser_spec.rb +58 -0
- data/spec/parsers/string_fantom_parser_spec.rb +28 -0
- data/spec/parsers/string_parser_spec.rb +46 -0
- data/spec/spec_helper.rb +13 -21
- data/spec/support/multiline_squish_spec.rb +12 -4
- data/spec/support/partial_sums_spec.rb +3 -0
- metadata +14 -18
- data/lib/bioinform/data_models/parser.rb +0 -38
- data/lib/bioinform/data_models/parsers/array_parser.rb +0 -17
- data/lib/bioinform/data_models/parsers/hash_parser.rb +0 -19
- data/lib/bioinform/data_models/parsers.rb +0 -6
- data/spec/data_models/parser_spec.rb +0 -46
- data/spec/data_models/parsers/array_parser_spec.rb +0 -53
- data/spec/data_models/parsers/hash_parser_spec.rb +0 -60
- data/spec/data_models/parsers/string_fantom_parser_spec.rb +0 -38
- data/spec/data_models/parsers/string_parser_spec.rb +0 -156
data/spec/data_models/pm_spec.rb
CHANGED
@@ -3,96 +3,22 @@ require 'bioinform/data_models/pm'
|
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
describe PM do
|
6
|
-
include Parser::Helpers
|
7
|
-
|
8
6
|
describe '#valid?' do
|
9
7
|
it 'should be true iff an argument is an array of arrays of 4 numerics in a column' do
|
10
|
-
|
11
|
-
PM.new.instance_eval{@matrix =
|
12
|
-
PM.new.instance_eval{@matrix =
|
13
|
-
PM.new.instance_eval{@matrix = [
|
14
|
-
PM.new.instance_eval{@matrix = [[1,2,3],[1,4,6.5]]; self }.valid?.should be_false
|
15
|
-
PM.new.instance_eval{@matrix = [[1,2,
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
describe '#initialize' do
|
20
|
-
context 'when parser specified' do
|
21
|
-
before :each do
|
22
|
-
parser_stub :ParserBad, false, { matrix: [[0,0,0,0],[1,1,1,1]], name: 'Bad' }
|
23
|
-
parser_stub :ParserGood, true, { matrix: [[1,1,1,1],[1,1,1,1]], name: 'Good' }
|
24
|
-
parser_stub :ParserWithIncompleteOutput, true, { name: 'Without `matrix` key' }
|
25
|
-
parser_stub :ParserGoodWithoutName, true, { matrix: [[1,1,1,1],[1,1,1,1]] }
|
26
|
-
parser_stub :ParserWithInvalidMatrix, true, { matrix: [[1,1,1],[1,1,1]] }
|
27
|
-
end
|
28
|
-
after :each do
|
29
|
-
parser_subclasses_cleanup
|
30
|
-
end
|
31
|
-
|
32
|
-
it 'should raise an ArgumentError if parser cannot parse input' do
|
33
|
-
expect{ PM.new('my stub input', ParserBad) }.to raise_error ArgumentError
|
34
|
-
end
|
35
|
-
it 'should raise an ArgumentError if parser output doesn\'t have `matrix` key' do
|
36
|
-
expect{ PM.new('my stub input', ParserWithIncompleteOutput) }.to raise_error ArgumentError
|
37
|
-
end
|
38
|
-
it 'should raise an ArgumentError if parser output has invalid matrix' do
|
39
|
-
expect{ PM.new('my stub input', ParserWithInvalidMatrix) }.to raise_error ArgumentError
|
40
|
-
end
|
41
|
-
|
42
|
-
context 'when parse was successful' do
|
43
|
-
it 'should load matrix from parser\'s resulting hash' do
|
44
|
-
pm = PM.new('my stub input', ParserGoodWithoutName)
|
45
|
-
pm.matrix.should == [[1,1,1,1],[1,1,1,1]]
|
46
|
-
pm.name.should be_nil
|
47
|
-
end
|
48
|
-
it 'should set other available attributes from parse resulting hash' do
|
49
|
-
pm = PM.new('my stub input', ParserGood)
|
50
|
-
pm.matrix.should == [[1,1,1,1],[1,1,1,1]]
|
51
|
-
pm.name.should == 'Good'
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
context 'when parser not specified' do
|
57
|
-
after :each do
|
58
|
-
parser_subclasses_cleanup
|
59
|
-
end
|
60
|
-
it 'should raise an ArgumentError if no one parser can parse input' do
|
61
|
-
parser_stub :ParserBad, false, { matrix: [[0,0,0,0],[1,1,1,1]], name: 'Bad' }
|
62
|
-
expect{ PM.new('my stub input') }.to raise_error ArgumentError
|
63
|
-
end
|
64
|
-
it 'should use first parsed which can parse input' do
|
65
|
-
parser_stub :ParserBad, false, { matrix: [[0,0,0,0],[1,1,1,1]], name: 'Bad' }
|
66
|
-
parser_stub :ParserGoodFirst, true, { matrix: [[1,1,1,1],[1,1,1,1]], name: 'GoodFirst' }
|
67
|
-
parser_stub :ParserGoodSecond, true, { matrix: [[1,1,1,1],[1,1,1,1]], name: 'GoodSecond' }
|
68
|
-
|
69
|
-
pm = PM.new('my stub input')
|
70
|
-
pm.name.should == 'GoodFirst'
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
describe '#matrix=' do
|
76
|
-
it 'should replace matrix if argument is a valid matrix' do
|
77
|
-
@pm = PM.new()
|
78
|
-
@pm.matrix.should be_nil
|
79
|
-
|
80
|
-
@pm.matrix = [[1,2,3,4],[1,4,5,6.5]]
|
81
|
-
@pm.matrix.should == [[1,2,3,4],[1,4,5,6.5]]
|
82
|
-
|
83
|
-
@pm.matrix = [[1,4,5,6.5], [2,2,2,2]]
|
84
|
-
@pm.matrix.should == [[1,4,5,6.5],[2,2,2,2]]
|
85
|
-
end
|
86
|
-
it 'should raise an exception if argument isn\'t valid matrix' do
|
87
|
-
@pm = PM.new
|
88
|
-
expect{ @pm.matrix = [[1,2,3,4],[1,4,5]] }.to raise_error
|
8
|
+
|
9
|
+
PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,3,4],[1,4,5,6.5]]; self }.valid?.should be_true
|
10
|
+
PM.new([[0,0,0,0]]).instance_eval{@matrix = {A: [1,1], C: [2,4], G: [3,5], T: [4, 6.5]}; self }.valid?.should be_false
|
11
|
+
PM.new([[0,0,0,0]]).instance_eval{@matrix = [{A:1,C:2,G:3,T:4},{A:1,C:4,G:5,T: 6.5}]; self }.valid?.should be_false
|
12
|
+
PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,3,4],[1,4,6.5]]; self }.valid?.should be_false
|
13
|
+
PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,3],[1,4,6.5]]; self }.valid?.should be_false
|
14
|
+
PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,'3','4'],[1,'4','5',6.5]]; self }.valid?.should be_false
|
15
|
+
|
89
16
|
end
|
90
|
-
end
|
17
|
+
end
|
91
18
|
|
92
19
|
describe '#to_s' do
|
93
20
|
before :each do
|
94
|
-
@pm = PM.new
|
95
|
-
@pm.matrix = [[1,2,3,4],[1,4,5,6.5]]
|
21
|
+
@pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
|
96
22
|
end
|
97
23
|
it 'should return string with single-tabulated multiline matrix' do
|
98
24
|
@pm.to_s.should == "1\t2\t3\t4\n1\t4\t5\t6.5"
|
@@ -115,28 +41,52 @@ module Bioinform
|
|
115
41
|
end
|
116
42
|
|
117
43
|
describe '#pretty_string' do
|
118
|
-
it 'should
|
119
|
-
|
44
|
+
it 'should format string with 7-chars fields' do
|
45
|
+
PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.should == " A C G T \n 1.0 2.0 3.0 4.0\n 5.0 6.0 7.0 8.0"
|
46
|
+
end
|
47
|
+
it 'should return a string of floats formatted with spaces' do
|
48
|
+
PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.should match(/1.0 +2.0 +3.0 +4.0 *\n *5.0 +6.0 +7.0 +8.0/)
|
49
|
+
end
|
50
|
+
it 'should contain first string of ACGT letters' do
|
51
|
+
PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.lines.first.should match(/A +C +G +T/)
|
52
|
+
end
|
53
|
+
it 'should round floats upto 3 digits' do
|
54
|
+
PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] ).pretty_string.should match(/1.1 +2.22 +3.333 +4.444 *\n *5.5 +6.66 +7.777 +8.889/)
|
55
|
+
end
|
56
|
+
|
120
57
|
context 'with name specified' do
|
121
|
-
|
58
|
+
before :each do
|
59
|
+
@pm = PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] )
|
60
|
+
@pm.name = 'MyName'
|
61
|
+
end
|
62
|
+
it 'should contain name if parameter `with_name` isn\'t false' do
|
63
|
+
@pm.pretty_string.should match(/MyName\n/)
|
64
|
+
end
|
65
|
+
it 'should not contain name if parameter `with_name` is false' do
|
66
|
+
@pm.pretty_string(false).should_not match(/MyName\n/)
|
67
|
+
end
|
122
68
|
end
|
123
69
|
context 'without name specified' do
|
124
|
-
|
70
|
+
before :each do
|
71
|
+
@pm = PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] )
|
72
|
+
end
|
73
|
+
it 'should not contain name whether parameter `with_name` is or isn\'t false' do
|
74
|
+
@pm.pretty_string.should_not match(/MyName\n/)
|
75
|
+
@pm.pretty_string(false).should_not match(/MyName\n/)
|
76
|
+
end
|
125
77
|
end
|
126
78
|
end
|
127
79
|
|
128
80
|
describe '#size' do
|
129
81
|
it 'should return number of positions' do
|
130
|
-
@pm = PM.new
|
131
|
-
@pm.matrix = [[1,2,3,4],[1,4,5,6.5]]
|
82
|
+
@pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
|
132
83
|
@pm.size.should == 2
|
133
84
|
end
|
134
85
|
end
|
135
86
|
|
136
87
|
describe '#to_hash' do
|
137
88
|
before :each do
|
138
|
-
@pm = PM.new
|
139
|
-
@pm.matrix = [[1,2,3,4],[1,4,5,6.5]]
|
89
|
+
@pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
|
140
90
|
@hsh = @pm.to_hash
|
141
91
|
end
|
142
92
|
it 'should return a hash with keys A, C, G, T' do
|
@@ -159,8 +109,7 @@ module Bioinform
|
|
159
109
|
|
160
110
|
describe '#background' do
|
161
111
|
before :each do
|
162
|
-
@pm = PM.new
|
163
|
-
@pm.matrix = [[1,2,3,4],[1,4,5,6.5]]
|
112
|
+
@pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
|
164
113
|
end
|
165
114
|
context 'when none arguments passed' do
|
166
115
|
context 'when pm just created' do
|
@@ -169,7 +118,7 @@ module Bioinform
|
|
169
118
|
end
|
170
119
|
end
|
171
120
|
it 'should return background' do
|
172
|
-
@pm.
|
121
|
+
@pm.background = [0.2, 0.3, 0.3, 0.2]
|
173
122
|
@pm.background.should == [0.2, 0.3, 0.3, 0.2]
|
174
123
|
end
|
175
124
|
end
|
@@ -181,15 +130,14 @@ module Bioinform
|
|
181
130
|
end
|
182
131
|
context 'when more than one argument passed' do
|
183
132
|
it 'should raise an ArgumentError' do
|
184
|
-
expect { @pm.background(:first, :second
|
133
|
+
expect { @pm.background(:first, :second) }.to raise_error ArgumentError
|
185
134
|
end
|
186
135
|
end
|
187
136
|
end
|
188
137
|
|
189
138
|
describe '#reverse_complement!' do
|
190
139
|
before :each do
|
191
|
-
@pm = PM.new
|
192
|
-
@pm.matrix = [[1, 2, 3, 4], [1, 4, 5, 6.5]]
|
140
|
+
@pm = PM.new( [[1, 2, 3, 4], [1, 4, 5, 6.5]] )
|
193
141
|
end
|
194
142
|
it 'should return pm object itself' do
|
195
143
|
@pm.reverse_complement!.should be_equal(@pm)
|
@@ -202,8 +150,7 @@ module Bioinform
|
|
202
150
|
|
203
151
|
describe '#left_augment!' do
|
204
152
|
before :each do
|
205
|
-
@pm = PM.new
|
206
|
-
@pm.matrix = [[1, 2, 3, 4], [1, 4, 5, 6.5]]
|
153
|
+
@pm = PM.new( [[1, 2, 3, 4], [1, 4, 5, 6.5]] )
|
207
154
|
end
|
208
155
|
it 'should return pm object itself' do
|
209
156
|
@pm.left_augment!(2).should be_equal(@pm)
|
@@ -216,8 +163,7 @@ module Bioinform
|
|
216
163
|
|
217
164
|
describe '#right_augment!' do
|
218
165
|
before :each do
|
219
|
-
@pm = PM.new
|
220
|
-
@pm.matrix = [[1, 2, 3, 4], [1, 4, 5, 6.5]]
|
166
|
+
@pm = PM.new( [[1, 2, 3, 4], [1, 4, 5, 6.5]] )
|
221
167
|
end
|
222
168
|
it 'should return pm object itself' do
|
223
169
|
@pm.right_augment!(2).should be_equal(@pm)
|
@@ -230,8 +176,7 @@ module Bioinform
|
|
230
176
|
|
231
177
|
describe '#shift_to_zero!' do
|
232
178
|
before :each do
|
233
|
-
@pm = PM.new
|
234
|
-
@pm.matrix = [[1, 2, 3, 4], [5, 6.5, 3, 4]]
|
179
|
+
@pm = PM.new( [[1, 2, 3, 4], [5, 6.5, 3, 4]] )
|
235
180
|
end
|
236
181
|
it 'should return pm object itself' do
|
237
182
|
@pm.shift_to_zero!.should be_equal(@pm)
|
@@ -244,8 +189,7 @@ module Bioinform
|
|
244
189
|
|
245
190
|
describe '#discrete!' do
|
246
191
|
before :each do
|
247
|
-
@pm = PM.new
|
248
|
-
@pm.matrix = [[1.3, 2.0, 3.2, 4.9], [6.51, 6.5, 3.25, 4.633]]
|
192
|
+
@pm = PM.new( [[1.3, 2.0, 3.2, 4.9], [6.51, 6.5, 3.25, 4.633]] )
|
249
193
|
end
|
250
194
|
it 'should return pm object itself' do
|
251
195
|
@pm.discrete!(10).should be_equal(@pm)
|
@@ -262,28 +206,10 @@ module Bioinform
|
|
262
206
|
end
|
263
207
|
end
|
264
208
|
|
265
|
-
describe '#background_sum' do
|
266
|
-
before :each do
|
267
|
-
@pm = PM.new
|
268
|
-
@pm.matrix = [[1.3, 2.0, 3.2, 4.9], [5.0, 6.5, 3.2, 4.6]]
|
269
|
-
end
|
270
|
-
context 'when background is [1,1,1,1]' do
|
271
|
-
it 'should be 4' do
|
272
|
-
@pm.background_sum.should == 4
|
273
|
-
end
|
274
|
-
end
|
275
|
-
it 'should be sum of background' do
|
276
|
-
@pm.background( [0.2, 0.3, 0.3, 0.2] )
|
277
|
-
@pm.background_sum.should == 1.0
|
278
|
-
end
|
279
|
-
end
|
280
|
-
|
281
209
|
describe '#vocabulary_volume' do
|
282
210
|
before :each do
|
283
|
-
@pm_2_positions = PM.new
|
284
|
-
@
|
285
|
-
@pm_3_positions = PM.new
|
286
|
-
@pm_3_positions.matrix = [[1.3, 2.0, 3.2, 4.9], [5.0, 6.5, 3.2, 4.6], [1, 2, 3, 4]]
|
211
|
+
@pm_2_positions = PM.new( [[1.3, 2.0, 3.2, 4.9], [5.0, 6.5, 3.2, 4.6]] )
|
212
|
+
@pm_3_positions = PM.new( [[1.3, 2.0, 3.2, 4.9], [5.0, 6.5, 3.2, 4.6], [1, 2, 3, 4]] )
|
287
213
|
end
|
288
214
|
context 'when background is [1,1,1,1]' do
|
289
215
|
it 'should be equal to number of words' do
|
@@ -294,33 +220,30 @@ module Bioinform
|
|
294
220
|
context 'when background is normalized probabilities' do
|
295
221
|
it 'should be 1.0' do
|
296
222
|
@pm_2_positions.background( [0.2, 0.3, 0.3, 0.2] )
|
297
|
-
@pm_2_positions.
|
223
|
+
@pm_2_positions.vocabulary_volume.should == 1.0
|
298
224
|
|
299
225
|
@pm_3_positions.background( [0.2, 0.3, 0.3, 0.2] )
|
300
|
-
@pm_3_positions.
|
226
|
+
@pm_3_positions.vocabulary_volume.should == 1.0
|
301
227
|
end
|
302
228
|
end
|
303
229
|
end
|
304
230
|
|
305
231
|
describe '#best_score' do
|
306
232
|
it 'should be equal to best score' do
|
307
|
-
@pm = PM.new
|
308
|
-
@pm.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
|
233
|
+
@pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
309
234
|
@pm.best_score.should == 4.9 + 7.13 + (-1.0)
|
310
235
|
end
|
311
236
|
end
|
312
237
|
describe '#worst_score' do
|
313
238
|
it 'should be equal to worst score' do
|
314
|
-
@pm = PM.new
|
315
|
-
@pm.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
|
239
|
+
@pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
316
240
|
@pm.worst_score.should == 1.3 + 3.25 + (-1.5)
|
317
241
|
end
|
318
242
|
end
|
319
243
|
|
320
244
|
describe '#best_suffix' do
|
321
245
|
it 'should return maximal score of suffices from i-th position inclusively i.e. [i..end]' do
|
322
|
-
@pm = PM.new
|
323
|
-
@pm.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
|
246
|
+
@pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
324
247
|
@pm.best_suffix(0).should == (4.9 + 7.13 - 1.0)
|
325
248
|
@pm.best_suffix(1).should == (7.13 - 1.0)
|
326
249
|
@pm.best_suffix(2).should == (-1.0)
|
@@ -329,8 +252,7 @@ module Bioinform
|
|
329
252
|
end
|
330
253
|
describe '#worst_suffix' do
|
331
254
|
it 'should return minimal score of suffices from i-th position inclusively i.e. [i..end]' do
|
332
|
-
@pm = PM.new
|
333
|
-
@pm.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
|
255
|
+
@pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
334
256
|
@pm.worst_suffix(0).should == (1.3 + 3.25 - 1.5)
|
335
257
|
@pm.worst_suffix(1).should == (3.25 - 1.5)
|
336
258
|
@pm.worst_suffix(2).should == (- 1.5)
|
@@ -341,10 +263,8 @@ module Bioinform
|
|
341
263
|
[:shift_to_zero, :reverse_complement].each do |meth|
|
342
264
|
describe "nonbang method #{meth}" do
|
343
265
|
before :each do
|
344
|
-
@pm = PM.new
|
345
|
-
@
|
346
|
-
@pm_2 = PM.new
|
347
|
-
@pm_2.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
|
266
|
+
@pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
267
|
+
@pm_2 = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
348
268
|
end
|
349
269
|
it 'should return copy of object not object itself' do
|
350
270
|
@pm.send(meth).should_not be_equal @pm
|
@@ -358,10 +278,8 @@ module Bioinform
|
|
358
278
|
[:discrete , :left_augment, :right_augment].each do |meth|
|
359
279
|
describe "nonbang method #{meth}" do
|
360
280
|
before :each do
|
361
|
-
@pm = PM.new
|
362
|
-
@
|
363
|
-
@pm_2 = PM.new
|
364
|
-
@pm_2.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
|
281
|
+
@pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
282
|
+
@pm_2 = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
365
283
|
end
|
366
284
|
it 'should return copy of object not object itself' do
|
367
285
|
@pm.send(meth, 2).should_not be_equal @pm
|
@@ -5,13 +5,11 @@ module Bioinform
|
|
5
5
|
describe PWM do
|
6
6
|
describe '#score_mean' do
|
7
7
|
it 'should be equal to a mean score of pwm' do
|
8
|
-
pwm = PWM.new
|
9
|
-
pwm.matrix = [[1,2,1,2],[4,6,8,6],[2,2,2,2]]
|
8
|
+
pwm = PWM.new( [[1,2,1,2],[4,6,8,6],[2,2,2,2]] )
|
10
9
|
pwm.score_mean.should == 1.5 + 6 + 2
|
11
10
|
end
|
12
11
|
it 'should be equal to a mean score of pwm by measure induced from background probability mean' do
|
13
|
-
pwm = PWM.new.background([0.2, 0.3, 0.3, 0.2])
|
14
|
-
pwm.matrix = [[1,2,1,2],[4,6,8,6],[2,2,2,2]]
|
12
|
+
pwm = PWM.new( [[1,2,1,2],[4,6,8,6],[2,2,2,2]] ).background([0.2, 0.3, 0.3, 0.2])
|
15
13
|
pwm.score_mean.should == ((0.2*1+0.3*2+0.3*1+0.2*2) + (0.2*4+0.3*6+0.3*8+0.2*6) + (0.2*2+0.3*2+0.3*2+0.2*2)) / (0.2+0.3+0.3+0.2)
|
16
14
|
end
|
17
15
|
end
|
@@ -24,9 +22,7 @@ module Bioinform
|
|
24
22
|
|
25
23
|
describe '#score' do
|
26
24
|
let(:pwm) do
|
27
|
-
|
28
|
-
pwm.matrix = [[10000,20000,30000,40000],[1000,2000,3000,4000],[100,200,300,400],[10,20,30,40],[1,2,3,4]]
|
29
|
-
pwm
|
25
|
+
PWM.new( [[10000,20000,30000,40000],[1000,2000,3000,4000],[100,200,300,400],[10,20,30,40],[1,2,3,4]] )
|
30
26
|
end
|
31
27
|
it 'should evaluate to score of given word' do
|
32
28
|
pwm.score('aAAAA').should == 11111
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'bioinform/parsers/parser'
|
3
|
+
|
4
|
+
module Bioinform
|
5
|
+
describe Parser do
|
6
|
+
good_cases = {
|
7
|
+
'Array Nx4' => {input: [[0,1,2,3],[10,11,12,13]],
|
8
|
+
matrix: [[0,1,2,3],[10,11,12,13]] },
|
9
|
+
|
10
|
+
'Array 4xN' => {input: [[0,10],[1,11],[2,12],[3,13]],
|
11
|
+
matrix: [[0,1,2,3],[10,11,12,13]] },
|
12
|
+
|
13
|
+
'Hash A,C,G,T => Arrays' => { input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13]},
|
14
|
+
matrix: [[0,1,2,3],[10,11,12,13]] },
|
15
|
+
|
16
|
+
'Hash array of hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3}, {:A => 10,:c => 11,'g' => 12,'T' => 13}],
|
17
|
+
matrix: [[0,1,2,3],[10,11,12,13]] },
|
18
|
+
|
19
|
+
'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[0,1,2,3],[4,5,6,7],[8,9,10,11],[12,13,14,15]],
|
20
|
+
matrix: [[0,1,2,3],[4,5,6,7],[8,9,10,11],[12,13,14,15]] },
|
21
|
+
|
22
|
+
'Hash A,C,G,T => 4-Arrays' => { input: {:A => [0,10,100,1000], :c => [1,11,101,1001],'g' => [2,12,102,1002],'T' => [3,13,103,1003]},
|
23
|
+
matrix: [[0,1,2,3],[10,11,12,13],[100,101,102,103],[1000,1001,1002,1003]] },
|
24
|
+
|
25
|
+
'4-Arrays of A,C,G,T hashes' => { input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3},
|
26
|
+
{:A => 10, :c => 11, 'g' => 12, 'T' => 13},
|
27
|
+
{:A => 100, :c => 101, 'g' => 102, 'T' => 103},
|
28
|
+
{:A => 1000, :c => 1001, 'g' => 1002, 'T' => 1003}],
|
29
|
+
matrix: [[0,1,2,3],[10,11,12,13],[100,101,102,103],[1000,1001,1002,1003]] }
|
30
|
+
}
|
31
|
+
|
32
|
+
bad_cases = {
|
33
|
+
'Different sizes of row arrays' => {input: [[1,2,3,4],[5,6,7,8,9]] },
|
34
|
+
|
35
|
+
'Different sizes of column arrays' => {input: [[0,10],[1,11],[2,12],[3]] },
|
36
|
+
|
37
|
+
'No one dimension have size 4' => {input: [[0,1,2,3,4],[10,11,12,13,14], [0,1,2,3,4]] },
|
38
|
+
|
39
|
+
'Missing keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3},
|
40
|
+
{:A => 10, :c => 11, 'g' => 12}] },
|
41
|
+
'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3},
|
42
|
+
{:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
|
43
|
+
|
44
|
+
'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3},
|
45
|
+
{:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
|
46
|
+
|
47
|
+
'Different sizes of row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13,14]} },
|
48
|
+
|
49
|
+
'Missing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12]} },
|
50
|
+
|
51
|
+
'Wrong keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'X' => [2,12]} },
|
52
|
+
|
53
|
+
'Excessing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12], 'T' => [3,12], :X => [4,14]} }
|
54
|
+
}
|
55
|
+
|
56
|
+
parser_specs(Parser, good_cases, bad_cases)
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'bioinform/parsers/string_fantom_parser'
|
3
|
+
|
4
|
+
module Bioinform
|
5
|
+
describe StringFantomParser do
|
6
|
+
good_cases = {
|
7
|
+
'string in Fantom-format' => {input: "
|
8
|
+
NA motif_CTNCAG
|
9
|
+
P0 A C G T
|
10
|
+
P1 0 1878368 0 0
|
11
|
+
P2 0 0 0 1878368
|
12
|
+
P3 469592 469592 469592 469592
|
13
|
+
P4 0 1878368 0 0
|
14
|
+
P5 1878368 0 0 0
|
15
|
+
P6 0 0 1878368 0",
|
16
|
+
matrix: [[0.0, 1878368.0, 0.0, 0.0],
|
17
|
+
[0.0, 0.0, 0.0, 1878368.0],
|
18
|
+
[469592.0, 469592.0, 469592.0, 469592.0],
|
19
|
+
[0.0, 1878368.0, 0.0, 0.0],
|
20
|
+
[1878368.0, 0.0, 0.0, 0.0],
|
21
|
+
[0.0, 0.0, 1878368.0, 0.0]] }
|
22
|
+
}
|
23
|
+
|
24
|
+
bad_cases = { }
|
25
|
+
|
26
|
+
parser_specs(StringFantomParser, good_cases, bad_cases)
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'bioinform/parsers/string_parser'
|
3
|
+
|
4
|
+
module Bioinform
|
5
|
+
describe StringParser do
|
6
|
+
good_cases = {
|
7
|
+
'Nx4 string' => {input: "1 2 3 4\n5 6 7 8",
|
8
|
+
matrix: [[1,2,3,4],[5,6,7,8]] },
|
9
|
+
|
10
|
+
'4xN string' => {input: "1 5\n2 6\n3 7\n 4 8",
|
11
|
+
matrix: [[1,2,3,4],[5,6,7,8]] },
|
12
|
+
|
13
|
+
'string with name' => {input: "TestMatrix\n1 5\n2 6\n3 7\n 4 8",
|
14
|
+
matrix: [[1,2,3,4],[5,6,7,8]], name: 'TestMatrix' },
|
15
|
+
|
16
|
+
'string with name (with introduction sign)' => {input: ">\t TestMatrix\n1 5\n2 6\n3 7\n 4 8",
|
17
|
+
matrix: [[1,2,3,4],[5,6,7,8]],
|
18
|
+
name: 'TestMatrix' },
|
19
|
+
|
20
|
+
'string with name (with special characters)' => {input: "Testmatrix_first:subname+sub-subname\n1 5\n2 6\n3 7\n 4 8",
|
21
|
+
matrix: [[1,2,3,4],[5,6,7,8]], name: 'Testmatrix_first:subname+sub-subname' },
|
22
|
+
|
23
|
+
'string with float numerics' => {input: "1.23 4.56 7.8 9.0\n9 -8.7 6.54 -3210",
|
24
|
+
matrix: [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]},
|
25
|
+
|
26
|
+
'string with exponents' => {input: "123e-2 0.456e+1 7.8 9.0\n9 -87000000000E-10 6.54 -3.210e3",
|
27
|
+
matrix: [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]},
|
28
|
+
|
29
|
+
'string with multiple spaces and tabs' => {input: "1 \t\t 2 3 4\n 5 6 7 8",
|
30
|
+
matrix: [[1,2,3,4],[5,6,7,8]] },
|
31
|
+
|
32
|
+
'string with preceeding and terminating newlines' => {input: "\n\n\t 1 2 3 4\n5 6 7 8 \n\t\n",
|
33
|
+
matrix: [[1,2,3,4],[5,6,7,8]] },
|
34
|
+
|
35
|
+
'string with windows crlf' => {input: "1 2 3 4\r\n5 6 7 8",
|
36
|
+
matrix: [[1,2,3,4],[5,6,7,8]] }
|
37
|
+
}
|
38
|
+
|
39
|
+
bad_cases = {
|
40
|
+
'string with non-numeric input' => {input: "1.23 4.56 78aaa 9.0\n9 -8.7 6.54 -3210" },
|
41
|
+
'string with empty exponent sign' => {input: "1.23 4.56 7.8 9.0\n 9e -8.7 6.54 3210" }
|
42
|
+
}
|
43
|
+
|
44
|
+
parser_specs(StringParser, good_cases, bad_cases)
|
45
|
+
end
|
46
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -2,27 +2,19 @@ $LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
|
|
2
2
|
$LOAD_PATH.unshift File.dirname(__FILE__)
|
3
3
|
|
4
4
|
require 'rspec'
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
Parser.subclasses.each do |klass|
|
19
|
-
#class_levels = klass.to_s.split('::')
|
20
|
-
#class_levels[0..-2].inject(Object){|klass, level| klass.const_get level}.const_set(class_name, class_levels.last)
|
21
|
-
|
22
|
-
Bioinform.send :remove_const, klass.name.split('::').last
|
23
|
-
end
|
24
|
-
Parser.subclasses.clear
|
25
|
-
end
|
5
|
+
|
6
|
+
def parser_specs(parser_klass, good_cases, bad_cases)
|
7
|
+
good_cases.each do |case_description, input_and_result|
|
8
|
+
it "should be able to parse #{case_description}" do
|
9
|
+
result = parser_klass.new(input_and_result[:input]).parse
|
10
|
+
result[:matrix].should == input_and_result[:matrix]
|
11
|
+
result[:name].should == input_and_result[:name] if input_and_result.has_key?(:name)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
bad_cases.each do |case_description, input|
|
16
|
+
it "should fail silently returning {} on parsing #{case_description}" do
|
17
|
+
parser_klass.new(input[:input]).parse.should == {}
|
26
18
|
end
|
27
19
|
end
|
28
20
|
end
|
@@ -3,9 +3,17 @@ require 'bioinform/support/multiline_squish'
|
|
3
3
|
|
4
4
|
describe String do
|
5
5
|
describe '#multiline_squish' do
|
6
|
-
it 'should replace multiple spaces with one space'
|
7
|
-
|
8
|
-
|
9
|
-
it 'should
|
6
|
+
it 'should replace multiple spaces with one space' do
|
7
|
+
"abc def ghi\n jk lmn".multiline_squish.should == "abc def ghi\njk lmn"
|
8
|
+
end
|
9
|
+
it 'should replace tabs with a space' do
|
10
|
+
"abc\tdef ghi \t jk".multiline_squish.should == 'abc def ghi jk'
|
11
|
+
end
|
12
|
+
it 'should replace \r\n with \n' do
|
13
|
+
"abc def ghi\r\njk lmn".multiline_squish.should == "abc def ghi\njk lmn"
|
14
|
+
end
|
15
|
+
it 'should preserve rows pagination' do
|
16
|
+
"abc def ghi\njk lmn".multiline_squish.should == "abc def ghi\njk lmn"
|
17
|
+
end
|
10
18
|
end
|
11
19
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bioinform
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-07-
|
12
|
+
date: 2012-07-31 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -59,16 +59,14 @@ files:
|
|
59
59
|
- lib/bioinform.rb
|
60
60
|
- lib/bioinform/data_models.rb
|
61
61
|
- lib/bioinform/data_models/collection.rb
|
62
|
-
- lib/bioinform/data_models/parser.rb
|
63
|
-
- lib/bioinform/data_models/parsers.rb
|
64
|
-
- lib/bioinform/data_models/parsers/array_parser.rb
|
65
|
-
- lib/bioinform/data_models/parsers/hash_parser.rb
|
66
|
-
- lib/bioinform/data_models/parsers/string_fantom_parser.rb
|
67
|
-
- lib/bioinform/data_models/parsers/string_parser.rb
|
68
62
|
- lib/bioinform/data_models/pcm.rb
|
69
63
|
- lib/bioinform/data_models/pm.rb
|
70
64
|
- lib/bioinform/data_models/ppm.rb
|
71
65
|
- lib/bioinform/data_models/pwm.rb
|
66
|
+
- lib/bioinform/parsers.rb
|
67
|
+
- lib/bioinform/parsers/parser.rb
|
68
|
+
- lib/bioinform/parsers/string_fantom_parser.rb
|
69
|
+
- lib/bioinform/parsers/string_parser.rb
|
72
70
|
- lib/bioinform/support.rb
|
73
71
|
- lib/bioinform/support/array_product.rb
|
74
72
|
- lib/bioinform/support/array_zip.rb
|
@@ -83,13 +81,12 @@ files:
|
|
83
81
|
- lib/bioinform/support/same_by.rb
|
84
82
|
- lib/bioinform/support/yaml_dump_file.rb
|
85
83
|
- lib/bioinform/version.rb
|
86
|
-
- spec/data_models/
|
87
|
-
- spec/data_models/parsers/array_parser_spec.rb
|
88
|
-
- spec/data_models/parsers/hash_parser_spec.rb
|
89
|
-
- spec/data_models/parsers/string_fantom_parser_spec.rb
|
90
|
-
- spec/data_models/parsers/string_parser_spec.rb
|
84
|
+
- spec/data_models/pcm_spec.rb
|
91
85
|
- spec/data_models/pm_spec.rb
|
92
86
|
- spec/data_models/pwm_spec.rb
|
87
|
+
- spec/parsers/parser_spec.rb
|
88
|
+
- spec/parsers/string_fantom_parser_spec.rb
|
89
|
+
- spec/parsers/string_parser_spec.rb
|
93
90
|
- spec/spec_helper.rb
|
94
91
|
- spec/support/array_product_spec.rb
|
95
92
|
- spec/support/array_zip_spec.rb
|
@@ -129,13 +126,12 @@ summary: Classes for work with different input formats of positional matrices an
|
|
129
126
|
several useful extensions for Enumerable module like parametric map and callable
|
130
127
|
symbols
|
131
128
|
test_files:
|
132
|
-
- spec/data_models/
|
133
|
-
- spec/data_models/parsers/array_parser_spec.rb
|
134
|
-
- spec/data_models/parsers/hash_parser_spec.rb
|
135
|
-
- spec/data_models/parsers/string_fantom_parser_spec.rb
|
136
|
-
- spec/data_models/parsers/string_parser_spec.rb
|
129
|
+
- spec/data_models/pcm_spec.rb
|
137
130
|
- spec/data_models/pm_spec.rb
|
138
131
|
- spec/data_models/pwm_spec.rb
|
132
|
+
- spec/parsers/parser_spec.rb
|
133
|
+
- spec/parsers/string_fantom_parser_spec.rb
|
134
|
+
- spec/parsers/string_parser_spec.rb
|
139
135
|
- spec/spec_helper.rb
|
140
136
|
- spec/support/array_product_spec.rb
|
141
137
|
- spec/support/array_zip_spec.rb
|