bioinform 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/bioinform/data_models/pcm.rb +12 -0
- data/lib/bioinform/data_models/pm.rb +38 -54
- data/lib/bioinform/data_models/pwm.rb +7 -7
- data/lib/bioinform/data_models.rb +2 -0
- data/lib/bioinform/parsers/parser.rb +40 -0
- data/lib/bioinform/{data_models/parsers → parsers}/string_fantom_parser.rb +1 -2
- data/lib/bioinform/{data_models/parsers → parsers}/string_parser.rb +7 -7
- data/lib/bioinform/parsers.rb +3 -0
- data/lib/bioinform/support/partial_sums.rb +2 -0
- data/lib/bioinform/version.rb +1 -1
- data/lib/bioinform.rb +0 -1
- data/spec/data_models/pcm_spec.rb +27 -0
- data/spec/data_models/pm_spec.rb +62 -144
- data/spec/data_models/pwm_spec.rb +3 -7
- data/spec/parsers/parser_spec.rb +58 -0
- data/spec/parsers/string_fantom_parser_spec.rb +28 -0
- data/spec/parsers/string_parser_spec.rb +46 -0
- data/spec/spec_helper.rb +13 -21
- data/spec/support/multiline_squish_spec.rb +12 -4
- data/spec/support/partial_sums_spec.rb +3 -0
- metadata +14 -18
- data/lib/bioinform/data_models/parser.rb +0 -38
- data/lib/bioinform/data_models/parsers/array_parser.rb +0 -17
- data/lib/bioinform/data_models/parsers/hash_parser.rb +0 -19
- data/lib/bioinform/data_models/parsers.rb +0 -6
- data/spec/data_models/parser_spec.rb +0 -46
- data/spec/data_models/parsers/array_parser_spec.rb +0 -53
- data/spec/data_models/parsers/hash_parser_spec.rb +0 -60
- data/spec/data_models/parsers/string_fantom_parser_spec.rb +0 -38
- data/spec/data_models/parsers/string_parser_spec.rb +0 -156
data/spec/data_models/pm_spec.rb
CHANGED
@@ -3,96 +3,22 @@ require 'bioinform/data_models/pm'
|
|
3
3
|
|
4
4
|
module Bioinform
|
5
5
|
describe PM do
|
6
|
-
include Parser::Helpers
|
7
|
-
|
8
6
|
describe '#valid?' do
|
9
7
|
it 'should be true iff an argument is an array of arrays of 4 numerics in a column' do
|
10
|
-
|
11
|
-
PM.new.instance_eval{@matrix =
|
12
|
-
PM.new.instance_eval{@matrix =
|
13
|
-
PM.new.instance_eval{@matrix = [
|
14
|
-
PM.new.instance_eval{@matrix = [[1,2,3],[1,4,6.5]]; self }.valid?.should be_false
|
15
|
-
PM.new.instance_eval{@matrix = [[1,2,
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
describe '#initialize' do
|
20
|
-
context 'when parser specified' do
|
21
|
-
before :each do
|
22
|
-
parser_stub :ParserBad, false, { matrix: [[0,0,0,0],[1,1,1,1]], name: 'Bad' }
|
23
|
-
parser_stub :ParserGood, true, { matrix: [[1,1,1,1],[1,1,1,1]], name: 'Good' }
|
24
|
-
parser_stub :ParserWithIncompleteOutput, true, { name: 'Without `matrix` key' }
|
25
|
-
parser_stub :ParserGoodWithoutName, true, { matrix: [[1,1,1,1],[1,1,1,1]] }
|
26
|
-
parser_stub :ParserWithInvalidMatrix, true, { matrix: [[1,1,1],[1,1,1]] }
|
27
|
-
end
|
28
|
-
after :each do
|
29
|
-
parser_subclasses_cleanup
|
30
|
-
end
|
31
|
-
|
32
|
-
it 'should raise an ArgumentError if parser cannot parse input' do
|
33
|
-
expect{ PM.new('my stub input', ParserBad) }.to raise_error ArgumentError
|
34
|
-
end
|
35
|
-
it 'should raise an ArgumentError if parser output doesn\'t have `matrix` key' do
|
36
|
-
expect{ PM.new('my stub input', ParserWithIncompleteOutput) }.to raise_error ArgumentError
|
37
|
-
end
|
38
|
-
it 'should raise an ArgumentError if parser output has invalid matrix' do
|
39
|
-
expect{ PM.new('my stub input', ParserWithInvalidMatrix) }.to raise_error ArgumentError
|
40
|
-
end
|
41
|
-
|
42
|
-
context 'when parse was successful' do
|
43
|
-
it 'should load matrix from parser\'s resulting hash' do
|
44
|
-
pm = PM.new('my stub input', ParserGoodWithoutName)
|
45
|
-
pm.matrix.should == [[1,1,1,1],[1,1,1,1]]
|
46
|
-
pm.name.should be_nil
|
47
|
-
end
|
48
|
-
it 'should set other available attributes from parse resulting hash' do
|
49
|
-
pm = PM.new('my stub input', ParserGood)
|
50
|
-
pm.matrix.should == [[1,1,1,1],[1,1,1,1]]
|
51
|
-
pm.name.should == 'Good'
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
context 'when parser not specified' do
|
57
|
-
after :each do
|
58
|
-
parser_subclasses_cleanup
|
59
|
-
end
|
60
|
-
it 'should raise an ArgumentError if no one parser can parse input' do
|
61
|
-
parser_stub :ParserBad, false, { matrix: [[0,0,0,0],[1,1,1,1]], name: 'Bad' }
|
62
|
-
expect{ PM.new('my stub input') }.to raise_error ArgumentError
|
63
|
-
end
|
64
|
-
it 'should use first parsed which can parse input' do
|
65
|
-
parser_stub :ParserBad, false, { matrix: [[0,0,0,0],[1,1,1,1]], name: 'Bad' }
|
66
|
-
parser_stub :ParserGoodFirst, true, { matrix: [[1,1,1,1],[1,1,1,1]], name: 'GoodFirst' }
|
67
|
-
parser_stub :ParserGoodSecond, true, { matrix: [[1,1,1,1],[1,1,1,1]], name: 'GoodSecond' }
|
68
|
-
|
69
|
-
pm = PM.new('my stub input')
|
70
|
-
pm.name.should == 'GoodFirst'
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
describe '#matrix=' do
|
76
|
-
it 'should replace matrix if argument is a valid matrix' do
|
77
|
-
@pm = PM.new()
|
78
|
-
@pm.matrix.should be_nil
|
79
|
-
|
80
|
-
@pm.matrix = [[1,2,3,4],[1,4,5,6.5]]
|
81
|
-
@pm.matrix.should == [[1,2,3,4],[1,4,5,6.5]]
|
82
|
-
|
83
|
-
@pm.matrix = [[1,4,5,6.5], [2,2,2,2]]
|
84
|
-
@pm.matrix.should == [[1,4,5,6.5],[2,2,2,2]]
|
85
|
-
end
|
86
|
-
it 'should raise an exception if argument isn\'t valid matrix' do
|
87
|
-
@pm = PM.new
|
88
|
-
expect{ @pm.matrix = [[1,2,3,4],[1,4,5]] }.to raise_error
|
8
|
+
|
9
|
+
PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,3,4],[1,4,5,6.5]]; self }.valid?.should be_true
|
10
|
+
PM.new([[0,0,0,0]]).instance_eval{@matrix = {A: [1,1], C: [2,4], G: [3,5], T: [4, 6.5]}; self }.valid?.should be_false
|
11
|
+
PM.new([[0,0,0,0]]).instance_eval{@matrix = [{A:1,C:2,G:3,T:4},{A:1,C:4,G:5,T: 6.5}]; self }.valid?.should be_false
|
12
|
+
PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,3,4],[1,4,6.5]]; self }.valid?.should be_false
|
13
|
+
PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,3],[1,4,6.5]]; self }.valid?.should be_false
|
14
|
+
PM.new([[0,0,0,0]]).instance_eval{@matrix = [[1,2,'3','4'],[1,'4','5',6.5]]; self }.valid?.should be_false
|
15
|
+
|
89
16
|
end
|
90
|
-
end
|
17
|
+
end
|
91
18
|
|
92
19
|
describe '#to_s' do
|
93
20
|
before :each do
|
94
|
-
@pm = PM.new
|
95
|
-
@pm.matrix = [[1,2,3,4],[1,4,5,6.5]]
|
21
|
+
@pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
|
96
22
|
end
|
97
23
|
it 'should return string with single-tabulated multiline matrix' do
|
98
24
|
@pm.to_s.should == "1\t2\t3\t4\n1\t4\t5\t6.5"
|
@@ -115,28 +41,52 @@ module Bioinform
|
|
115
41
|
end
|
116
42
|
|
117
43
|
describe '#pretty_string' do
|
118
|
-
it 'should
|
119
|
-
|
44
|
+
it 'should format string with 7-chars fields' do
|
45
|
+
PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.should == " A C G T \n 1.0 2.0 3.0 4.0\n 5.0 6.0 7.0 8.0"
|
46
|
+
end
|
47
|
+
it 'should return a string of floats formatted with spaces' do
|
48
|
+
PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.should match(/1.0 +2.0 +3.0 +4.0 *\n *5.0 +6.0 +7.0 +8.0/)
|
49
|
+
end
|
50
|
+
it 'should contain first string of ACGT letters' do
|
51
|
+
PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.lines.first.should match(/A +C +G +T/)
|
52
|
+
end
|
53
|
+
it 'should round floats upto 3 digits' do
|
54
|
+
PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] ).pretty_string.should match(/1.1 +2.22 +3.333 +4.444 *\n *5.5 +6.66 +7.777 +8.889/)
|
55
|
+
end
|
56
|
+
|
120
57
|
context 'with name specified' do
|
121
|
-
|
58
|
+
before :each do
|
59
|
+
@pm = PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] )
|
60
|
+
@pm.name = 'MyName'
|
61
|
+
end
|
62
|
+
it 'should contain name if parameter `with_name` isn\'t false' do
|
63
|
+
@pm.pretty_string.should match(/MyName\n/)
|
64
|
+
end
|
65
|
+
it 'should not contain name if parameter `with_name` is false' do
|
66
|
+
@pm.pretty_string(false).should_not match(/MyName\n/)
|
67
|
+
end
|
122
68
|
end
|
123
69
|
context 'without name specified' do
|
124
|
-
|
70
|
+
before :each do
|
71
|
+
@pm = PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] )
|
72
|
+
end
|
73
|
+
it 'should not contain name whether parameter `with_name` is or isn\'t false' do
|
74
|
+
@pm.pretty_string.should_not match(/MyName\n/)
|
75
|
+
@pm.pretty_string(false).should_not match(/MyName\n/)
|
76
|
+
end
|
125
77
|
end
|
126
78
|
end
|
127
79
|
|
128
80
|
describe '#size' do
|
129
81
|
it 'should return number of positions' do
|
130
|
-
@pm = PM.new
|
131
|
-
@pm.matrix = [[1,2,3,4],[1,4,5,6.5]]
|
82
|
+
@pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
|
132
83
|
@pm.size.should == 2
|
133
84
|
end
|
134
85
|
end
|
135
86
|
|
136
87
|
describe '#to_hash' do
|
137
88
|
before :each do
|
138
|
-
@pm = PM.new
|
139
|
-
@pm.matrix = [[1,2,3,4],[1,4,5,6.5]]
|
89
|
+
@pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
|
140
90
|
@hsh = @pm.to_hash
|
141
91
|
end
|
142
92
|
it 'should return a hash with keys A, C, G, T' do
|
@@ -159,8 +109,7 @@ module Bioinform
|
|
159
109
|
|
160
110
|
describe '#background' do
|
161
111
|
before :each do
|
162
|
-
@pm = PM.new
|
163
|
-
@pm.matrix = [[1,2,3,4],[1,4,5,6.5]]
|
112
|
+
@pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
|
164
113
|
end
|
165
114
|
context 'when none arguments passed' do
|
166
115
|
context 'when pm just created' do
|
@@ -169,7 +118,7 @@ module Bioinform
|
|
169
118
|
end
|
170
119
|
end
|
171
120
|
it 'should return background' do
|
172
|
-
@pm.
|
121
|
+
@pm.background = [0.2, 0.3, 0.3, 0.2]
|
173
122
|
@pm.background.should == [0.2, 0.3, 0.3, 0.2]
|
174
123
|
end
|
175
124
|
end
|
@@ -181,15 +130,14 @@ module Bioinform
|
|
181
130
|
end
|
182
131
|
context 'when more than one argument passed' do
|
183
132
|
it 'should raise an ArgumentError' do
|
184
|
-
expect { @pm.background(:first, :second
|
133
|
+
expect { @pm.background(:first, :second) }.to raise_error ArgumentError
|
185
134
|
end
|
186
135
|
end
|
187
136
|
end
|
188
137
|
|
189
138
|
describe '#reverse_complement!' do
|
190
139
|
before :each do
|
191
|
-
@pm = PM.new
|
192
|
-
@pm.matrix = [[1, 2, 3, 4], [1, 4, 5, 6.5]]
|
140
|
+
@pm = PM.new( [[1, 2, 3, 4], [1, 4, 5, 6.5]] )
|
193
141
|
end
|
194
142
|
it 'should return pm object itself' do
|
195
143
|
@pm.reverse_complement!.should be_equal(@pm)
|
@@ -202,8 +150,7 @@ module Bioinform
|
|
202
150
|
|
203
151
|
describe '#left_augment!' do
|
204
152
|
before :each do
|
205
|
-
@pm = PM.new
|
206
|
-
@pm.matrix = [[1, 2, 3, 4], [1, 4, 5, 6.5]]
|
153
|
+
@pm = PM.new( [[1, 2, 3, 4], [1, 4, 5, 6.5]] )
|
207
154
|
end
|
208
155
|
it 'should return pm object itself' do
|
209
156
|
@pm.left_augment!(2).should be_equal(@pm)
|
@@ -216,8 +163,7 @@ module Bioinform
|
|
216
163
|
|
217
164
|
describe '#right_augment!' do
|
218
165
|
before :each do
|
219
|
-
@pm = PM.new
|
220
|
-
@pm.matrix = [[1, 2, 3, 4], [1, 4, 5, 6.5]]
|
166
|
+
@pm = PM.new( [[1, 2, 3, 4], [1, 4, 5, 6.5]] )
|
221
167
|
end
|
222
168
|
it 'should return pm object itself' do
|
223
169
|
@pm.right_augment!(2).should be_equal(@pm)
|
@@ -230,8 +176,7 @@ module Bioinform
|
|
230
176
|
|
231
177
|
describe '#shift_to_zero!' do
|
232
178
|
before :each do
|
233
|
-
@pm = PM.new
|
234
|
-
@pm.matrix = [[1, 2, 3, 4], [5, 6.5, 3, 4]]
|
179
|
+
@pm = PM.new( [[1, 2, 3, 4], [5, 6.5, 3, 4]] )
|
235
180
|
end
|
236
181
|
it 'should return pm object itself' do
|
237
182
|
@pm.shift_to_zero!.should be_equal(@pm)
|
@@ -244,8 +189,7 @@ module Bioinform
|
|
244
189
|
|
245
190
|
describe '#discrete!' do
|
246
191
|
before :each do
|
247
|
-
@pm = PM.new
|
248
|
-
@pm.matrix = [[1.3, 2.0, 3.2, 4.9], [6.51, 6.5, 3.25, 4.633]]
|
192
|
+
@pm = PM.new( [[1.3, 2.0, 3.2, 4.9], [6.51, 6.5, 3.25, 4.633]] )
|
249
193
|
end
|
250
194
|
it 'should return pm object itself' do
|
251
195
|
@pm.discrete!(10).should be_equal(@pm)
|
@@ -262,28 +206,10 @@ module Bioinform
|
|
262
206
|
end
|
263
207
|
end
|
264
208
|
|
265
|
-
describe '#background_sum' do
|
266
|
-
before :each do
|
267
|
-
@pm = PM.new
|
268
|
-
@pm.matrix = [[1.3, 2.0, 3.2, 4.9], [5.0, 6.5, 3.2, 4.6]]
|
269
|
-
end
|
270
|
-
context 'when background is [1,1,1,1]' do
|
271
|
-
it 'should be 4' do
|
272
|
-
@pm.background_sum.should == 4
|
273
|
-
end
|
274
|
-
end
|
275
|
-
it 'should be sum of background' do
|
276
|
-
@pm.background( [0.2, 0.3, 0.3, 0.2] )
|
277
|
-
@pm.background_sum.should == 1.0
|
278
|
-
end
|
279
|
-
end
|
280
|
-
|
281
209
|
describe '#vocabulary_volume' do
|
282
210
|
before :each do
|
283
|
-
@pm_2_positions = PM.new
|
284
|
-
@
|
285
|
-
@pm_3_positions = PM.new
|
286
|
-
@pm_3_positions.matrix = [[1.3, 2.0, 3.2, 4.9], [5.0, 6.5, 3.2, 4.6], [1, 2, 3, 4]]
|
211
|
+
@pm_2_positions = PM.new( [[1.3, 2.0, 3.2, 4.9], [5.0, 6.5, 3.2, 4.6]] )
|
212
|
+
@pm_3_positions = PM.new( [[1.3, 2.0, 3.2, 4.9], [5.0, 6.5, 3.2, 4.6], [1, 2, 3, 4]] )
|
287
213
|
end
|
288
214
|
context 'when background is [1,1,1,1]' do
|
289
215
|
it 'should be equal to number of words' do
|
@@ -294,33 +220,30 @@ module Bioinform
|
|
294
220
|
context 'when background is normalized probabilities' do
|
295
221
|
it 'should be 1.0' do
|
296
222
|
@pm_2_positions.background( [0.2, 0.3, 0.3, 0.2] )
|
297
|
-
@pm_2_positions.
|
223
|
+
@pm_2_positions.vocabulary_volume.should == 1.0
|
298
224
|
|
299
225
|
@pm_3_positions.background( [0.2, 0.3, 0.3, 0.2] )
|
300
|
-
@pm_3_positions.
|
226
|
+
@pm_3_positions.vocabulary_volume.should == 1.0
|
301
227
|
end
|
302
228
|
end
|
303
229
|
end
|
304
230
|
|
305
231
|
describe '#best_score' do
|
306
232
|
it 'should be equal to best score' do
|
307
|
-
@pm = PM.new
|
308
|
-
@pm.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
|
233
|
+
@pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
309
234
|
@pm.best_score.should == 4.9 + 7.13 + (-1.0)
|
310
235
|
end
|
311
236
|
end
|
312
237
|
describe '#worst_score' do
|
313
238
|
it 'should be equal to worst score' do
|
314
|
-
@pm = PM.new
|
315
|
-
@pm.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
|
239
|
+
@pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
316
240
|
@pm.worst_score.should == 1.3 + 3.25 + (-1.5)
|
317
241
|
end
|
318
242
|
end
|
319
243
|
|
320
244
|
describe '#best_suffix' do
|
321
245
|
it 'should return maximal score of suffices from i-th position inclusively i.e. [i..end]' do
|
322
|
-
@pm = PM.new
|
323
|
-
@pm.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
|
246
|
+
@pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
324
247
|
@pm.best_suffix(0).should == (4.9 + 7.13 - 1.0)
|
325
248
|
@pm.best_suffix(1).should == (7.13 - 1.0)
|
326
249
|
@pm.best_suffix(2).should == (-1.0)
|
@@ -329,8 +252,7 @@ module Bioinform
|
|
329
252
|
end
|
330
253
|
describe '#worst_suffix' do
|
331
254
|
it 'should return minimal score of suffices from i-th position inclusively i.e. [i..end]' do
|
332
|
-
@pm = PM.new
|
333
|
-
@pm.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
|
255
|
+
@pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
334
256
|
@pm.worst_suffix(0).should == (1.3 + 3.25 - 1.5)
|
335
257
|
@pm.worst_suffix(1).should == (3.25 - 1.5)
|
336
258
|
@pm.worst_suffix(2).should == (- 1.5)
|
@@ -341,10 +263,8 @@ module Bioinform
|
|
341
263
|
[:shift_to_zero, :reverse_complement].each do |meth|
|
342
264
|
describe "nonbang method #{meth}" do
|
343
265
|
before :each do
|
344
|
-
@pm = PM.new
|
345
|
-
@
|
346
|
-
@pm_2 = PM.new
|
347
|
-
@pm_2.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
|
266
|
+
@pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
267
|
+
@pm_2 = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
348
268
|
end
|
349
269
|
it 'should return copy of object not object itself' do
|
350
270
|
@pm.send(meth).should_not be_equal @pm
|
@@ -358,10 +278,8 @@ module Bioinform
|
|
358
278
|
[:discrete , :left_augment, :right_augment].each do |meth|
|
359
279
|
describe "nonbang method #{meth}" do
|
360
280
|
before :each do
|
361
|
-
@pm = PM.new
|
362
|
-
@
|
363
|
-
@pm_2 = PM.new
|
364
|
-
@pm_2.matrix = [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]]
|
281
|
+
@pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
282
|
+
@pm_2 = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
365
283
|
end
|
366
284
|
it 'should return copy of object not object itself' do
|
367
285
|
@pm.send(meth, 2).should_not be_equal @pm
|
@@ -5,13 +5,11 @@ module Bioinform
|
|
5
5
|
describe PWM do
|
6
6
|
describe '#score_mean' do
|
7
7
|
it 'should be equal to a mean score of pwm' do
|
8
|
-
pwm = PWM.new
|
9
|
-
pwm.matrix = [[1,2,1,2],[4,6,8,6],[2,2,2,2]]
|
8
|
+
pwm = PWM.new( [[1,2,1,2],[4,6,8,6],[2,2,2,2]] )
|
10
9
|
pwm.score_mean.should == 1.5 + 6 + 2
|
11
10
|
end
|
12
11
|
it 'should be equal to a mean score of pwm by measure induced from background probability mean' do
|
13
|
-
pwm = PWM.new.background([0.2, 0.3, 0.3, 0.2])
|
14
|
-
pwm.matrix = [[1,2,1,2],[4,6,8,6],[2,2,2,2]]
|
12
|
+
pwm = PWM.new( [[1,2,1,2],[4,6,8,6],[2,2,2,2]] ).background([0.2, 0.3, 0.3, 0.2])
|
15
13
|
pwm.score_mean.should == ((0.2*1+0.3*2+0.3*1+0.2*2) + (0.2*4+0.3*6+0.3*8+0.2*6) + (0.2*2+0.3*2+0.3*2+0.2*2)) / (0.2+0.3+0.3+0.2)
|
16
14
|
end
|
17
15
|
end
|
@@ -24,9 +22,7 @@ module Bioinform
|
|
24
22
|
|
25
23
|
describe '#score' do
|
26
24
|
let(:pwm) do
|
27
|
-
|
28
|
-
pwm.matrix = [[10000,20000,30000,40000],[1000,2000,3000,4000],[100,200,300,400],[10,20,30,40],[1,2,3,4]]
|
29
|
-
pwm
|
25
|
+
PWM.new( [[10000,20000,30000,40000],[1000,2000,3000,4000],[100,200,300,400],[10,20,30,40],[1,2,3,4]] )
|
30
26
|
end
|
31
27
|
it 'should evaluate to score of given word' do
|
32
28
|
pwm.score('aAAAA').should == 11111
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'bioinform/parsers/parser'
|
3
|
+
|
4
|
+
module Bioinform
|
5
|
+
describe Parser do
|
6
|
+
good_cases = {
|
7
|
+
'Array Nx4' => {input: [[0,1,2,3],[10,11,12,13]],
|
8
|
+
matrix: [[0,1,2,3],[10,11,12,13]] },
|
9
|
+
|
10
|
+
'Array 4xN' => {input: [[0,10],[1,11],[2,12],[3,13]],
|
11
|
+
matrix: [[0,1,2,3],[10,11,12,13]] },
|
12
|
+
|
13
|
+
'Hash A,C,G,T => Arrays' => { input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13]},
|
14
|
+
matrix: [[0,1,2,3],[10,11,12,13]] },
|
15
|
+
|
16
|
+
'Hash array of hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3}, {:A => 10,:c => 11,'g' => 12,'T' => 13}],
|
17
|
+
matrix: [[0,1,2,3],[10,11,12,13]] },
|
18
|
+
|
19
|
+
'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[0,1,2,3],[4,5,6,7],[8,9,10,11],[12,13,14,15]],
|
20
|
+
matrix: [[0,1,2,3],[4,5,6,7],[8,9,10,11],[12,13,14,15]] },
|
21
|
+
|
22
|
+
'Hash A,C,G,T => 4-Arrays' => { input: {:A => [0,10,100,1000], :c => [1,11,101,1001],'g' => [2,12,102,1002],'T' => [3,13,103,1003]},
|
23
|
+
matrix: [[0,1,2,3],[10,11,12,13],[100,101,102,103],[1000,1001,1002,1003]] },
|
24
|
+
|
25
|
+
'4-Arrays of A,C,G,T hashes' => { input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3},
|
26
|
+
{:A => 10, :c => 11, 'g' => 12, 'T' => 13},
|
27
|
+
{:A => 100, :c => 101, 'g' => 102, 'T' => 103},
|
28
|
+
{:A => 1000, :c => 1001, 'g' => 1002, 'T' => 1003}],
|
29
|
+
matrix: [[0,1,2,3],[10,11,12,13],[100,101,102,103],[1000,1001,1002,1003]] }
|
30
|
+
}
|
31
|
+
|
32
|
+
bad_cases = {
|
33
|
+
'Different sizes of row arrays' => {input: [[1,2,3,4],[5,6,7,8,9]] },
|
34
|
+
|
35
|
+
'Different sizes of column arrays' => {input: [[0,10],[1,11],[2,12],[3]] },
|
36
|
+
|
37
|
+
'No one dimension have size 4' => {input: [[0,1,2,3,4],[10,11,12,13,14], [0,1,2,3,4]] },
|
38
|
+
|
39
|
+
'Missing keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3},
|
40
|
+
{:A => 10, :c => 11, 'g' => 12}] },
|
41
|
+
'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3},
|
42
|
+
{:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
|
43
|
+
|
44
|
+
'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3},
|
45
|
+
{:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
|
46
|
+
|
47
|
+
'Different sizes of row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13,14]} },
|
48
|
+
|
49
|
+
'Missing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12]} },
|
50
|
+
|
51
|
+
'Wrong keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'X' => [2,12]} },
|
52
|
+
|
53
|
+
'Excessing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12], 'T' => [3,12], :X => [4,14]} }
|
54
|
+
}
|
55
|
+
|
56
|
+
parser_specs(Parser, good_cases, bad_cases)
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'bioinform/parsers/string_fantom_parser'
|
3
|
+
|
4
|
+
module Bioinform
|
5
|
+
describe StringFantomParser do
|
6
|
+
good_cases = {
|
7
|
+
'string in Fantom-format' => {input: "
|
8
|
+
NA motif_CTNCAG
|
9
|
+
P0 A C G T
|
10
|
+
P1 0 1878368 0 0
|
11
|
+
P2 0 0 0 1878368
|
12
|
+
P3 469592 469592 469592 469592
|
13
|
+
P4 0 1878368 0 0
|
14
|
+
P5 1878368 0 0 0
|
15
|
+
P6 0 0 1878368 0",
|
16
|
+
matrix: [[0.0, 1878368.0, 0.0, 0.0],
|
17
|
+
[0.0, 0.0, 0.0, 1878368.0],
|
18
|
+
[469592.0, 469592.0, 469592.0, 469592.0],
|
19
|
+
[0.0, 1878368.0, 0.0, 0.0],
|
20
|
+
[1878368.0, 0.0, 0.0, 0.0],
|
21
|
+
[0.0, 0.0, 1878368.0, 0.0]] }
|
22
|
+
}
|
23
|
+
|
24
|
+
bad_cases = { }
|
25
|
+
|
26
|
+
parser_specs(StringFantomParser, good_cases, bad_cases)
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'bioinform/parsers/string_parser'
|
3
|
+
|
4
|
+
module Bioinform
|
5
|
+
describe StringParser do
|
6
|
+
good_cases = {
|
7
|
+
'Nx4 string' => {input: "1 2 3 4\n5 6 7 8",
|
8
|
+
matrix: [[1,2,3,4],[5,6,7,8]] },
|
9
|
+
|
10
|
+
'4xN string' => {input: "1 5\n2 6\n3 7\n 4 8",
|
11
|
+
matrix: [[1,2,3,4],[5,6,7,8]] },
|
12
|
+
|
13
|
+
'string with name' => {input: "TestMatrix\n1 5\n2 6\n3 7\n 4 8",
|
14
|
+
matrix: [[1,2,3,4],[5,6,7,8]], name: 'TestMatrix' },
|
15
|
+
|
16
|
+
'string with name (with introduction sign)' => {input: ">\t TestMatrix\n1 5\n2 6\n3 7\n 4 8",
|
17
|
+
matrix: [[1,2,3,4],[5,6,7,8]],
|
18
|
+
name: 'TestMatrix' },
|
19
|
+
|
20
|
+
'string with name (with special characters)' => {input: "Testmatrix_first:subname+sub-subname\n1 5\n2 6\n3 7\n 4 8",
|
21
|
+
matrix: [[1,2,3,4],[5,6,7,8]], name: 'Testmatrix_first:subname+sub-subname' },
|
22
|
+
|
23
|
+
'string with float numerics' => {input: "1.23 4.56 7.8 9.0\n9 -8.7 6.54 -3210",
|
24
|
+
matrix: [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]},
|
25
|
+
|
26
|
+
'string with exponents' => {input: "123e-2 0.456e+1 7.8 9.0\n9 -87000000000E-10 6.54 -3.210e3",
|
27
|
+
matrix: [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]},
|
28
|
+
|
29
|
+
'string with multiple spaces and tabs' => {input: "1 \t\t 2 3 4\n 5 6 7 8",
|
30
|
+
matrix: [[1,2,3,4],[5,6,7,8]] },
|
31
|
+
|
32
|
+
'string with preceeding and terminating newlines' => {input: "\n\n\t 1 2 3 4\n5 6 7 8 \n\t\n",
|
33
|
+
matrix: [[1,2,3,4],[5,6,7,8]] },
|
34
|
+
|
35
|
+
'string with windows crlf' => {input: "1 2 3 4\r\n5 6 7 8",
|
36
|
+
matrix: [[1,2,3,4],[5,6,7,8]] }
|
37
|
+
}
|
38
|
+
|
39
|
+
bad_cases = {
|
40
|
+
'string with non-numeric input' => {input: "1.23 4.56 78aaa 9.0\n9 -8.7 6.54 -3210" },
|
41
|
+
'string with empty exponent sign' => {input: "1.23 4.56 7.8 9.0\n 9e -8.7 6.54 3210" }
|
42
|
+
}
|
43
|
+
|
44
|
+
parser_specs(StringParser, good_cases, bad_cases)
|
45
|
+
end
|
46
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -2,27 +2,19 @@ $LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
|
|
2
2
|
$LOAD_PATH.unshift File.dirname(__FILE__)
|
3
3
|
|
4
4
|
require 'rspec'
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
Parser.subclasses.each do |klass|
|
19
|
-
#class_levels = klass.to_s.split('::')
|
20
|
-
#class_levels[0..-2].inject(Object){|klass, level| klass.const_get level}.const_set(class_name, class_levels.last)
|
21
|
-
|
22
|
-
Bioinform.send :remove_const, klass.name.split('::').last
|
23
|
-
end
|
24
|
-
Parser.subclasses.clear
|
25
|
-
end
|
5
|
+
|
6
|
+
def parser_specs(parser_klass, good_cases, bad_cases)
|
7
|
+
good_cases.each do |case_description, input_and_result|
|
8
|
+
it "should be able to parse #{case_description}" do
|
9
|
+
result = parser_klass.new(input_and_result[:input]).parse
|
10
|
+
result[:matrix].should == input_and_result[:matrix]
|
11
|
+
result[:name].should == input_and_result[:name] if input_and_result.has_key?(:name)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
bad_cases.each do |case_description, input|
|
16
|
+
it "should fail silently returning {} on parsing #{case_description}" do
|
17
|
+
parser_klass.new(input[:input]).parse.should == {}
|
26
18
|
end
|
27
19
|
end
|
28
20
|
end
|
@@ -3,9 +3,17 @@ require 'bioinform/support/multiline_squish'
|
|
3
3
|
|
4
4
|
describe String do
|
5
5
|
describe '#multiline_squish' do
|
6
|
-
it 'should replace multiple spaces with one space'
|
7
|
-
|
8
|
-
|
9
|
-
it 'should
|
6
|
+
it 'should replace multiple spaces with one space' do
|
7
|
+
"abc def ghi\n jk lmn".multiline_squish.should == "abc def ghi\njk lmn"
|
8
|
+
end
|
9
|
+
it 'should replace tabs with a space' do
|
10
|
+
"abc\tdef ghi \t jk".multiline_squish.should == 'abc def ghi jk'
|
11
|
+
end
|
12
|
+
it 'should replace \r\n with \n' do
|
13
|
+
"abc def ghi\r\njk lmn".multiline_squish.should == "abc def ghi\njk lmn"
|
14
|
+
end
|
15
|
+
it 'should preserve rows pagination' do
|
16
|
+
"abc def ghi\njk lmn".multiline_squish.should == "abc def ghi\njk lmn"
|
17
|
+
end
|
10
18
|
end
|
11
19
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bioinform
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-07-
|
12
|
+
date: 2012-07-31 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -59,16 +59,14 @@ files:
|
|
59
59
|
- lib/bioinform.rb
|
60
60
|
- lib/bioinform/data_models.rb
|
61
61
|
- lib/bioinform/data_models/collection.rb
|
62
|
-
- lib/bioinform/data_models/parser.rb
|
63
|
-
- lib/bioinform/data_models/parsers.rb
|
64
|
-
- lib/bioinform/data_models/parsers/array_parser.rb
|
65
|
-
- lib/bioinform/data_models/parsers/hash_parser.rb
|
66
|
-
- lib/bioinform/data_models/parsers/string_fantom_parser.rb
|
67
|
-
- lib/bioinform/data_models/parsers/string_parser.rb
|
68
62
|
- lib/bioinform/data_models/pcm.rb
|
69
63
|
- lib/bioinform/data_models/pm.rb
|
70
64
|
- lib/bioinform/data_models/ppm.rb
|
71
65
|
- lib/bioinform/data_models/pwm.rb
|
66
|
+
- lib/bioinform/parsers.rb
|
67
|
+
- lib/bioinform/parsers/parser.rb
|
68
|
+
- lib/bioinform/parsers/string_fantom_parser.rb
|
69
|
+
- lib/bioinform/parsers/string_parser.rb
|
72
70
|
- lib/bioinform/support.rb
|
73
71
|
- lib/bioinform/support/array_product.rb
|
74
72
|
- lib/bioinform/support/array_zip.rb
|
@@ -83,13 +81,12 @@ files:
|
|
83
81
|
- lib/bioinform/support/same_by.rb
|
84
82
|
- lib/bioinform/support/yaml_dump_file.rb
|
85
83
|
- lib/bioinform/version.rb
|
86
|
-
- spec/data_models/parser_spec.rb
|
87
|
-
- spec/data_models/parsers/array_parser_spec.rb
|
88
|
-
- spec/data_models/parsers/hash_parser_spec.rb
|
89
|
-
- spec/data_models/parsers/string_fantom_parser_spec.rb
|
90
|
-
- spec/data_models/parsers/string_parser_spec.rb
|
84
|
+
- spec/data_models/pcm_spec.rb
|
91
85
|
- spec/data_models/pm_spec.rb
|
92
86
|
- spec/data_models/pwm_spec.rb
|
87
|
+
- spec/parsers/parser_spec.rb
|
88
|
+
- spec/parsers/string_fantom_parser_spec.rb
|
89
|
+
- spec/parsers/string_parser_spec.rb
|
93
90
|
- spec/spec_helper.rb
|
94
91
|
- spec/support/array_product_spec.rb
|
95
92
|
- spec/support/array_zip_spec.rb
|
@@ -129,13 +126,12 @@ summary: Classes for work with different input formats of positional matrices an
|
|
129
126
|
several useful extensions for Enumerable module like parametric map and callable
|
130
127
|
symbols
|
131
128
|
test_files:
|
132
|
-
- spec/data_models/parser_spec.rb
|
133
|
-
- spec/data_models/parsers/array_parser_spec.rb
|
134
|
-
- spec/data_models/parsers/hash_parser_spec.rb
|
135
|
-
- spec/data_models/parsers/string_fantom_parser_spec.rb
|
136
|
-
- spec/data_models/parsers/string_parser_spec.rb
|
129
|
+
- spec/data_models/pcm_spec.rb
|
137
130
|
- spec/data_models/pm_spec.rb
|
138
131
|
- spec/data_models/pwm_spec.rb
|
132
|
+
- spec/parsers/parser_spec.rb
|
133
|
+
- spec/parsers/string_fantom_parser_spec.rb
|
134
|
+
- spec/parsers/string_parser_spec.rb
|
139
135
|
- spec/spec_helper.rb
|
140
136
|
- spec/support/array_product_spec.rb
|
141
137
|
- spec/support/array_zip_spec.rb
|