bioinform 0.1.17 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -3
- data/LICENSE +0 -1
- data/README.md +1 -1
- data/TODO.txt +23 -30
- data/bin/convert_motif +4 -0
- data/bin/pcm2pwm +1 -1
- data/bin/split_motifs +1 -1
- data/bioinform.gemspec +0 -2
- data/lib/bioinform.rb +54 -16
- data/lib/bioinform/alphabet.rb +85 -0
- data/lib/bioinform/background.rb +90 -0
- data/lib/bioinform/cli.rb +1 -2
- data/lib/bioinform/cli/convert_motif.rb +52 -17
- data/lib/bioinform/cli/pcm2pwm.rb +32 -26
- data/lib/bioinform/cli/split_motifs.rb +31 -30
- data/lib/bioinform/conversion_algorithms.rb +6 -0
- data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
- data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
- data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
- data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
- data/lib/bioinform/data_models.rb +1 -7
- data/lib/bioinform/data_models/named_model.rb +38 -0
- data/lib/bioinform/data_models/pcm.rb +18 -28
- data/lib/bioinform/data_models/pm.rb +73 -170
- data/lib/bioinform/data_models/ppm.rb +11 -24
- data/lib/bioinform/data_models/pwm.rb +30 -56
- data/lib/bioinform/errors.rb +17 -0
- data/lib/bioinform/formatters.rb +4 -2
- data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
- data/lib/bioinform/formatters/motif_formatter.rb +69 -0
- data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
- data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
- data/lib/bioinform/parsers.rb +1 -8
- data/lib/bioinform/parsers/matrix_parser.rb +44 -36
- data/lib/bioinform/parsers/motif_splitter.rb +45 -0
- data/lib/bioinform/support.rb +46 -14
- data/lib/bioinform/support/strip_doc.rb +1 -1
- data/lib/bioinform/version.rb +1 -1
- data/spec/alphabet_spec.rb +79 -0
- data/spec/background_spec.rb +57 -0
- data/spec/cli/cli_spec.rb +6 -6
- data/spec/cli/convert_motif_spec.rb +88 -88
- data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
- data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
- data/spec/cli/pcm2pwm_spec.rb +22 -23
- data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
- data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
- data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
- data/spec/cli/split_motifs_spec.rb +6 -21
- data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
- data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
- data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
- data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
- data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
- data/spec/data_models/named_model_spec.rb +41 -0
- data/spec/data_models/pcm_spec.rb +114 -45
- data/spec/data_models/pm_spec.rb +132 -333
- data/spec/data_models/ppm_spec.rb +47 -44
- data/spec/data_models/pwm_spec.rb +85 -77
- data/spec/fabricators/motif_formats_fabricator.rb +116 -116
- data/spec/formatters/consensus_formatter_spec.rb +26 -0
- data/spec/formatters/raw_formatter_spec.rb +169 -0
- data/spec/parsers/matrix_parser_spec.rb +216 -0
- data/spec/parsers/motif_splitter_spec.rb +87 -0
- data/spec/spec_helper.rb +2 -2
- data/spec/spec_helper_source.rb +25 -5
- data/spec/support_spec.rb +31 -0
- metadata +43 -124
- data/bin/merge_into_collection +0 -4
- data/lib/bioinform/cli/merge_into_collection.rb +0 -80
- data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
- data/lib/bioinform/data_models/collection.rb +0 -75
- data/lib/bioinform/data_models/motif.rb +0 -56
- data/lib/bioinform/formatters/raw_formatter.rb +0 -41
- data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
- data/lib/bioinform/parsers/parser.rb +0 -92
- data/lib/bioinform/parsers/splittable_parser.rb +0 -57
- data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
- data/lib/bioinform/parsers/string_parser.rb +0 -72
- data/lib/bioinform/parsers/trivial_parser.rb +0 -34
- data/lib/bioinform/parsers/yaml_parser.rb +0 -35
- data/lib/bioinform/support/advanced_scan.rb +0 -8
- data/lib/bioinform/support/array_product.rb +0 -6
- data/lib/bioinform/support/array_zip.rb +0 -6
- data/lib/bioinform/support/collect_hash.rb +0 -7
- data/lib/bioinform/support/deep_dup.rb +0 -5
- data/lib/bioinform/support/delete_many.rb +0 -14
- data/lib/bioinform/support/inverf.rb +0 -13
- data/lib/bioinform/support/multiline_squish.rb +0 -6
- data/lib/bioinform/support/parameters.rb +0 -28
- data/lib/bioinform/support/partial_sums.rb +0 -16
- data/lib/bioinform/support/same_by.rb +0 -12
- data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
- data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
- data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
- data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
- data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
- data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
- data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
- data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
- data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
- data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
- data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
- data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
- data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
- data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
- data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
- data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
- data/spec/cli/data/split_motifs/collection.yaml +0 -188
- data/spec/cli/merge_into_collection_spec.rb +0 -100
- data/spec/data_models/collection_spec.rb +0 -98
- data/spec/data_models/motif_spec.rb +0 -224
- data/spec/fabricators/collection_fabricator.rb +0 -8
- data/spec/fabricators/motif_fabricator.rb +0 -33
- data/spec/fabricators/pcm_fabricator.rb +0 -25
- data/spec/fabricators/pm_fabricator.rb +0 -52
- data/spec/fabricators/ppm_fabricator.rb +0 -14
- data/spec/fabricators/pwm_fabricator.rb +0 -16
- data/spec/parsers/parser_spec.rb +0 -152
- data/spec/parsers/string_fantom_parser_spec.rb +0 -70
- data/spec/parsers/string_parser_spec.rb +0 -77
- data/spec/parsers/trivial_parser_spec.rb +0 -64
- data/spec/parsers/yaml_parser_spec.rb +0 -50
- data/spec/support/advanced_scan_spec.rb +0 -32
- data/spec/support/array_product_spec.rb +0 -15
- data/spec/support/array_zip_spec.rb +0 -15
- data/spec/support/collect_hash_spec.rb +0 -15
- data/spec/support/delete_many_spec.rb +0 -44
- data/spec/support/inverf_spec.rb +0 -19
- data/spec/support/multiline_squish_spec.rb +0 -25
- data/spec/support/partial_sums_spec.rb +0 -30
- data/spec/support/same_by_spec.rb +0 -36
data/spec/data_models/pm_spec.rb
CHANGED
@@ -1,367 +1,166 @@
|
|
1
|
-
|
2
|
-
require_relative '../../lib/bioinform/data_models/pm'
|
1
|
+
require 'bioinform/data_models/pm'
|
3
2
|
|
4
|
-
|
5
|
-
describe PM do
|
6
|
-
{:as_pcm => [PCM, [[1,10,3,4],[5,6,7,0]]], :as_pwm => [PWM, [[1,2,3,4],[5,6,7,8]]], :as_ppm => [PPM, [[0.1,0.2,0.3,0.4],[0.5,0.1,0.3,0.1]]]}.each do |converter_method, (result_klass, matrix)|
|
7
|
-
describe "##{converter_method}" do
|
8
|
-
before :each do
|
9
|
-
@collection = Collection.new(name: 'Collection 1')
|
10
|
-
@matrix = matrix
|
11
|
-
@name = 'PM_motif'
|
12
|
-
@background = [0.2,0.3,0.3,0.2]
|
13
|
-
@tags = [@collection, 'Collection 2']
|
14
|
-
@pm = PM.new(matrix: matrix, name: @name, background: @background, tags: @tags)
|
15
|
-
@conv_motif = @pm.send converter_method
|
16
|
-
end
|
17
|
-
it "should return an instance of #{result_klass}" do
|
18
|
-
@conv_motif.should be_kind_of(result_klass)
|
19
|
-
end
|
20
|
-
it 'should return have the same matrix, name and background' do #, background and tags' do
|
21
|
-
@conv_motif.matrix.should == @matrix
|
22
|
-
@conv_motif.name.should == @name
|
23
|
-
@conv_motif.background.should == @background
|
24
|
-
# @conv_motif.tags.should == @tags
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
# describe '#tagged?' do
|
30
|
-
# context 'when PM marked with Collection object' do
|
31
|
-
# context 'without collection-name' do
|
32
|
-
# before :each do
|
33
|
-
# @marking_collection = Collection.new
|
34
|
-
# @nonmarking_collection = Collection.new
|
35
|
-
# @pm = PM.new(matrix:[[1,1,1,1]], name:'Motif name')
|
36
|
-
# @pm.mark(@marking_collection)
|
37
|
-
# end
|
38
|
-
# it 'should be true for marking collection' do
|
39
|
-
# @pm.should be_tagged(@marking_collection)
|
40
|
-
# end
|
41
|
-
# it 'should be false for nonmarking collection' do
|
42
|
-
# @pm.should_not be_tagged(@nonmarking_collection)
|
43
|
-
# end
|
44
|
-
# it 'should be false for nil-name' do
|
45
|
-
# @pm.should_not be_tagged(nil)
|
46
|
-
# end
|
47
|
-
# it 'should be false for any string' do
|
48
|
-
# @pm.should_not be_tagged('Another name')
|
49
|
-
# end
|
50
|
-
# end
|
51
|
-
# context 'with collection-name' do
|
52
|
-
# before :each do
|
53
|
-
# @marking_collection = Collection.new(name: 'Collection name')
|
54
|
-
# @nonmarking_collection = Collection.new(name: 'Another name')
|
55
|
-
# @pm = PM.new(matrix:[[1,1,1,1]], name:'Motif name')
|
56
|
-
# @pm.mark(@marking_collection)
|
57
|
-
# end
|
58
|
-
# it 'should be true for marking collection' do
|
59
|
-
# @pm.should be_tagged(@marking_collection)
|
60
|
-
# end
|
61
|
-
# it 'should be false for nonmarking collection' do
|
62
|
-
# @pm.should_not be_tagged(@nonmarking_collection)
|
63
|
-
# end
|
64
|
-
# it 'should be true for name of marking collection' do
|
65
|
-
# @pm.should be_tagged('Collection name')
|
66
|
-
# end
|
67
|
-
# it 'should be false for string that is not name of marking collection' do
|
68
|
-
# @pm.should_not be_tagged('Another name')
|
69
|
-
# end
|
70
|
-
# end
|
71
|
-
# end
|
72
|
-
|
73
|
-
# context 'when PM marked with name' do
|
74
|
-
# before :each do
|
75
|
-
# @nonmarking_collection = Collection.new(name: 'Another name')
|
76
|
-
# @pm = PM.new(matrix:[[1,1,1,1]], name:'Motif name')
|
77
|
-
# @pm.mark('Mark name')
|
78
|
-
# end
|
79
|
-
# it 'should be true for marking name' do
|
80
|
-
# @pm.should be_tagged('Mark name')
|
81
|
-
# end
|
82
|
-
# it 'should be false for string that is not marking name' do
|
83
|
-
# @pm.should_not be_tagged('Another name')
|
84
|
-
# end
|
85
|
-
# it 'should be false for nonmarking collection' do
|
86
|
-
# @pm.should_not be_tagged(@nonmarking_collection)
|
87
|
-
# end
|
88
|
-
# end
|
3
|
+
describe Bioinform::MotifModel::PM do
|
89
4
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
# @collection_3 = Collection.new(name: 'Nonmarking collection')
|
95
|
-
# @pm = PM.new(matrix:[[1,1,1,1]], name:'Motif name')
|
96
|
-
# @pm.mark(@collection_1)
|
97
|
-
# @pm.mark(@collection_2)
|
98
|
-
# @pm.mark('Stringy-name')
|
99
|
-
# end
|
100
|
-
# it 'should be true for each mark' do
|
101
|
-
# @pm.should be_tagged(@collection_1)
|
102
|
-
# @pm.should be_tagged(@collection_2)
|
103
|
-
# @pm.should be_tagged('Stringy-name')
|
104
|
-
# end
|
105
|
-
# it 'should be false for not presented marks' do
|
106
|
-
# @pm.should_not be_tagged(@collection_3)
|
107
|
-
# @pm.should_not be_tagged('Bad stringy-name')
|
108
|
-
# end
|
109
|
-
# end
|
110
|
-
# end
|
111
|
-
|
112
|
-
describe '#==' do
|
113
|
-
it 'should be true iff motifs have the same matrix, background and name' do
|
114
|
-
pm = PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'First motif')
|
115
|
-
pm_eq = PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'First motif')
|
116
|
-
pm_neq_matrix = PM.new(matrix: [[1,2,3,4],[15,16,17,18]], name: 'First motif')
|
117
|
-
pm_neq_name = PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'Second motif')
|
118
|
-
pm_neq_background = PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'First motif').set_parameters(background: [1,2,2,1])
|
5
|
+
describe '.new' do
|
6
|
+
specify 'with matrix having more than 4 elements in a position' do
|
7
|
+
expect { Bioinform::MotifModel::PM.new([[1,2,3,1.567],[10,11,12,15,10],[-1.1, 0.6, 0.4, 0.321]]) }.to raise_error (Bioinform::Error)
|
8
|
+
end
|
119
9
|
|
120
|
-
|
121
|
-
|
122
|
-
pm.should_not == pm_neq_background
|
123
|
-
pm.should == pm_eq
|
124
|
-
end
|
10
|
+
specify 'with matrix having less than 4 elements in a position' do
|
11
|
+
expect { Bioinform::MotifModel::PM.new([[1,2,3,1.567],[10,11,12,15],[-1.1, 0.6]]) }.to raise_error (Bioinform::Error)
|
125
12
|
end
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
PM.valid_matrix?( {A: [1,1], C: [2,4], G: [3,5], T: [4, 6.5]} ).should be_false
|
130
|
-
PM.valid_matrix?( [{A:1,C:2,G:3,T:4},{A:1,C:4,G:5,T: 6.5}] ).should be_false
|
131
|
-
PM.valid_matrix?( [[1,2,3,4],[1,4,6.5]] ).should be_false
|
132
|
-
PM.valid_matrix?( [[1,2,3],[1,4,6.5]] ).should be_false
|
133
|
-
PM.valid_matrix?( [[1,2,'3','4'],[1,'4','5',6.5]] ).should be_false
|
134
|
-
end
|
13
|
+
|
14
|
+
specify 'with matrix having positions in rows, nucleotides in columns' do
|
15
|
+
expect { Bioinform::MotifModel::PM.new([[1,2,3],[10,-11,12],[-1.1, 0.6, 0.4],[5,6,7]]) }.to raise_error (Bioinform::Error)
|
135
16
|
end
|
136
17
|
|
137
|
-
|
138
|
-
|
139
|
-
@pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
|
140
|
-
end
|
141
|
-
it 'should return string with single-tabulated multiline matrix' do
|
142
|
-
@pm.to_s.should == "1\t2\t3\t4\n1\t4\t5\t6.5"
|
143
|
-
end
|
144
|
-
it 'should return positions in rows, letters in cols' do
|
145
|
-
@pm.to_s.split("\n").size.should == 2
|
146
|
-
@pm.to_s.split("\n").map{|pos| pos.split.size}.all?{|sz| sz==4}.should be_true
|
147
|
-
end
|
148
|
-
context 'with name specified' do
|
149
|
-
before :each do
|
150
|
-
@pm.name = 'Stub name'
|
151
|
-
end
|
152
|
-
it 'should return a string with a name and a matrix from the next line' do
|
153
|
-
@pm.to_s.should == "Stub name\n1\t2\t3\t4\n1\t4\t5\t6.5"
|
154
|
-
end
|
155
|
-
it 'should not return a name if argument is set to false' do
|
156
|
-
@pm.to_s(with_name: false).should == "1\t2\t3\t4\n1\t4\t5\t6.5"
|
157
|
-
end
|
158
|
-
end
|
159
|
-
context 'in letters_as_rows mode' do
|
160
|
-
it 'should print matrix with row-markers' do
|
161
|
-
@pm.to_s(letters_as_rows: true).should == "A|1\t1\nC|2\t4\nG|3\t5\nT|4\t6.5"
|
162
|
-
end
|
163
|
-
end
|
18
|
+
specify 'with empty matrix' do
|
19
|
+
expect { Bioinform::MotifModel::PM.new([]) }.to raise_error (Bioinform::Error)
|
164
20
|
end
|
165
21
|
|
166
|
-
|
167
|
-
|
168
|
-
PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.should == " A C G T \n 1.0 2.0 3.0 4.0\n 5.0 6.0 7.0 8.0"
|
169
|
-
end
|
170
|
-
it 'should return a string of floats formatted with spaces' do
|
171
|
-
PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.should match(/1.0 +2.0 +3.0 +4.0 *\n *5.0 +6.0 +7.0 +8.0/)
|
172
|
-
end
|
173
|
-
it 'should contain first string of ACGT letters' do
|
174
|
-
PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.lines.first.should match(/A +C +G +T/)
|
175
|
-
end
|
176
|
-
it 'should round floats upto 3 digits' do
|
177
|
-
PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] ).pretty_string.should match(/1.1 +2.22 +3.333 +4.444 *\n *5.5 +6.66 +7.777 +8.889/)
|
178
|
-
end
|
22
|
+
context 'with valid matrix' do
|
23
|
+
let(:matrix) { [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]] }
|
179
24
|
|
180
|
-
|
181
|
-
|
182
|
-
@pm = PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] )
|
183
|
-
@pm.name = 'MyName'
|
184
|
-
end
|
185
|
-
it 'should contain name if parameter `with_name` isn\'t false' do
|
186
|
-
@pm.pretty_string.should match(/MyName\n/)
|
187
|
-
end
|
188
|
-
it 'should not contain name if parameter `with_name` is false' do
|
189
|
-
@pm.pretty_string(with_name: false).should_not match(/MyName\n/)
|
190
|
-
end
|
25
|
+
specify do
|
26
|
+
expect{ Bioinform::MotifModel::PM.new(matrix) }.not_to raise_error
|
191
27
|
end
|
192
|
-
|
193
|
-
|
194
|
-
@pm = PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] )
|
195
|
-
end
|
196
|
-
it 'should not contain name whether parameter `with_name` is or isn\'t false' do
|
197
|
-
@pm.pretty_string.should_not match(/MyName\n/)
|
198
|
-
@pm.pretty_string(with_name: false).should_not match(/MyName\n/)
|
199
|
-
end
|
28
|
+
specify do
|
29
|
+
expect( Bioinform::MotifModel::PM.new(matrix).matrix ).to eq matrix
|
200
30
|
end
|
201
|
-
|
202
|
-
|
203
|
-
@pm = PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] )
|
204
|
-
@pm.pretty_string(letters_as_rows: true).should == @pm.to_s(letters_as_rows: true)
|
205
|
-
end
|
31
|
+
specify do
|
32
|
+
expect( Bioinform::MotifModel::PM.new(matrix).alphabet ).to eq Bioinform::NucleotideAlphabet
|
206
33
|
end
|
207
34
|
end
|
35
|
+
end
|
208
36
|
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
37
|
+
describe '.from_string' do
|
38
|
+
specify {
|
39
|
+
expect( Bioinform::MotifModel::PM.from_string("1 2 3 4\n5 6 7 8").model.class ).to eq Bioinform::MotifModel::PM
|
40
|
+
}
|
41
|
+
specify {
|
42
|
+
expect( Bioinform::MotifModel::PM.from_string("1 2 3 4\n5 6 7 8").name ).to be_nil
|
43
|
+
}
|
44
|
+
specify {
|
45
|
+
expect( Bioinform::MotifModel::PM.from_string("1 2 3 4\n5 6 7 8").matrix ).to eq [[1,2,3,4],[5,6,7,8]]
|
46
|
+
}
|
47
|
+
specify {
|
48
|
+
expect( Bioinform::MotifModel::PM.from_string("1 2 3 4\n5 6 7 8") ).to be_kind_of Bioinform::MotifModel::NamedModel
|
49
|
+
}
|
50
|
+
|
51
|
+
specify {
|
52
|
+
expect( Bioinform::MotifModel::PM.from_string(">Motif name\n1 2 3 4\n5 6 7 8").name ).to eq 'Motif name'
|
53
|
+
}
|
54
|
+
specify {
|
55
|
+
expect( Bioinform::MotifModel::PM.from_string(">Motif name\n1 2 3 4\n5 6 7 8").matrix ).to eq [[1,2,3,4],[5,6,7,8]]
|
56
|
+
}
|
57
|
+
specify {
|
58
|
+
expect( Bioinform::MotifModel::PM.from_string(">Motif name\n1 2 3 4\n5 6 7 8") ).to be_kind_of Bioinform::MotifModel::NamedModel
|
59
|
+
}
|
60
|
+
|
61
|
+
context 'with custom parser' do
|
62
|
+
let(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :rows) }
|
63
|
+
specify{
|
64
|
+
expect( Bioinform::MotifModel::PM.from_string("1 5\n2 6\n3 7\n4 8", parser: parser).matrix ).to eq [[1,2,3,4],[5,6,7,8]]
|
65
|
+
}
|
66
|
+
end
|
67
|
+
context 'with custom alphabet' do
|
68
|
+
let(:alphabet) { Bioinform::NucleotideAlphabetWithN }
|
69
|
+
let(:parser) { Bioinform::MatrixParser.new(fix_nucleotides_number: alphabet.size) }
|
70
|
+
specify {
|
71
|
+
expect( Bioinform::MotifModel::PM.from_string("1 2 3 4 10\n5 6 7 8 100", alphabet: alphabet, parser: parser).matrix ).to eq [[1,2,3,4,10],[5,6,7,8,100]]
|
72
|
+
}
|
73
|
+
specify {
|
74
|
+
expect( Bioinform::MotifModel::PM.from_string("1 2 3 4 10\n5 6 7 8 100", alphabet: alphabet, parser: parser).alphabet ).to eq alphabet
|
75
|
+
}
|
214
76
|
end
|
77
|
+
end
|
215
78
|
|
216
|
-
|
79
|
+
describe '.from_file' do
|
80
|
+
include FakeFS::SpecHelpers
|
81
|
+
context 'with default configuration' do
|
217
82
|
before :each do
|
218
|
-
|
219
|
-
|
220
|
-
end
|
221
|
-
it 'should return a hash with keys A, C, G, T' do
|
222
|
-
@hsh.should be_kind_of Hash
|
223
|
-
@hsh.keys.sort.should == %w{A C G T}
|
224
|
-
end
|
225
|
-
it 'should contain matrix elements of corresponding letter' do
|
226
|
-
@hsh['A'].should == [1, 1]
|
227
|
-
@hsh['C'].should == [2, 4]
|
228
|
-
@hsh['G'].should == [3, 5]
|
229
|
-
@hsh['T'].should == [4, 6.5]
|
83
|
+
File.write 'motif.pwm', ">motif name inside\n1 2 3 4\n5 6 7 8"
|
84
|
+
File.write 'motifNameOutside.pwm', "1 2 3 4\n5 6 7 8"
|
230
85
|
end
|
231
|
-
it 'should be accessible both by name and symbol (e.g. pm.to_hash[:A] or pm.to_hash[\'A\'] is the same)' do
|
232
|
-
@hsh['A'].should == @hsh[:A]
|
233
|
-
@hsh['C'].should == @hsh[:C]
|
234
|
-
@hsh['G'].should == @hsh[:G]
|
235
|
-
@hsh['T'].should == @hsh[:T]
|
236
|
-
end
|
237
|
-
end
|
238
86
|
|
239
|
-
|
240
|
-
|
241
|
-
@pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
|
242
|
-
end
|
243
|
-
context 'when pm just created' do
|
244
|
-
it 'should be [1,1,1,1]' do
|
245
|
-
@pm.background.should == [1,1,1,1]
|
246
|
-
end
|
87
|
+
specify 'obtains motif name from file content when available' do
|
88
|
+
expect(Bioinform::MotifModel::PM.from_file('motif.pwm').name).to eq 'motif name inside'
|
247
89
|
end
|
248
|
-
end
|
249
90
|
|
250
|
-
|
251
|
-
|
252
|
-
@pm = PM.new( [[1, 2, 3, 4], [1, 4, 5, 6.5]] )
|
253
|
-
end
|
254
|
-
it 'should return pm object itself' do
|
255
|
-
@pm.reverse_complement!.should be_equal(@pm)
|
91
|
+
specify 'obtains motif name from filename when it is not available in file content' do
|
92
|
+
expect(Bioinform::MotifModel::PM.from_file('motifNameOutside.pwm').name).to eq 'motifNameOutside'
|
256
93
|
end
|
257
|
-
it 'should reverse matrix rows and columns' do
|
258
|
-
@pm.reverse_complement!
|
259
|
-
@pm.matrix.should == [[6.5, 5, 4, 1], [4, 3, 2, 1]]
|
260
|
-
end
|
261
|
-
end
|
262
94
|
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
end
|
267
|
-
it 'should return pm object itself' do
|
268
|
-
@pm.left_augment!(2).should be_equal(@pm)
|
269
|
-
end
|
270
|
-
it 'should add number of zero columns from the left' do
|
271
|
-
@pm.left_augment!(2)
|
272
|
-
@pm.matrix.should == [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [1, 2, 3, 4], [1, 4, 5, 6.5]]
|
95
|
+
specify 'obtains motif matrix correct' do
|
96
|
+
expect(Bioinform::MotifModel::PM.from_file('motif.pwm').matrix).to eq [[1,2,3,4],[5,6,7,8]]
|
97
|
+
expect(Bioinform::MotifModel::PM.from_file('motifNameOutside.pwm').matrix).to eq [[1,2,3,4],[5,6,7,8]]
|
273
98
|
end
|
274
99
|
end
|
275
100
|
|
276
|
-
|
277
|
-
before :each do
|
278
|
-
@pm = PM.new( [[1, 2, 3, 4], [1, 4, 5, 6.5]] )
|
279
|
-
end
|
280
|
-
it 'should return pm object itself' do
|
281
|
-
@pm.right_augment!(2).should be_equal(@pm)
|
282
|
-
end
|
283
|
-
it 'should add number of zero columns from the right' do
|
284
|
-
@pm.right_augment!(2)
|
285
|
-
@pm.matrix.should == [[1, 2, 3, 4], [1, 4, 5, 6.5], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]
|
286
|
-
end
|
287
|
-
end
|
101
|
+
end
|
288
102
|
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
@pm.discrete!(1)
|
299
|
-
@pm.matrix.should == [[2, 2, 4, 5], [7, 7, 4, 5]]
|
300
|
-
end
|
301
|
-
end
|
302
|
-
it 'should discrete each element of matrix multiplied by rate' do
|
303
|
-
@pm.discrete!(10)
|
304
|
-
@pm.matrix.should == [[13, 20, 32, 49], [66, 65, 33, 47]]
|
305
|
-
end
|
103
|
+
context 'with different alphabet' do
|
104
|
+
let(:matrix_4) { [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]] }
|
105
|
+
let(:matrix_15) { [[1,2,3,1.567, 12,-11,12,0,-1.1,0.6, 0.4,0.321,0.11,-1.23, 2.0],
|
106
|
+
[0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0]] }
|
107
|
+
specify do
|
108
|
+
expect{ Bioinform::MotifModel::PM.new(matrix_4, alphabet: Bioinform::IUPACAlphabet) }.to raise_error Bioinform::Error
|
109
|
+
end
|
110
|
+
specify do
|
111
|
+
expect{ Bioinform::MotifModel::PM.new(matrix_15, alphabet: Bioinform::IUPACAlphabet) }.not_to raise_error
|
306
112
|
end
|
307
113
|
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
@pm_3_positions = PM.new( [[1.3, 2.0, 3.2, 4.9], [5.0, 6.5, 3.2, 4.6], [1, 2, 3, 4]] )
|
312
|
-
end
|
313
|
-
context 'when background is [1,1,1,1]' do
|
314
|
-
it 'should be equal to number of words' do
|
315
|
-
@pm_2_positions.vocabulary_volume.should == 4**2
|
316
|
-
@pm_3_positions.vocabulary_volume.should == 4**3
|
317
|
-
end
|
318
|
-
end
|
319
|
-
context 'when background is normalized probabilities' do
|
320
|
-
it 'should be 1.0' do
|
321
|
-
@pm_2_positions.background = [0.2, 0.3, 0.3, 0.2]
|
322
|
-
@pm_2_positions.vocabulary_volume.should == 1.0
|
114
|
+
let(:iupac_pm) { Bioinform::MotifModel::PM.new(matrix_15, alphabet: Bioinform::IUPACAlphabet) }
|
115
|
+
specify { expect(iupac_pm.matrix).to eq matrix_15 }
|
116
|
+
specify { expect(iupac_pm.alphabet).to eq Bioinform::IUPACAlphabet }
|
323
117
|
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
118
|
+
# A C G T AC AG AT CG CT GT ACG ACT AGT CGT ACGT
|
119
|
+
# 1,2,3,1.567, 12, -11, 12, 0, -1.1, 0.6, 0.4, 0.321,0.11,-1.23, 2.0
|
120
|
+
specify { expect(iupac_pm.complemented.matrix).to eq [[1.567,3,2,1, 0.6, -1.1, 12, 0, -11, 12, -1.23, 0.11,0.321,0.4, 2.0],
|
121
|
+
[0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0]] }
|
122
|
+
specify { expect(iupac_pm.complemented.alphabet).to eq Bioinform::IUPACAlphabet }
|
329
123
|
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
@pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
334
|
-
@pm_2 = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
335
|
-
end
|
336
|
-
it 'should return copy of object not object itself' do
|
337
|
-
@pm.send(meth).should_not be_equal @pm
|
338
|
-
end
|
339
|
-
it 'should == to bang-method' do
|
340
|
-
@pm.send(meth).to_s.should == @pm_2.send("#{meth}!").to_s
|
341
|
-
end
|
342
|
-
end
|
343
|
-
end
|
124
|
+
specify { expect(iupac_pm.reversed.matrix).to eq [[0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0],
|
125
|
+
[1,2,3,1.567, 12,-11,12,0,-1.1,0.6, 0.4,0.321,0.11,-1.23, 2.0]] }
|
126
|
+
specify { expect(iupac_pm.reversed.alphabet).to eq Bioinform::IUPACAlphabet }
|
344
127
|
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
@pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
349
|
-
@pm_2 = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
350
|
-
end
|
351
|
-
it 'should return copy of object not object itself' do
|
352
|
-
@pm.send(meth, 2).should_not be_equal @pm
|
353
|
-
end
|
354
|
-
it 'should == to bang-method' do
|
355
|
-
@pm.send(meth, 2).to_s.should == @pm_2.send("#{meth}!", 2).to_s
|
356
|
-
end
|
357
|
-
end
|
358
|
-
end
|
128
|
+
specify { expect(iupac_pm.reverse_complemented.alphabet).to eq Bioinform::IUPACAlphabet }
|
129
|
+
specify { expect(iupac_pm.reverse_complemented.matrix).to eq [[0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0],
|
130
|
+
[1.567,3,2,1, 0.6, -1.1, 12, 0, -11, 12, -1.23, 0.11,0.321,0.4, 2.0]] }
|
359
131
|
|
360
|
-
describe '#consensus' do
|
361
|
-
let(:pm) { PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -0.5, -1.5, -1.0]] ) }
|
362
|
-
it 'is a string of nucleotides of maximal weights' do
|
363
|
-
pm.consensus.should == 'GAC'
|
364
|
-
end
|
365
|
-
end
|
366
132
|
end
|
367
|
-
|
133
|
+
|
134
|
+
context 'valid PM' do
|
135
|
+
let(:matrix) { [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]] }
|
136
|
+
let(:pm) { Bioinform::MotifModel::PM.new(matrix) }
|
137
|
+
specify { expect( pm.length ).to eq 3 }
|
138
|
+
|
139
|
+
specify { expect(pm.to_s).to eq("1\t2\t3\t1.567\n"+"12\t-11\t12\t0\n"+"-1.1\t0.6\t0.4\t0.321") }
|
140
|
+
|
141
|
+
specify { expect(pm).to eq Bioinform::MotifModel::PM.new( [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]]) }
|
142
|
+
specify { expect(pm).not_to eq Bioinform::MotifModel::PM.new( [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]], alphabet: Bioinform::ComplementableAlphabet.new([:A,:B,:C,:D],[:D,:C,:B,:A])) }
|
143
|
+
specify { expect(pm).not_to eq Bioinform::MotifModel::PM.new( [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321],[1, 2, 3, 4]]) }
|
144
|
+
specify { expect(pm).not_to eq Bioinform::MotifModel::PM.new( [[1,2,3,1.567],[12,-11,12,0],[1, 2, 3, 4]]) }
|
145
|
+
specify { expect(pm).not_to eq [[1,2,3,1.567],[12,-11,12,0],[1, 2, 3, 4]] }
|
146
|
+
|
147
|
+
specify { expect(pm.named('motif name')).to be_kind_of Bioinform::MotifModel::NamedModel }
|
148
|
+
specify { expect(pm.named('motif name').model).to eq pm }
|
149
|
+
specify { expect(pm.named('motif name').name).to eq 'motif name' }
|
150
|
+
|
151
|
+
describe '#reversed, #complemented, #reverse_complemented' do
|
152
|
+
specify { expect(pm.reversed.matrix).to eq [[-1.1, 0.6, 0.4, 0.321],[12,-11,12,0],[1,2,3,1.567]] }
|
153
|
+
specify { expect(pm.complemented.matrix).to eq [[1.567,3,2,1],[0,12,-11,12],[0.321,0.4,0.6,-1.1]] }
|
154
|
+
specify { expect(pm.reverse_complemented.matrix).to eq [[0.321,0.4,0.6,-1.1],[0,12,-11,12],[1.567,3,2,1]] }
|
155
|
+
specify { expect(pm.revcomp.matrix).to eq [[0.321,0.4,0.6,-1.1],[0,12,-11,12],[1.567,3,2,1]] }
|
156
|
+
specify { expect(pm.reversed).to be_kind_of Bioinform::MotifModel::PM }
|
157
|
+
specify { expect(pm.complemented).to be_kind_of Bioinform::MotifModel::PM }
|
158
|
+
specify { expect(pm.reverse_complemented).to be_kind_of Bioinform::MotifModel::PM }
|
159
|
+
specify { expect(pm.revcomp).to be_kind_of Bioinform::MotifModel::PM }
|
160
|
+
end
|
161
|
+
|
162
|
+
specify { expect{|b| pm.each_position(&b) }.to yield_successive_args([1,2,3,1.567], [12,-11,12,0], [-1.1, 0.6, 0.4, 0.321]) }
|
163
|
+
specify { expect(pm.each_position).to be_kind_of Enumerator }
|
164
|
+
specify { expect{|b| pm.each_position.each(&b) }.to yield_successive_args([1,2,3,1.567], [12,-11,12,0], [-1.1, 0.6, 0.4, 0.321]) }
|
165
|
+
end
|
166
|
+
end
|