bioinform 0.1.17 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -3
- data/LICENSE +0 -1
- data/README.md +1 -1
- data/TODO.txt +23 -30
- data/bin/convert_motif +4 -0
- data/bin/pcm2pwm +1 -1
- data/bin/split_motifs +1 -1
- data/bioinform.gemspec +0 -2
- data/lib/bioinform.rb +54 -16
- data/lib/bioinform/alphabet.rb +85 -0
- data/lib/bioinform/background.rb +90 -0
- data/lib/bioinform/cli.rb +1 -2
- data/lib/bioinform/cli/convert_motif.rb +52 -17
- data/lib/bioinform/cli/pcm2pwm.rb +32 -26
- data/lib/bioinform/cli/split_motifs.rb +31 -30
- data/lib/bioinform/conversion_algorithms.rb +6 -0
- data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
- data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
- data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
- data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
- data/lib/bioinform/data_models.rb +1 -7
- data/lib/bioinform/data_models/named_model.rb +38 -0
- data/lib/bioinform/data_models/pcm.rb +18 -28
- data/lib/bioinform/data_models/pm.rb +73 -170
- data/lib/bioinform/data_models/ppm.rb +11 -24
- data/lib/bioinform/data_models/pwm.rb +30 -56
- data/lib/bioinform/errors.rb +17 -0
- data/lib/bioinform/formatters.rb +4 -2
- data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
- data/lib/bioinform/formatters/motif_formatter.rb +69 -0
- data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
- data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
- data/lib/bioinform/parsers.rb +1 -8
- data/lib/bioinform/parsers/matrix_parser.rb +44 -36
- data/lib/bioinform/parsers/motif_splitter.rb +45 -0
- data/lib/bioinform/support.rb +46 -14
- data/lib/bioinform/support/strip_doc.rb +1 -1
- data/lib/bioinform/version.rb +1 -1
- data/spec/alphabet_spec.rb +79 -0
- data/spec/background_spec.rb +57 -0
- data/spec/cli/cli_spec.rb +6 -6
- data/spec/cli/convert_motif_spec.rb +88 -88
- data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
- data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
- data/spec/cli/pcm2pwm_spec.rb +22 -23
- data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
- data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
- data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
- data/spec/cli/split_motifs_spec.rb +6 -21
- data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
- data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
- data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
- data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
- data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
- data/spec/data_models/named_model_spec.rb +41 -0
- data/spec/data_models/pcm_spec.rb +114 -45
- data/spec/data_models/pm_spec.rb +132 -333
- data/spec/data_models/ppm_spec.rb +47 -44
- data/spec/data_models/pwm_spec.rb +85 -77
- data/spec/fabricators/motif_formats_fabricator.rb +116 -116
- data/spec/formatters/consensus_formatter_spec.rb +26 -0
- data/spec/formatters/raw_formatter_spec.rb +169 -0
- data/spec/parsers/matrix_parser_spec.rb +216 -0
- data/spec/parsers/motif_splitter_spec.rb +87 -0
- data/spec/spec_helper.rb +2 -2
- data/spec/spec_helper_source.rb +25 -5
- data/spec/support_spec.rb +31 -0
- metadata +43 -124
- data/bin/merge_into_collection +0 -4
- data/lib/bioinform/cli/merge_into_collection.rb +0 -80
- data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
- data/lib/bioinform/data_models/collection.rb +0 -75
- data/lib/bioinform/data_models/motif.rb +0 -56
- data/lib/bioinform/formatters/raw_formatter.rb +0 -41
- data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
- data/lib/bioinform/parsers/parser.rb +0 -92
- data/lib/bioinform/parsers/splittable_parser.rb +0 -57
- data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
- data/lib/bioinform/parsers/string_parser.rb +0 -72
- data/lib/bioinform/parsers/trivial_parser.rb +0 -34
- data/lib/bioinform/parsers/yaml_parser.rb +0 -35
- data/lib/bioinform/support/advanced_scan.rb +0 -8
- data/lib/bioinform/support/array_product.rb +0 -6
- data/lib/bioinform/support/array_zip.rb +0 -6
- data/lib/bioinform/support/collect_hash.rb +0 -7
- data/lib/bioinform/support/deep_dup.rb +0 -5
- data/lib/bioinform/support/delete_many.rb +0 -14
- data/lib/bioinform/support/inverf.rb +0 -13
- data/lib/bioinform/support/multiline_squish.rb +0 -6
- data/lib/bioinform/support/parameters.rb +0 -28
- data/lib/bioinform/support/partial_sums.rb +0 -16
- data/lib/bioinform/support/same_by.rb +0 -12
- data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
- data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
- data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
- data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
- data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
- data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
- data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
- data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
- data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
- data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
- data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
- data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
- data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
- data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
- data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
- data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
- data/spec/cli/data/split_motifs/collection.yaml +0 -188
- data/spec/cli/merge_into_collection_spec.rb +0 -100
- data/spec/data_models/collection_spec.rb +0 -98
- data/spec/data_models/motif_spec.rb +0 -224
- data/spec/fabricators/collection_fabricator.rb +0 -8
- data/spec/fabricators/motif_fabricator.rb +0 -33
- data/spec/fabricators/pcm_fabricator.rb +0 -25
- data/spec/fabricators/pm_fabricator.rb +0 -52
- data/spec/fabricators/ppm_fabricator.rb +0 -14
- data/spec/fabricators/pwm_fabricator.rb +0 -16
- data/spec/parsers/parser_spec.rb +0 -152
- data/spec/parsers/string_fantom_parser_spec.rb +0 -70
- data/spec/parsers/string_parser_spec.rb +0 -77
- data/spec/parsers/trivial_parser_spec.rb +0 -64
- data/spec/parsers/yaml_parser_spec.rb +0 -50
- data/spec/support/advanced_scan_spec.rb +0 -32
- data/spec/support/array_product_spec.rb +0 -15
- data/spec/support/array_zip_spec.rb +0 -15
- data/spec/support/collect_hash_spec.rb +0 -15
- data/spec/support/delete_many_spec.rb +0 -44
- data/spec/support/inverf_spec.rb +0 -19
- data/spec/support/multiline_squish_spec.rb +0 -25
- data/spec/support/partial_sums_spec.rb +0 -30
- data/spec/support/same_by_spec.rb +0 -36
data/spec/data_models/pm_spec.rb
CHANGED
|
@@ -1,367 +1,166 @@
|
|
|
1
|
-
|
|
2
|
-
require_relative '../../lib/bioinform/data_models/pm'
|
|
1
|
+
require 'bioinform/data_models/pm'
|
|
3
2
|
|
|
4
|
-
|
|
5
|
-
describe PM do
|
|
6
|
-
{:as_pcm => [PCM, [[1,10,3,4],[5,6,7,0]]], :as_pwm => [PWM, [[1,2,3,4],[5,6,7,8]]], :as_ppm => [PPM, [[0.1,0.2,0.3,0.4],[0.5,0.1,0.3,0.1]]]}.each do |converter_method, (result_klass, matrix)|
|
|
7
|
-
describe "##{converter_method}" do
|
|
8
|
-
before :each do
|
|
9
|
-
@collection = Collection.new(name: 'Collection 1')
|
|
10
|
-
@matrix = matrix
|
|
11
|
-
@name = 'PM_motif'
|
|
12
|
-
@background = [0.2,0.3,0.3,0.2]
|
|
13
|
-
@tags = [@collection, 'Collection 2']
|
|
14
|
-
@pm = PM.new(matrix: matrix, name: @name, background: @background, tags: @tags)
|
|
15
|
-
@conv_motif = @pm.send converter_method
|
|
16
|
-
end
|
|
17
|
-
it "should return an instance of #{result_klass}" do
|
|
18
|
-
@conv_motif.should be_kind_of(result_klass)
|
|
19
|
-
end
|
|
20
|
-
it 'should return have the same matrix, name and background' do #, background and tags' do
|
|
21
|
-
@conv_motif.matrix.should == @matrix
|
|
22
|
-
@conv_motif.name.should == @name
|
|
23
|
-
@conv_motif.background.should == @background
|
|
24
|
-
# @conv_motif.tags.should == @tags
|
|
25
|
-
end
|
|
26
|
-
end
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
# describe '#tagged?' do
|
|
30
|
-
# context 'when PM marked with Collection object' do
|
|
31
|
-
# context 'without collection-name' do
|
|
32
|
-
# before :each do
|
|
33
|
-
# @marking_collection = Collection.new
|
|
34
|
-
# @nonmarking_collection = Collection.new
|
|
35
|
-
# @pm = PM.new(matrix:[[1,1,1,1]], name:'Motif name')
|
|
36
|
-
# @pm.mark(@marking_collection)
|
|
37
|
-
# end
|
|
38
|
-
# it 'should be true for marking collection' do
|
|
39
|
-
# @pm.should be_tagged(@marking_collection)
|
|
40
|
-
# end
|
|
41
|
-
# it 'should be false for nonmarking collection' do
|
|
42
|
-
# @pm.should_not be_tagged(@nonmarking_collection)
|
|
43
|
-
# end
|
|
44
|
-
# it 'should be false for nil-name' do
|
|
45
|
-
# @pm.should_not be_tagged(nil)
|
|
46
|
-
# end
|
|
47
|
-
# it 'should be false for any string' do
|
|
48
|
-
# @pm.should_not be_tagged('Another name')
|
|
49
|
-
# end
|
|
50
|
-
# end
|
|
51
|
-
# context 'with collection-name' do
|
|
52
|
-
# before :each do
|
|
53
|
-
# @marking_collection = Collection.new(name: 'Collection name')
|
|
54
|
-
# @nonmarking_collection = Collection.new(name: 'Another name')
|
|
55
|
-
# @pm = PM.new(matrix:[[1,1,1,1]], name:'Motif name')
|
|
56
|
-
# @pm.mark(@marking_collection)
|
|
57
|
-
# end
|
|
58
|
-
# it 'should be true for marking collection' do
|
|
59
|
-
# @pm.should be_tagged(@marking_collection)
|
|
60
|
-
# end
|
|
61
|
-
# it 'should be false for nonmarking collection' do
|
|
62
|
-
# @pm.should_not be_tagged(@nonmarking_collection)
|
|
63
|
-
# end
|
|
64
|
-
# it 'should be true for name of marking collection' do
|
|
65
|
-
# @pm.should be_tagged('Collection name')
|
|
66
|
-
# end
|
|
67
|
-
# it 'should be false for string that is not name of marking collection' do
|
|
68
|
-
# @pm.should_not be_tagged('Another name')
|
|
69
|
-
# end
|
|
70
|
-
# end
|
|
71
|
-
# end
|
|
72
|
-
|
|
73
|
-
# context 'when PM marked with name' do
|
|
74
|
-
# before :each do
|
|
75
|
-
# @nonmarking_collection = Collection.new(name: 'Another name')
|
|
76
|
-
# @pm = PM.new(matrix:[[1,1,1,1]], name:'Motif name')
|
|
77
|
-
# @pm.mark('Mark name')
|
|
78
|
-
# end
|
|
79
|
-
# it 'should be true for marking name' do
|
|
80
|
-
# @pm.should be_tagged('Mark name')
|
|
81
|
-
# end
|
|
82
|
-
# it 'should be false for string that is not marking name' do
|
|
83
|
-
# @pm.should_not be_tagged('Another name')
|
|
84
|
-
# end
|
|
85
|
-
# it 'should be false for nonmarking collection' do
|
|
86
|
-
# @pm.should_not be_tagged(@nonmarking_collection)
|
|
87
|
-
# end
|
|
88
|
-
# end
|
|
3
|
+
describe Bioinform::MotifModel::PM do
|
|
89
4
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
# @collection_3 = Collection.new(name: 'Nonmarking collection')
|
|
95
|
-
# @pm = PM.new(matrix:[[1,1,1,1]], name:'Motif name')
|
|
96
|
-
# @pm.mark(@collection_1)
|
|
97
|
-
# @pm.mark(@collection_2)
|
|
98
|
-
# @pm.mark('Stringy-name')
|
|
99
|
-
# end
|
|
100
|
-
# it 'should be true for each mark' do
|
|
101
|
-
# @pm.should be_tagged(@collection_1)
|
|
102
|
-
# @pm.should be_tagged(@collection_2)
|
|
103
|
-
# @pm.should be_tagged('Stringy-name')
|
|
104
|
-
# end
|
|
105
|
-
# it 'should be false for not presented marks' do
|
|
106
|
-
# @pm.should_not be_tagged(@collection_3)
|
|
107
|
-
# @pm.should_not be_tagged('Bad stringy-name')
|
|
108
|
-
# end
|
|
109
|
-
# end
|
|
110
|
-
# end
|
|
111
|
-
|
|
112
|
-
describe '#==' do
|
|
113
|
-
it 'should be true iff motifs have the same matrix, background and name' do
|
|
114
|
-
pm = PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'First motif')
|
|
115
|
-
pm_eq = PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'First motif')
|
|
116
|
-
pm_neq_matrix = PM.new(matrix: [[1,2,3,4],[15,16,17,18]], name: 'First motif')
|
|
117
|
-
pm_neq_name = PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'Second motif')
|
|
118
|
-
pm_neq_background = PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'First motif').set_parameters(background: [1,2,2,1])
|
|
5
|
+
describe '.new' do
|
|
6
|
+
specify 'with matrix having more than 4 elements in a position' do
|
|
7
|
+
expect { Bioinform::MotifModel::PM.new([[1,2,3,1.567],[10,11,12,15,10],[-1.1, 0.6, 0.4, 0.321]]) }.to raise_error (Bioinform::Error)
|
|
8
|
+
end
|
|
119
9
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
pm.should_not == pm_neq_background
|
|
123
|
-
pm.should == pm_eq
|
|
124
|
-
end
|
|
10
|
+
specify 'with matrix having less than 4 elements in a position' do
|
|
11
|
+
expect { Bioinform::MotifModel::PM.new([[1,2,3,1.567],[10,11,12,15],[-1.1, 0.6]]) }.to raise_error (Bioinform::Error)
|
|
125
12
|
end
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
PM.valid_matrix?( {A: [1,1], C: [2,4], G: [3,5], T: [4, 6.5]} ).should be_false
|
|
130
|
-
PM.valid_matrix?( [{A:1,C:2,G:3,T:4},{A:1,C:4,G:5,T: 6.5}] ).should be_false
|
|
131
|
-
PM.valid_matrix?( [[1,2,3,4],[1,4,6.5]] ).should be_false
|
|
132
|
-
PM.valid_matrix?( [[1,2,3],[1,4,6.5]] ).should be_false
|
|
133
|
-
PM.valid_matrix?( [[1,2,'3','4'],[1,'4','5',6.5]] ).should be_false
|
|
134
|
-
end
|
|
13
|
+
|
|
14
|
+
specify 'with matrix having positions in rows, nucleotides in columns' do
|
|
15
|
+
expect { Bioinform::MotifModel::PM.new([[1,2,3],[10,-11,12],[-1.1, 0.6, 0.4],[5,6,7]]) }.to raise_error (Bioinform::Error)
|
|
135
16
|
end
|
|
136
17
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
@pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
|
|
140
|
-
end
|
|
141
|
-
it 'should return string with single-tabulated multiline matrix' do
|
|
142
|
-
@pm.to_s.should == "1\t2\t3\t4\n1\t4\t5\t6.5"
|
|
143
|
-
end
|
|
144
|
-
it 'should return positions in rows, letters in cols' do
|
|
145
|
-
@pm.to_s.split("\n").size.should == 2
|
|
146
|
-
@pm.to_s.split("\n").map{|pos| pos.split.size}.all?{|sz| sz==4}.should be_true
|
|
147
|
-
end
|
|
148
|
-
context 'with name specified' do
|
|
149
|
-
before :each do
|
|
150
|
-
@pm.name = 'Stub name'
|
|
151
|
-
end
|
|
152
|
-
it 'should return a string with a name and a matrix from the next line' do
|
|
153
|
-
@pm.to_s.should == "Stub name\n1\t2\t3\t4\n1\t4\t5\t6.5"
|
|
154
|
-
end
|
|
155
|
-
it 'should not return a name if argument is set to false' do
|
|
156
|
-
@pm.to_s(with_name: false).should == "1\t2\t3\t4\n1\t4\t5\t6.5"
|
|
157
|
-
end
|
|
158
|
-
end
|
|
159
|
-
context 'in letters_as_rows mode' do
|
|
160
|
-
it 'should print matrix with row-markers' do
|
|
161
|
-
@pm.to_s(letters_as_rows: true).should == "A|1\t1\nC|2\t4\nG|3\t5\nT|4\t6.5"
|
|
162
|
-
end
|
|
163
|
-
end
|
|
18
|
+
specify 'with empty matrix' do
|
|
19
|
+
expect { Bioinform::MotifModel::PM.new([]) }.to raise_error (Bioinform::Error)
|
|
164
20
|
end
|
|
165
21
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.should == " A C G T \n 1.0 2.0 3.0 4.0\n 5.0 6.0 7.0 8.0"
|
|
169
|
-
end
|
|
170
|
-
it 'should return a string of floats formatted with spaces' do
|
|
171
|
-
PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.should match(/1.0 +2.0 +3.0 +4.0 *\n *5.0 +6.0 +7.0 +8.0/)
|
|
172
|
-
end
|
|
173
|
-
it 'should contain first string of ACGT letters' do
|
|
174
|
-
PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.lines.first.should match(/A +C +G +T/)
|
|
175
|
-
end
|
|
176
|
-
it 'should round floats upto 3 digits' do
|
|
177
|
-
PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] ).pretty_string.should match(/1.1 +2.22 +3.333 +4.444 *\n *5.5 +6.66 +7.777 +8.889/)
|
|
178
|
-
end
|
|
22
|
+
context 'with valid matrix' do
|
|
23
|
+
let(:matrix) { [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]] }
|
|
179
24
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
@pm = PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] )
|
|
183
|
-
@pm.name = 'MyName'
|
|
184
|
-
end
|
|
185
|
-
it 'should contain name if parameter `with_name` isn\'t false' do
|
|
186
|
-
@pm.pretty_string.should match(/MyName\n/)
|
|
187
|
-
end
|
|
188
|
-
it 'should not contain name if parameter `with_name` is false' do
|
|
189
|
-
@pm.pretty_string(with_name: false).should_not match(/MyName\n/)
|
|
190
|
-
end
|
|
25
|
+
specify do
|
|
26
|
+
expect{ Bioinform::MotifModel::PM.new(matrix) }.not_to raise_error
|
|
191
27
|
end
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
@pm = PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] )
|
|
195
|
-
end
|
|
196
|
-
it 'should not contain name whether parameter `with_name` is or isn\'t false' do
|
|
197
|
-
@pm.pretty_string.should_not match(/MyName\n/)
|
|
198
|
-
@pm.pretty_string(with_name: false).should_not match(/MyName\n/)
|
|
199
|
-
end
|
|
28
|
+
specify do
|
|
29
|
+
expect( Bioinform::MotifModel::PM.new(matrix).matrix ).to eq matrix
|
|
200
30
|
end
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
@pm = PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] )
|
|
204
|
-
@pm.pretty_string(letters_as_rows: true).should == @pm.to_s(letters_as_rows: true)
|
|
205
|
-
end
|
|
31
|
+
specify do
|
|
32
|
+
expect( Bioinform::MotifModel::PM.new(matrix).alphabet ).to eq Bioinform::NucleotideAlphabet
|
|
206
33
|
end
|
|
207
34
|
end
|
|
35
|
+
end
|
|
208
36
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
37
|
+
describe '.from_string' do
|
|
38
|
+
specify {
|
|
39
|
+
expect( Bioinform::MotifModel::PM.from_string("1 2 3 4\n5 6 7 8").model.class ).to eq Bioinform::MotifModel::PM
|
|
40
|
+
}
|
|
41
|
+
specify {
|
|
42
|
+
expect( Bioinform::MotifModel::PM.from_string("1 2 3 4\n5 6 7 8").name ).to be_nil
|
|
43
|
+
}
|
|
44
|
+
specify {
|
|
45
|
+
expect( Bioinform::MotifModel::PM.from_string("1 2 3 4\n5 6 7 8").matrix ).to eq [[1,2,3,4],[5,6,7,8]]
|
|
46
|
+
}
|
|
47
|
+
specify {
|
|
48
|
+
expect( Bioinform::MotifModel::PM.from_string("1 2 3 4\n5 6 7 8") ).to be_kind_of Bioinform::MotifModel::NamedModel
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
specify {
|
|
52
|
+
expect( Bioinform::MotifModel::PM.from_string(">Motif name\n1 2 3 4\n5 6 7 8").name ).to eq 'Motif name'
|
|
53
|
+
}
|
|
54
|
+
specify {
|
|
55
|
+
expect( Bioinform::MotifModel::PM.from_string(">Motif name\n1 2 3 4\n5 6 7 8").matrix ).to eq [[1,2,3,4],[5,6,7,8]]
|
|
56
|
+
}
|
|
57
|
+
specify {
|
|
58
|
+
expect( Bioinform::MotifModel::PM.from_string(">Motif name\n1 2 3 4\n5 6 7 8") ).to be_kind_of Bioinform::MotifModel::NamedModel
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
context 'with custom parser' do
|
|
62
|
+
let(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :rows) }
|
|
63
|
+
specify{
|
|
64
|
+
expect( Bioinform::MotifModel::PM.from_string("1 5\n2 6\n3 7\n4 8", parser: parser).matrix ).to eq [[1,2,3,4],[5,6,7,8]]
|
|
65
|
+
}
|
|
66
|
+
end
|
|
67
|
+
context 'with custom alphabet' do
|
|
68
|
+
let(:alphabet) { Bioinform::NucleotideAlphabetWithN }
|
|
69
|
+
let(:parser) { Bioinform::MatrixParser.new(fix_nucleotides_number: alphabet.size) }
|
|
70
|
+
specify {
|
|
71
|
+
expect( Bioinform::MotifModel::PM.from_string("1 2 3 4 10\n5 6 7 8 100", alphabet: alphabet, parser: parser).matrix ).to eq [[1,2,3,4,10],[5,6,7,8,100]]
|
|
72
|
+
}
|
|
73
|
+
specify {
|
|
74
|
+
expect( Bioinform::MotifModel::PM.from_string("1 2 3 4 10\n5 6 7 8 100", alphabet: alphabet, parser: parser).alphabet ).to eq alphabet
|
|
75
|
+
}
|
|
214
76
|
end
|
|
77
|
+
end
|
|
215
78
|
|
|
216
|
-
|
|
79
|
+
describe '.from_file' do
|
|
80
|
+
include FakeFS::SpecHelpers
|
|
81
|
+
context 'with default configuration' do
|
|
217
82
|
before :each do
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
end
|
|
221
|
-
it 'should return a hash with keys A, C, G, T' do
|
|
222
|
-
@hsh.should be_kind_of Hash
|
|
223
|
-
@hsh.keys.sort.should == %w{A C G T}
|
|
224
|
-
end
|
|
225
|
-
it 'should contain matrix elements of corresponding letter' do
|
|
226
|
-
@hsh['A'].should == [1, 1]
|
|
227
|
-
@hsh['C'].should == [2, 4]
|
|
228
|
-
@hsh['G'].should == [3, 5]
|
|
229
|
-
@hsh['T'].should == [4, 6.5]
|
|
83
|
+
File.write 'motif.pwm', ">motif name inside\n1 2 3 4\n5 6 7 8"
|
|
84
|
+
File.write 'motifNameOutside.pwm', "1 2 3 4\n5 6 7 8"
|
|
230
85
|
end
|
|
231
|
-
it 'should be accessible both by name and symbol (e.g. pm.to_hash[:A] or pm.to_hash[\'A\'] is the same)' do
|
|
232
|
-
@hsh['A'].should == @hsh[:A]
|
|
233
|
-
@hsh['C'].should == @hsh[:C]
|
|
234
|
-
@hsh['G'].should == @hsh[:G]
|
|
235
|
-
@hsh['T'].should == @hsh[:T]
|
|
236
|
-
end
|
|
237
|
-
end
|
|
238
86
|
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
@pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
|
|
242
|
-
end
|
|
243
|
-
context 'when pm just created' do
|
|
244
|
-
it 'should be [1,1,1,1]' do
|
|
245
|
-
@pm.background.should == [1,1,1,1]
|
|
246
|
-
end
|
|
87
|
+
specify 'obtains motif name from file content when available' do
|
|
88
|
+
expect(Bioinform::MotifModel::PM.from_file('motif.pwm').name).to eq 'motif name inside'
|
|
247
89
|
end
|
|
248
|
-
end
|
|
249
90
|
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
@pm = PM.new( [[1, 2, 3, 4], [1, 4, 5, 6.5]] )
|
|
253
|
-
end
|
|
254
|
-
it 'should return pm object itself' do
|
|
255
|
-
@pm.reverse_complement!.should be_equal(@pm)
|
|
91
|
+
specify 'obtains motif name from filename when it is not available in file content' do
|
|
92
|
+
expect(Bioinform::MotifModel::PM.from_file('motifNameOutside.pwm').name).to eq 'motifNameOutside'
|
|
256
93
|
end
|
|
257
|
-
it 'should reverse matrix rows and columns' do
|
|
258
|
-
@pm.reverse_complement!
|
|
259
|
-
@pm.matrix.should == [[6.5, 5, 4, 1], [4, 3, 2, 1]]
|
|
260
|
-
end
|
|
261
|
-
end
|
|
262
94
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
end
|
|
267
|
-
it 'should return pm object itself' do
|
|
268
|
-
@pm.left_augment!(2).should be_equal(@pm)
|
|
269
|
-
end
|
|
270
|
-
it 'should add number of zero columns from the left' do
|
|
271
|
-
@pm.left_augment!(2)
|
|
272
|
-
@pm.matrix.should == [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [1, 2, 3, 4], [1, 4, 5, 6.5]]
|
|
95
|
+
specify 'obtains motif matrix correct' do
|
|
96
|
+
expect(Bioinform::MotifModel::PM.from_file('motif.pwm').matrix).to eq [[1,2,3,4],[5,6,7,8]]
|
|
97
|
+
expect(Bioinform::MotifModel::PM.from_file('motifNameOutside.pwm').matrix).to eq [[1,2,3,4],[5,6,7,8]]
|
|
273
98
|
end
|
|
274
99
|
end
|
|
275
100
|
|
|
276
|
-
|
|
277
|
-
before :each do
|
|
278
|
-
@pm = PM.new( [[1, 2, 3, 4], [1, 4, 5, 6.5]] )
|
|
279
|
-
end
|
|
280
|
-
it 'should return pm object itself' do
|
|
281
|
-
@pm.right_augment!(2).should be_equal(@pm)
|
|
282
|
-
end
|
|
283
|
-
it 'should add number of zero columns from the right' do
|
|
284
|
-
@pm.right_augment!(2)
|
|
285
|
-
@pm.matrix.should == [[1, 2, 3, 4], [1, 4, 5, 6.5], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]
|
|
286
|
-
end
|
|
287
|
-
end
|
|
101
|
+
end
|
|
288
102
|
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
@pm.discrete!(1)
|
|
299
|
-
@pm.matrix.should == [[2, 2, 4, 5], [7, 7, 4, 5]]
|
|
300
|
-
end
|
|
301
|
-
end
|
|
302
|
-
it 'should discrete each element of matrix multiplied by rate' do
|
|
303
|
-
@pm.discrete!(10)
|
|
304
|
-
@pm.matrix.should == [[13, 20, 32, 49], [66, 65, 33, 47]]
|
|
305
|
-
end
|
|
103
|
+
context 'with different alphabet' do
|
|
104
|
+
let(:matrix_4) { [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]] }
|
|
105
|
+
let(:matrix_15) { [[1,2,3,1.567, 12,-11,12,0,-1.1,0.6, 0.4,0.321,0.11,-1.23, 2.0],
|
|
106
|
+
[0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0]] }
|
|
107
|
+
specify do
|
|
108
|
+
expect{ Bioinform::MotifModel::PM.new(matrix_4, alphabet: Bioinform::IUPACAlphabet) }.to raise_error Bioinform::Error
|
|
109
|
+
end
|
|
110
|
+
specify do
|
|
111
|
+
expect{ Bioinform::MotifModel::PM.new(matrix_15, alphabet: Bioinform::IUPACAlphabet) }.not_to raise_error
|
|
306
112
|
end
|
|
307
113
|
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
@pm_3_positions = PM.new( [[1.3, 2.0, 3.2, 4.9], [5.0, 6.5, 3.2, 4.6], [1, 2, 3, 4]] )
|
|
312
|
-
end
|
|
313
|
-
context 'when background is [1,1,1,1]' do
|
|
314
|
-
it 'should be equal to number of words' do
|
|
315
|
-
@pm_2_positions.vocabulary_volume.should == 4**2
|
|
316
|
-
@pm_3_positions.vocabulary_volume.should == 4**3
|
|
317
|
-
end
|
|
318
|
-
end
|
|
319
|
-
context 'when background is normalized probabilities' do
|
|
320
|
-
it 'should be 1.0' do
|
|
321
|
-
@pm_2_positions.background = [0.2, 0.3, 0.3, 0.2]
|
|
322
|
-
@pm_2_positions.vocabulary_volume.should == 1.0
|
|
114
|
+
let(:iupac_pm) { Bioinform::MotifModel::PM.new(matrix_15, alphabet: Bioinform::IUPACAlphabet) }
|
|
115
|
+
specify { expect(iupac_pm.matrix).to eq matrix_15 }
|
|
116
|
+
specify { expect(iupac_pm.alphabet).to eq Bioinform::IUPACAlphabet }
|
|
323
117
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
118
|
+
# A C G T AC AG AT CG CT GT ACG ACT AGT CGT ACGT
|
|
119
|
+
# 1,2,3,1.567, 12, -11, 12, 0, -1.1, 0.6, 0.4, 0.321,0.11,-1.23, 2.0
|
|
120
|
+
specify { expect(iupac_pm.complemented.matrix).to eq [[1.567,3,2,1, 0.6, -1.1, 12, 0, -11, 12, -1.23, 0.11,0.321,0.4, 2.0],
|
|
121
|
+
[0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0]] }
|
|
122
|
+
specify { expect(iupac_pm.complemented.alphabet).to eq Bioinform::IUPACAlphabet }
|
|
329
123
|
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
@pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
|
334
|
-
@pm_2 = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
|
335
|
-
end
|
|
336
|
-
it 'should return copy of object not object itself' do
|
|
337
|
-
@pm.send(meth).should_not be_equal @pm
|
|
338
|
-
end
|
|
339
|
-
it 'should == to bang-method' do
|
|
340
|
-
@pm.send(meth).to_s.should == @pm_2.send("#{meth}!").to_s
|
|
341
|
-
end
|
|
342
|
-
end
|
|
343
|
-
end
|
|
124
|
+
specify { expect(iupac_pm.reversed.matrix).to eq [[0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0],
|
|
125
|
+
[1,2,3,1.567, 12,-11,12,0,-1.1,0.6, 0.4,0.321,0.11,-1.23, 2.0]] }
|
|
126
|
+
specify { expect(iupac_pm.reversed.alphabet).to eq Bioinform::IUPACAlphabet }
|
|
344
127
|
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
@pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
|
349
|
-
@pm_2 = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
|
|
350
|
-
end
|
|
351
|
-
it 'should return copy of object not object itself' do
|
|
352
|
-
@pm.send(meth, 2).should_not be_equal @pm
|
|
353
|
-
end
|
|
354
|
-
it 'should == to bang-method' do
|
|
355
|
-
@pm.send(meth, 2).to_s.should == @pm_2.send("#{meth}!", 2).to_s
|
|
356
|
-
end
|
|
357
|
-
end
|
|
358
|
-
end
|
|
128
|
+
specify { expect(iupac_pm.reverse_complemented.alphabet).to eq Bioinform::IUPACAlphabet }
|
|
129
|
+
specify { expect(iupac_pm.reverse_complemented.matrix).to eq [[0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0],
|
|
130
|
+
[1.567,3,2,1, 0.6, -1.1, 12, 0, -11, 12, -1.23, 0.11,0.321,0.4, 2.0]] }
|
|
359
131
|
|
|
360
|
-
describe '#consensus' do
|
|
361
|
-
let(:pm) { PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -0.5, -1.5, -1.0]] ) }
|
|
362
|
-
it 'is a string of nucleotides of maximal weights' do
|
|
363
|
-
pm.consensus.should == 'GAC'
|
|
364
|
-
end
|
|
365
|
-
end
|
|
366
132
|
end
|
|
367
|
-
|
|
133
|
+
|
|
134
|
+
context 'valid PM' do
|
|
135
|
+
let(:matrix) { [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]] }
|
|
136
|
+
let(:pm) { Bioinform::MotifModel::PM.new(matrix) }
|
|
137
|
+
specify { expect( pm.length ).to eq 3 }
|
|
138
|
+
|
|
139
|
+
specify { expect(pm.to_s).to eq("1\t2\t3\t1.567\n"+"12\t-11\t12\t0\n"+"-1.1\t0.6\t0.4\t0.321") }
|
|
140
|
+
|
|
141
|
+
specify { expect(pm).to eq Bioinform::MotifModel::PM.new( [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]]) }
|
|
142
|
+
specify { expect(pm).not_to eq Bioinform::MotifModel::PM.new( [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]], alphabet: Bioinform::ComplementableAlphabet.new([:A,:B,:C,:D],[:D,:C,:B,:A])) }
|
|
143
|
+
specify { expect(pm).not_to eq Bioinform::MotifModel::PM.new( [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321],[1, 2, 3, 4]]) }
|
|
144
|
+
specify { expect(pm).not_to eq Bioinform::MotifModel::PM.new( [[1,2,3,1.567],[12,-11,12,0],[1, 2, 3, 4]]) }
|
|
145
|
+
specify { expect(pm).not_to eq [[1,2,3,1.567],[12,-11,12,0],[1, 2, 3, 4]] }
|
|
146
|
+
|
|
147
|
+
specify { expect(pm.named('motif name')).to be_kind_of Bioinform::MotifModel::NamedModel }
|
|
148
|
+
specify { expect(pm.named('motif name').model).to eq pm }
|
|
149
|
+
specify { expect(pm.named('motif name').name).to eq 'motif name' }
|
|
150
|
+
|
|
151
|
+
describe '#reversed, #complemented, #reverse_complemented' do
|
|
152
|
+
specify { expect(pm.reversed.matrix).to eq [[-1.1, 0.6, 0.4, 0.321],[12,-11,12,0],[1,2,3,1.567]] }
|
|
153
|
+
specify { expect(pm.complemented.matrix).to eq [[1.567,3,2,1],[0,12,-11,12],[0.321,0.4,0.6,-1.1]] }
|
|
154
|
+
specify { expect(pm.reverse_complemented.matrix).to eq [[0.321,0.4,0.6,-1.1],[0,12,-11,12],[1.567,3,2,1]] }
|
|
155
|
+
specify { expect(pm.revcomp.matrix).to eq [[0.321,0.4,0.6,-1.1],[0,12,-11,12],[1.567,3,2,1]] }
|
|
156
|
+
specify { expect(pm.reversed).to be_kind_of Bioinform::MotifModel::PM }
|
|
157
|
+
specify { expect(pm.complemented).to be_kind_of Bioinform::MotifModel::PM }
|
|
158
|
+
specify { expect(pm.reverse_complemented).to be_kind_of Bioinform::MotifModel::PM }
|
|
159
|
+
specify { expect(pm.revcomp).to be_kind_of Bioinform::MotifModel::PM }
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
specify { expect{|b| pm.each_position(&b) }.to yield_successive_args([1,2,3,1.567], [12,-11,12,0], [-1.1, 0.6, 0.4, 0.321]) }
|
|
163
|
+
specify { expect(pm.each_position).to be_kind_of Enumerator }
|
|
164
|
+
specify { expect{|b| pm.each_position.each(&b) }.to yield_successive_args([1,2,3,1.567], [12,-11,12,0], [-1.1, 0.6, 0.4, 0.321]) }
|
|
165
|
+
end
|
|
166
|
+
end
|