bioinform 0.1.17 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -3
  3. data/LICENSE +0 -1
  4. data/README.md +1 -1
  5. data/TODO.txt +23 -30
  6. data/bin/convert_motif +4 -0
  7. data/bin/pcm2pwm +1 -1
  8. data/bin/split_motifs +1 -1
  9. data/bioinform.gemspec +0 -2
  10. data/lib/bioinform.rb +54 -16
  11. data/lib/bioinform/alphabet.rb +85 -0
  12. data/lib/bioinform/background.rb +90 -0
  13. data/lib/bioinform/cli.rb +1 -2
  14. data/lib/bioinform/cli/convert_motif.rb +52 -17
  15. data/lib/bioinform/cli/pcm2pwm.rb +32 -26
  16. data/lib/bioinform/cli/split_motifs.rb +31 -30
  17. data/lib/bioinform/conversion_algorithms.rb +6 -0
  18. data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
  19. data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
  20. data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
  21. data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
  22. data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
  23. data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
  24. data/lib/bioinform/data_models.rb +1 -7
  25. data/lib/bioinform/data_models/named_model.rb +38 -0
  26. data/lib/bioinform/data_models/pcm.rb +18 -28
  27. data/lib/bioinform/data_models/pm.rb +73 -170
  28. data/lib/bioinform/data_models/ppm.rb +11 -24
  29. data/lib/bioinform/data_models/pwm.rb +30 -56
  30. data/lib/bioinform/errors.rb +17 -0
  31. data/lib/bioinform/formatters.rb +4 -2
  32. data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
  33. data/lib/bioinform/formatters/motif_formatter.rb +69 -0
  34. data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
  35. data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
  36. data/lib/bioinform/parsers.rb +1 -8
  37. data/lib/bioinform/parsers/matrix_parser.rb +44 -36
  38. data/lib/bioinform/parsers/motif_splitter.rb +45 -0
  39. data/lib/bioinform/support.rb +46 -14
  40. data/lib/bioinform/support/strip_doc.rb +1 -1
  41. data/lib/bioinform/version.rb +1 -1
  42. data/spec/alphabet_spec.rb +79 -0
  43. data/spec/background_spec.rb +57 -0
  44. data/spec/cli/cli_spec.rb +6 -6
  45. data/spec/cli/convert_motif_spec.rb +88 -88
  46. data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
  47. data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
  48. data/spec/cli/pcm2pwm_spec.rb +22 -23
  49. data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
  50. data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
  51. data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
  52. data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
  53. data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
  54. data/spec/cli/split_motifs_spec.rb +6 -21
  55. data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
  56. data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
  57. data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
  58. data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
  59. data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
  60. data/spec/data_models/named_model_spec.rb +41 -0
  61. data/spec/data_models/pcm_spec.rb +114 -45
  62. data/spec/data_models/pm_spec.rb +132 -333
  63. data/spec/data_models/ppm_spec.rb +47 -44
  64. data/spec/data_models/pwm_spec.rb +85 -77
  65. data/spec/fabricators/motif_formats_fabricator.rb +116 -116
  66. data/spec/formatters/consensus_formatter_spec.rb +26 -0
  67. data/spec/formatters/raw_formatter_spec.rb +169 -0
  68. data/spec/parsers/matrix_parser_spec.rb +216 -0
  69. data/spec/parsers/motif_splitter_spec.rb +87 -0
  70. data/spec/spec_helper.rb +2 -2
  71. data/spec/spec_helper_source.rb +25 -5
  72. data/spec/support_spec.rb +31 -0
  73. metadata +43 -124
  74. data/bin/merge_into_collection +0 -4
  75. data/lib/bioinform/cli/merge_into_collection.rb +0 -80
  76. data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
  77. data/lib/bioinform/data_models/collection.rb +0 -75
  78. data/lib/bioinform/data_models/motif.rb +0 -56
  79. data/lib/bioinform/formatters/raw_formatter.rb +0 -41
  80. data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
  81. data/lib/bioinform/parsers/parser.rb +0 -92
  82. data/lib/bioinform/parsers/splittable_parser.rb +0 -57
  83. data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
  84. data/lib/bioinform/parsers/string_parser.rb +0 -72
  85. data/lib/bioinform/parsers/trivial_parser.rb +0 -34
  86. data/lib/bioinform/parsers/yaml_parser.rb +0 -35
  87. data/lib/bioinform/support/advanced_scan.rb +0 -8
  88. data/lib/bioinform/support/array_product.rb +0 -6
  89. data/lib/bioinform/support/array_zip.rb +0 -6
  90. data/lib/bioinform/support/collect_hash.rb +0 -7
  91. data/lib/bioinform/support/deep_dup.rb +0 -5
  92. data/lib/bioinform/support/delete_many.rb +0 -14
  93. data/lib/bioinform/support/inverf.rb +0 -13
  94. data/lib/bioinform/support/multiline_squish.rb +0 -6
  95. data/lib/bioinform/support/parameters.rb +0 -28
  96. data/lib/bioinform/support/partial_sums.rb +0 -16
  97. data/lib/bioinform/support/same_by.rb +0 -12
  98. data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
  99. data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
  100. data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
  101. data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
  102. data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
  103. data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
  104. data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
  105. data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
  106. data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
  107. data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
  108. data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
  109. data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
  110. data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
  111. data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
  112. data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
  113. data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
  114. data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
  115. data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
  116. data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
  117. data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
  118. data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
  119. data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
  120. data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
  121. data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
  122. data/spec/cli/data/split_motifs/collection.yaml +0 -188
  123. data/spec/cli/merge_into_collection_spec.rb +0 -100
  124. data/spec/data_models/collection_spec.rb +0 -98
  125. data/spec/data_models/motif_spec.rb +0 -224
  126. data/spec/fabricators/collection_fabricator.rb +0 -8
  127. data/spec/fabricators/motif_fabricator.rb +0 -33
  128. data/spec/fabricators/pcm_fabricator.rb +0 -25
  129. data/spec/fabricators/pm_fabricator.rb +0 -52
  130. data/spec/fabricators/ppm_fabricator.rb +0 -14
  131. data/spec/fabricators/pwm_fabricator.rb +0 -16
  132. data/spec/parsers/parser_spec.rb +0 -152
  133. data/spec/parsers/string_fantom_parser_spec.rb +0 -70
  134. data/spec/parsers/string_parser_spec.rb +0 -77
  135. data/spec/parsers/trivial_parser_spec.rb +0 -64
  136. data/spec/parsers/yaml_parser_spec.rb +0 -50
  137. data/spec/support/advanced_scan_spec.rb +0 -32
  138. data/spec/support/array_product_spec.rb +0 -15
  139. data/spec/support/array_zip_spec.rb +0 -15
  140. data/spec/support/collect_hash_spec.rb +0 -15
  141. data/spec/support/delete_many_spec.rb +0 -44
  142. data/spec/support/inverf_spec.rb +0 -19
  143. data/spec/support/multiline_squish_spec.rb +0 -25
  144. data/spec/support/partial_sums_spec.rb +0 -30
  145. data/spec/support/same_by_spec.rb +0 -36
@@ -1,367 +1,166 @@
1
- require_relative '../spec_helper'
2
- require_relative '../../lib/bioinform/data_models/pm'
1
+ require 'bioinform/data_models/pm'
3
2
 
4
- module Bioinform
5
- describe PM do
6
- {:as_pcm => [PCM, [[1,10,3,4],[5,6,7,0]]], :as_pwm => [PWM, [[1,2,3,4],[5,6,7,8]]], :as_ppm => [PPM, [[0.1,0.2,0.3,0.4],[0.5,0.1,0.3,0.1]]]}.each do |converter_method, (result_klass, matrix)|
7
- describe "##{converter_method}" do
8
- before :each do
9
- @collection = Collection.new(name: 'Collection 1')
10
- @matrix = matrix
11
- @name = 'PM_motif'
12
- @background = [0.2,0.3,0.3,0.2]
13
- @tags = [@collection, 'Collection 2']
14
- @pm = PM.new(matrix: matrix, name: @name, background: @background, tags: @tags)
15
- @conv_motif = @pm.send converter_method
16
- end
17
- it "should return an instance of #{result_klass}" do
18
- @conv_motif.should be_kind_of(result_klass)
19
- end
20
- it 'should return have the same matrix, name and background' do #, background and tags' do
21
- @conv_motif.matrix.should == @matrix
22
- @conv_motif.name.should == @name
23
- @conv_motif.background.should == @background
24
- # @conv_motif.tags.should == @tags
25
- end
26
- end
27
- end
28
-
29
- # describe '#tagged?' do
30
- # context 'when PM marked with Collection object' do
31
- # context 'without collection-name' do
32
- # before :each do
33
- # @marking_collection = Collection.new
34
- # @nonmarking_collection = Collection.new
35
- # @pm = PM.new(matrix:[[1,1,1,1]], name:'Motif name')
36
- # @pm.mark(@marking_collection)
37
- # end
38
- # it 'should be true for marking collection' do
39
- # @pm.should be_tagged(@marking_collection)
40
- # end
41
- # it 'should be false for nonmarking collection' do
42
- # @pm.should_not be_tagged(@nonmarking_collection)
43
- # end
44
- # it 'should be false for nil-name' do
45
- # @pm.should_not be_tagged(nil)
46
- # end
47
- # it 'should be false for any string' do
48
- # @pm.should_not be_tagged('Another name')
49
- # end
50
- # end
51
- # context 'with collection-name' do
52
- # before :each do
53
- # @marking_collection = Collection.new(name: 'Collection name')
54
- # @nonmarking_collection = Collection.new(name: 'Another name')
55
- # @pm = PM.new(matrix:[[1,1,1,1]], name:'Motif name')
56
- # @pm.mark(@marking_collection)
57
- # end
58
- # it 'should be true for marking collection' do
59
- # @pm.should be_tagged(@marking_collection)
60
- # end
61
- # it 'should be false for nonmarking collection' do
62
- # @pm.should_not be_tagged(@nonmarking_collection)
63
- # end
64
- # it 'should be true for name of marking collection' do
65
- # @pm.should be_tagged('Collection name')
66
- # end
67
- # it 'should be false for string that is not name of marking collection' do
68
- # @pm.should_not be_tagged('Another name')
69
- # end
70
- # end
71
- # end
72
-
73
- # context 'when PM marked with name' do
74
- # before :each do
75
- # @nonmarking_collection = Collection.new(name: 'Another name')
76
- # @pm = PM.new(matrix:[[1,1,1,1]], name:'Motif name')
77
- # @pm.mark('Mark name')
78
- # end
79
- # it 'should be true for marking name' do
80
- # @pm.should be_tagged('Mark name')
81
- # end
82
- # it 'should be false for string that is not marking name' do
83
- # @pm.should_not be_tagged('Another name')
84
- # end
85
- # it 'should be false for nonmarking collection' do
86
- # @pm.should_not be_tagged(@nonmarking_collection)
87
- # end
88
- # end
3
+ describe Bioinform::MotifModel::PM do
89
4
 
90
- # context 'when PM marked with several marks' do
91
- # before :each do
92
- # @collection_1 = Collection.new(name: 'First name')
93
- # @collection_2 = Collection.new(name: 'Second name')
94
- # @collection_3 = Collection.new(name: 'Nonmarking collection')
95
- # @pm = PM.new(matrix:[[1,1,1,1]], name:'Motif name')
96
- # @pm.mark(@collection_1)
97
- # @pm.mark(@collection_2)
98
- # @pm.mark('Stringy-name')
99
- # end
100
- # it 'should be true for each mark' do
101
- # @pm.should be_tagged(@collection_1)
102
- # @pm.should be_tagged(@collection_2)
103
- # @pm.should be_tagged('Stringy-name')
104
- # end
105
- # it 'should be false for not presented marks' do
106
- # @pm.should_not be_tagged(@collection_3)
107
- # @pm.should_not be_tagged('Bad stringy-name')
108
- # end
109
- # end
110
- # end
111
-
112
- describe '#==' do
113
- it 'should be true iff motifs have the same matrix, background and name' do
114
- pm = PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'First motif')
115
- pm_eq = PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'First motif')
116
- pm_neq_matrix = PM.new(matrix: [[1,2,3,4],[15,16,17,18]], name: 'First motif')
117
- pm_neq_name = PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'Second motif')
118
- pm_neq_background = PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'First motif').set_parameters(background: [1,2,2,1])
5
+ describe '.new' do
6
+ specify 'with matrix having more than 4 elements in a position' do
7
+ expect { Bioinform::MotifModel::PM.new([[1,2,3,1.567],[10,11,12,15,10],[-1.1, 0.6, 0.4, 0.321]]) }.to raise_error (Bioinform::Error)
8
+ end
119
9
 
120
- pm.should_not == pm_neq_matrix
121
- pm.should_not == pm_neq_name
122
- pm.should_not == pm_neq_background
123
- pm.should == pm_eq
124
- end
10
+ specify 'with matrix having less than 4 elements in a position' do
11
+ expect { Bioinform::MotifModel::PM.new([[1,2,3,1.567],[10,11,12,15],[-1.1, 0.6]]) }.to raise_error (Bioinform::Error)
125
12
  end
126
- describe '::valid_matrix?' do
127
- it 'should be true iff an argument is an array of arrays of 4 numerics in a column' do
128
- PM.valid_matrix?( [[1,2,3,4],[1,4,5,6.5]] ).should be_true
129
- PM.valid_matrix?( {A: [1,1], C: [2,4], G: [3,5], T: [4, 6.5]} ).should be_false
130
- PM.valid_matrix?( [{A:1,C:2,G:3,T:4},{A:1,C:4,G:5,T: 6.5}] ).should be_false
131
- PM.valid_matrix?( [[1,2,3,4],[1,4,6.5]] ).should be_false
132
- PM.valid_matrix?( [[1,2,3],[1,4,6.5]] ).should be_false
133
- PM.valid_matrix?( [[1,2,'3','4'],[1,'4','5',6.5]] ).should be_false
134
- end
13
+
14
+ specify 'with matrix having positions in rows, nucleotides in columns' do
15
+ expect { Bioinform::MotifModel::PM.new([[1,2,3],[10,-11,12],[-1.1, 0.6, 0.4],[5,6,7]]) }.to raise_error (Bioinform::Error)
135
16
  end
136
17
 
137
- describe '#to_s' do
138
- before :each do
139
- @pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
140
- end
141
- it 'should return string with single-tabulated multiline matrix' do
142
- @pm.to_s.should == "1\t2\t3\t4\n1\t4\t5\t6.5"
143
- end
144
- it 'should return positions in rows, letters in cols' do
145
- @pm.to_s.split("\n").size.should == 2
146
- @pm.to_s.split("\n").map{|pos| pos.split.size}.all?{|sz| sz==4}.should be_true
147
- end
148
- context 'with name specified' do
149
- before :each do
150
- @pm.name = 'Stub name'
151
- end
152
- it 'should return a string with a name and a matrix from the next line' do
153
- @pm.to_s.should == "Stub name\n1\t2\t3\t4\n1\t4\t5\t6.5"
154
- end
155
- it 'should not return a name if argument is set to false' do
156
- @pm.to_s(with_name: false).should == "1\t2\t3\t4\n1\t4\t5\t6.5"
157
- end
158
- end
159
- context 'in letters_as_rows mode' do
160
- it 'should print matrix with row-markers' do
161
- @pm.to_s(letters_as_rows: true).should == "A|1\t1\nC|2\t4\nG|3\t5\nT|4\t6.5"
162
- end
163
- end
18
+ specify 'with empty matrix' do
19
+ expect { Bioinform::MotifModel::PM.new([]) }.to raise_error (Bioinform::Error)
164
20
  end
165
21
 
166
- describe '#pretty_string' do
167
- it 'should format string with 7-chars fields' do
168
- PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.should == " A C G T \n 1.0 2.0 3.0 4.0\n 5.0 6.0 7.0 8.0"
169
- end
170
- it 'should return a string of floats formatted with spaces' do
171
- PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.should match(/1.0 +2.0 +3.0 +4.0 *\n *5.0 +6.0 +7.0 +8.0/)
172
- end
173
- it 'should contain first string of ACGT letters' do
174
- PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.lines.first.should match(/A +C +G +T/)
175
- end
176
- it 'should round floats upto 3 digits' do
177
- PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] ).pretty_string.should match(/1.1 +2.22 +3.333 +4.444 *\n *5.5 +6.66 +7.777 +8.889/)
178
- end
22
+ context 'with valid matrix' do
23
+ let(:matrix) { [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]] }
179
24
 
180
- context 'with name specified' do
181
- before :each do
182
- @pm = PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] )
183
- @pm.name = 'MyName'
184
- end
185
- it 'should contain name if parameter `with_name` isn\'t false' do
186
- @pm.pretty_string.should match(/MyName\n/)
187
- end
188
- it 'should not contain name if parameter `with_name` is false' do
189
- @pm.pretty_string(with_name: false).should_not match(/MyName\n/)
190
- end
25
+ specify do
26
+ expect{ Bioinform::MotifModel::PM.new(matrix) }.not_to raise_error
191
27
  end
192
- context 'without name specified' do
193
- before :each do
194
- @pm = PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] )
195
- end
196
- it 'should not contain name whether parameter `with_name` is or isn\'t false' do
197
- @pm.pretty_string.should_not match(/MyName\n/)
198
- @pm.pretty_string(with_name: false).should_not match(/MyName\n/)
199
- end
28
+ specify do
29
+ expect( Bioinform::MotifModel::PM.new(matrix).matrix ).to eq matrix
200
30
  end
201
- context 'in letters_as_rows mode' do
202
- it 'should print matrix with row-markers as to_s do' do
203
- @pm = PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] )
204
- @pm.pretty_string(letters_as_rows: true).should == @pm.to_s(letters_as_rows: true)
205
- end
31
+ specify do
32
+ expect( Bioinform::MotifModel::PM.new(matrix).alphabet ).to eq Bioinform::NucleotideAlphabet
206
33
  end
207
34
  end
35
+ end
208
36
 
209
- describe '#size' do
210
- it 'should return number of positions' do
211
- @pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
212
- @pm.size.should == 2
213
- end
37
+ describe '.from_string' do
38
+ specify {
39
+ expect( Bioinform::MotifModel::PM.from_string("1 2 3 4\n5 6 7 8").model.class ).to eq Bioinform::MotifModel::PM
40
+ }
41
+ specify {
42
+ expect( Bioinform::MotifModel::PM.from_string("1 2 3 4\n5 6 7 8").name ).to be_nil
43
+ }
44
+ specify {
45
+ expect( Bioinform::MotifModel::PM.from_string("1 2 3 4\n5 6 7 8").matrix ).to eq [[1,2,3,4],[5,6,7,8]]
46
+ }
47
+ specify {
48
+ expect( Bioinform::MotifModel::PM.from_string("1 2 3 4\n5 6 7 8") ).to be_kind_of Bioinform::MotifModel::NamedModel
49
+ }
50
+
51
+ specify {
52
+ expect( Bioinform::MotifModel::PM.from_string(">Motif name\n1 2 3 4\n5 6 7 8").name ).to eq 'Motif name'
53
+ }
54
+ specify {
55
+ expect( Bioinform::MotifModel::PM.from_string(">Motif name\n1 2 3 4\n5 6 7 8").matrix ).to eq [[1,2,3,4],[5,6,7,8]]
56
+ }
57
+ specify {
58
+ expect( Bioinform::MotifModel::PM.from_string(">Motif name\n1 2 3 4\n5 6 7 8") ).to be_kind_of Bioinform::MotifModel::NamedModel
59
+ }
60
+
61
+ context 'with custom parser' do
62
+ let(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :rows) }
63
+ specify{
64
+ expect( Bioinform::MotifModel::PM.from_string("1 5\n2 6\n3 7\n4 8", parser: parser).matrix ).to eq [[1,2,3,4],[5,6,7,8]]
65
+ }
66
+ end
67
+ context 'with custom alphabet' do
68
+ let(:alphabet) { Bioinform::NucleotideAlphabetWithN }
69
+ let(:parser) { Bioinform::MatrixParser.new(fix_nucleotides_number: alphabet.size) }
70
+ specify {
71
+ expect( Bioinform::MotifModel::PM.from_string("1 2 3 4 10\n5 6 7 8 100", alphabet: alphabet, parser: parser).matrix ).to eq [[1,2,3,4,10],[5,6,7,8,100]]
72
+ }
73
+ specify {
74
+ expect( Bioinform::MotifModel::PM.from_string("1 2 3 4 10\n5 6 7 8 100", alphabet: alphabet, parser: parser).alphabet ).to eq alphabet
75
+ }
214
76
  end
77
+ end
215
78
 
216
- describe '#to_hash' do
79
+ describe '.from_file' do
80
+ include FakeFS::SpecHelpers
81
+ context 'with default configuration' do
217
82
  before :each do
218
- @pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
219
- @hsh = @pm.to_hash
220
- end
221
- it 'should return a hash with keys A, C, G, T' do
222
- @hsh.should be_kind_of Hash
223
- @hsh.keys.sort.should == %w{A C G T}
224
- end
225
- it 'should contain matrix elements of corresponding letter' do
226
- @hsh['A'].should == [1, 1]
227
- @hsh['C'].should == [2, 4]
228
- @hsh['G'].should == [3, 5]
229
- @hsh['T'].should == [4, 6.5]
83
+ File.write 'motif.pwm', ">motif name inside\n1 2 3 4\n5 6 7 8"
84
+ File.write 'motifNameOutside.pwm', "1 2 3 4\n5 6 7 8"
230
85
  end
231
- it 'should be accessible both by name and symbol (e.g. pm.to_hash[:A] or pm.to_hash[\'A\'] is the same)' do
232
- @hsh['A'].should == @hsh[:A]
233
- @hsh['C'].should == @hsh[:C]
234
- @hsh['G'].should == @hsh[:G]
235
- @hsh['T'].should == @hsh[:T]
236
- end
237
- end
238
86
 
239
- describe '#background' do
240
- before :each do
241
- @pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
242
- end
243
- context 'when pm just created' do
244
- it 'should be [1,1,1,1]' do
245
- @pm.background.should == [1,1,1,1]
246
- end
87
+ specify 'obtains motif name from file content when available' do
88
+ expect(Bioinform::MotifModel::PM.from_file('motif.pwm').name).to eq 'motif name inside'
247
89
  end
248
- end
249
90
 
250
- describe '#reverse_complement!' do
251
- before :each do
252
- @pm = PM.new( [[1, 2, 3, 4], [1, 4, 5, 6.5]] )
253
- end
254
- it 'should return pm object itself' do
255
- @pm.reverse_complement!.should be_equal(@pm)
91
+ specify 'obtains motif name from filename when it is not available in file content' do
92
+ expect(Bioinform::MotifModel::PM.from_file('motifNameOutside.pwm').name).to eq 'motifNameOutside'
256
93
  end
257
- it 'should reverse matrix rows and columns' do
258
- @pm.reverse_complement!
259
- @pm.matrix.should == [[6.5, 5, 4, 1], [4, 3, 2, 1]]
260
- end
261
- end
262
94
 
263
- describe '#left_augment!' do
264
- before :each do
265
- @pm = PM.new( [[1, 2, 3, 4], [1, 4, 5, 6.5]] )
266
- end
267
- it 'should return pm object itself' do
268
- @pm.left_augment!(2).should be_equal(@pm)
269
- end
270
- it 'should add number of zero columns from the left' do
271
- @pm.left_augment!(2)
272
- @pm.matrix.should == [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [1, 2, 3, 4], [1, 4, 5, 6.5]]
95
+ specify 'obtains motif matrix correct' do
96
+ expect(Bioinform::MotifModel::PM.from_file('motif.pwm').matrix).to eq [[1,2,3,4],[5,6,7,8]]
97
+ expect(Bioinform::MotifModel::PM.from_file('motifNameOutside.pwm').matrix).to eq [[1,2,3,4],[5,6,7,8]]
273
98
  end
274
99
  end
275
100
 
276
- describe '#right_augment!' do
277
- before :each do
278
- @pm = PM.new( [[1, 2, 3, 4], [1, 4, 5, 6.5]] )
279
- end
280
- it 'should return pm object itself' do
281
- @pm.right_augment!(2).should be_equal(@pm)
282
- end
283
- it 'should add number of zero columns from the right' do
284
- @pm.right_augment!(2)
285
- @pm.matrix.should == [[1, 2, 3, 4], [1, 4, 5, 6.5], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]
286
- end
287
- end
101
+ end
288
102
 
289
- describe '#discrete!' do
290
- before :each do
291
- @pm = PM.new( [[1.3, 2.0, 3.2, 4.9], [6.51, 6.5, 3.25, 4.633]] )
292
- end
293
- it 'should return pm object itself' do
294
- @pm.discrete!(10).should be_equal(@pm)
295
- end
296
- context 'rate is 1' do
297
- it 'should discrete each element of matrix' do
298
- @pm.discrete!(1)
299
- @pm.matrix.should == [[2, 2, 4, 5], [7, 7, 4, 5]]
300
- end
301
- end
302
- it 'should discrete each element of matrix multiplied by rate' do
303
- @pm.discrete!(10)
304
- @pm.matrix.should == [[13, 20, 32, 49], [66, 65, 33, 47]]
305
- end
103
+ context 'with different alphabet' do
104
+ let(:matrix_4) { [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]] }
105
+ let(:matrix_15) { [[1,2,3,1.567, 12,-11,12,0,-1.1,0.6, 0.4,0.321,0.11,-1.23, 2.0],
106
+ [0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0]] }
107
+ specify do
108
+ expect{ Bioinform::MotifModel::PM.new(matrix_4, alphabet: Bioinform::IUPACAlphabet) }.to raise_error Bioinform::Error
109
+ end
110
+ specify do
111
+ expect{ Bioinform::MotifModel::PM.new(matrix_15, alphabet: Bioinform::IUPACAlphabet) }.not_to raise_error
306
112
  end
307
113
 
308
- describe '#vocabulary_volume' do
309
- before :each do
310
- @pm_2_positions = PM.new( [[1.3, 2.0, 3.2, 4.9], [5.0, 6.5, 3.2, 4.6]] )
311
- @pm_3_positions = PM.new( [[1.3, 2.0, 3.2, 4.9], [5.0, 6.5, 3.2, 4.6], [1, 2, 3, 4]] )
312
- end
313
- context 'when background is [1,1,1,1]' do
314
- it 'should be equal to number of words' do
315
- @pm_2_positions.vocabulary_volume.should == 4**2
316
- @pm_3_positions.vocabulary_volume.should == 4**3
317
- end
318
- end
319
- context 'when background is normalized probabilities' do
320
- it 'should be 1.0' do
321
- @pm_2_positions.background = [0.2, 0.3, 0.3, 0.2]
322
- @pm_2_positions.vocabulary_volume.should == 1.0
114
+ let(:iupac_pm) { Bioinform::MotifModel::PM.new(matrix_15, alphabet: Bioinform::IUPACAlphabet) }
115
+ specify { expect(iupac_pm.matrix).to eq matrix_15 }
116
+ specify { expect(iupac_pm.alphabet).to eq Bioinform::IUPACAlphabet }
323
117
 
324
- @pm_3_positions.background = [0.2, 0.3, 0.3, 0.2]
325
- @pm_3_positions.vocabulary_volume.should == 1.0
326
- end
327
- end
328
- end
118
+ # A C G T AC AG AT CG CT GT ACG ACT AGT CGT ACGT
119
+ # 1,2,3,1.567, 12, -11, 12, 0, -1.1, 0.6, 0.4, 0.321,0.11,-1.23, 2.0
120
+ specify { expect(iupac_pm.complemented.matrix).to eq [[1.567,3,2,1, 0.6, -1.1, 12, 0, -11, 12, -1.23, 0.11,0.321,0.4, 2.0],
121
+ [0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0]] }
122
+ specify { expect(iupac_pm.complemented.alphabet).to eq Bioinform::IUPACAlphabet }
329
123
 
330
- [:reverse_complement].each do |meth|
331
- describe "nonbang method #{meth}" do
332
- before :each do
333
- @pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
334
- @pm_2 = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
335
- end
336
- it 'should return copy of object not object itself' do
337
- @pm.send(meth).should_not be_equal @pm
338
- end
339
- it 'should == to bang-method' do
340
- @pm.send(meth).to_s.should == @pm_2.send("#{meth}!").to_s
341
- end
342
- end
343
- end
124
+ specify { expect(iupac_pm.reversed.matrix).to eq [[0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0],
125
+ [1,2,3,1.567, 12,-11,12,0,-1.1,0.6, 0.4,0.321,0.11,-1.23, 2.0]] }
126
+ specify { expect(iupac_pm.reversed.alphabet).to eq Bioinform::IUPACAlphabet }
344
127
 
345
- [:discrete , :left_augment, :right_augment].each do |meth|
346
- describe "nonbang method #{meth}" do
347
- before :each do
348
- @pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
349
- @pm_2 = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
350
- end
351
- it 'should return copy of object not object itself' do
352
- @pm.send(meth, 2).should_not be_equal @pm
353
- end
354
- it 'should == to bang-method' do
355
- @pm.send(meth, 2).to_s.should == @pm_2.send("#{meth}!", 2).to_s
356
- end
357
- end
358
- end
128
+ specify { expect(iupac_pm.reverse_complemented.alphabet).to eq Bioinform::IUPACAlphabet }
129
+ specify { expect(iupac_pm.reverse_complemented.matrix).to eq [[0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0],
130
+ [1.567,3,2,1, 0.6, -1.1, 12, 0, -11, 12, -1.23, 0.11,0.321,0.4, 2.0]] }
359
131
 
360
- describe '#consensus' do
361
- let(:pm) { PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -0.5, -1.5, -1.0]] ) }
362
- it 'is a string of nucleotides of maximal weights' do
363
- pm.consensus.should == 'GAC'
364
- end
365
- end
366
132
  end
367
- end
133
+
134
+ context 'valid PM' do
135
+ let(:matrix) { [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]] }
136
+ let(:pm) { Bioinform::MotifModel::PM.new(matrix) }
137
+ specify { expect( pm.length ).to eq 3 }
138
+
139
+ specify { expect(pm.to_s).to eq("1\t2\t3\t1.567\n"+"12\t-11\t12\t0\n"+"-1.1\t0.6\t0.4\t0.321") }
140
+
141
+ specify { expect(pm).to eq Bioinform::MotifModel::PM.new( [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]]) }
142
+ specify { expect(pm).not_to eq Bioinform::MotifModel::PM.new( [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]], alphabet: Bioinform::ComplementableAlphabet.new([:A,:B,:C,:D],[:D,:C,:B,:A])) }
143
+ specify { expect(pm).not_to eq Bioinform::MotifModel::PM.new( [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321],[1, 2, 3, 4]]) }
144
+ specify { expect(pm).not_to eq Bioinform::MotifModel::PM.new( [[1,2,3,1.567],[12,-11,12,0],[1, 2, 3, 4]]) }
145
+ specify { expect(pm).not_to eq [[1,2,3,1.567],[12,-11,12,0],[1, 2, 3, 4]] }
146
+
147
+ specify { expect(pm.named('motif name')).to be_kind_of Bioinform::MotifModel::NamedModel }
148
+ specify { expect(pm.named('motif name').model).to eq pm }
149
+ specify { expect(pm.named('motif name').name).to eq 'motif name' }
150
+
151
+ describe '#reversed, #complemented, #reverse_complemented' do
152
+ specify { expect(pm.reversed.matrix).to eq [[-1.1, 0.6, 0.4, 0.321],[12,-11,12,0],[1,2,3,1.567]] }
153
+ specify { expect(pm.complemented.matrix).to eq [[1.567,3,2,1],[0,12,-11,12],[0.321,0.4,0.6,-1.1]] }
154
+ specify { expect(pm.reverse_complemented.matrix).to eq [[0.321,0.4,0.6,-1.1],[0,12,-11,12],[1.567,3,2,1]] }
155
+ specify { expect(pm.revcomp.matrix).to eq [[0.321,0.4,0.6,-1.1],[0,12,-11,12],[1.567,3,2,1]] }
156
+ specify { expect(pm.reversed).to be_kind_of Bioinform::MotifModel::PM }
157
+ specify { expect(pm.complemented).to be_kind_of Bioinform::MotifModel::PM }
158
+ specify { expect(pm.reverse_complemented).to be_kind_of Bioinform::MotifModel::PM }
159
+ specify { expect(pm.revcomp).to be_kind_of Bioinform::MotifModel::PM }
160
+ end
161
+
162
+ specify { expect{|b| pm.each_position(&b) }.to yield_successive_args([1,2,3,1.567], [12,-11,12,0], [-1.1, 0.6, 0.4, 0.321]) }
163
+ specify { expect(pm.each_position).to be_kind_of Enumerator }
164
+ specify { expect{|b| pm.each_position.each(&b) }.to yield_successive_args([1,2,3,1.567], [12,-11,12,0], [-1.1, 0.6, 0.4, 0.321]) }
165
+ end
166
+ end