bioinform 0.1.17 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (145) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -3
  3. data/LICENSE +0 -1
  4. data/README.md +1 -1
  5. data/TODO.txt +23 -30
  6. data/bin/convert_motif +4 -0
  7. data/bin/pcm2pwm +1 -1
  8. data/bin/split_motifs +1 -1
  9. data/bioinform.gemspec +0 -2
  10. data/lib/bioinform.rb +54 -16
  11. data/lib/bioinform/alphabet.rb +85 -0
  12. data/lib/bioinform/background.rb +90 -0
  13. data/lib/bioinform/cli.rb +1 -2
  14. data/lib/bioinform/cli/convert_motif.rb +52 -17
  15. data/lib/bioinform/cli/pcm2pwm.rb +32 -26
  16. data/lib/bioinform/cli/split_motifs.rb +31 -30
  17. data/lib/bioinform/conversion_algorithms.rb +6 -0
  18. data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
  19. data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
  20. data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
  21. data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
  22. data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
  23. data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
  24. data/lib/bioinform/data_models.rb +1 -7
  25. data/lib/bioinform/data_models/named_model.rb +38 -0
  26. data/lib/bioinform/data_models/pcm.rb +18 -28
  27. data/lib/bioinform/data_models/pm.rb +73 -170
  28. data/lib/bioinform/data_models/ppm.rb +11 -24
  29. data/lib/bioinform/data_models/pwm.rb +30 -56
  30. data/lib/bioinform/errors.rb +17 -0
  31. data/lib/bioinform/formatters.rb +4 -2
  32. data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
  33. data/lib/bioinform/formatters/motif_formatter.rb +69 -0
  34. data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
  35. data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
  36. data/lib/bioinform/parsers.rb +1 -8
  37. data/lib/bioinform/parsers/matrix_parser.rb +44 -36
  38. data/lib/bioinform/parsers/motif_splitter.rb +45 -0
  39. data/lib/bioinform/support.rb +46 -14
  40. data/lib/bioinform/support/strip_doc.rb +1 -1
  41. data/lib/bioinform/version.rb +1 -1
  42. data/spec/alphabet_spec.rb +79 -0
  43. data/spec/background_spec.rb +57 -0
  44. data/spec/cli/cli_spec.rb +6 -6
  45. data/spec/cli/convert_motif_spec.rb +88 -88
  46. data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
  47. data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
  48. data/spec/cli/pcm2pwm_spec.rb +22 -23
  49. data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
  50. data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
  51. data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
  52. data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
  53. data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
  54. data/spec/cli/split_motifs_spec.rb +6 -21
  55. data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
  56. data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
  57. data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
  58. data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
  59. data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
  60. data/spec/data_models/named_model_spec.rb +41 -0
  61. data/spec/data_models/pcm_spec.rb +114 -45
  62. data/spec/data_models/pm_spec.rb +132 -333
  63. data/spec/data_models/ppm_spec.rb +47 -44
  64. data/spec/data_models/pwm_spec.rb +85 -77
  65. data/spec/fabricators/motif_formats_fabricator.rb +116 -116
  66. data/spec/formatters/consensus_formatter_spec.rb +26 -0
  67. data/spec/formatters/raw_formatter_spec.rb +169 -0
  68. data/spec/parsers/matrix_parser_spec.rb +216 -0
  69. data/spec/parsers/motif_splitter_spec.rb +87 -0
  70. data/spec/spec_helper.rb +2 -2
  71. data/spec/spec_helper_source.rb +25 -5
  72. data/spec/support_spec.rb +31 -0
  73. metadata +43 -124
  74. data/bin/merge_into_collection +0 -4
  75. data/lib/bioinform/cli/merge_into_collection.rb +0 -80
  76. data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
  77. data/lib/bioinform/data_models/collection.rb +0 -75
  78. data/lib/bioinform/data_models/motif.rb +0 -56
  79. data/lib/bioinform/formatters/raw_formatter.rb +0 -41
  80. data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
  81. data/lib/bioinform/parsers/parser.rb +0 -92
  82. data/lib/bioinform/parsers/splittable_parser.rb +0 -57
  83. data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
  84. data/lib/bioinform/parsers/string_parser.rb +0 -72
  85. data/lib/bioinform/parsers/trivial_parser.rb +0 -34
  86. data/lib/bioinform/parsers/yaml_parser.rb +0 -35
  87. data/lib/bioinform/support/advanced_scan.rb +0 -8
  88. data/lib/bioinform/support/array_product.rb +0 -6
  89. data/lib/bioinform/support/array_zip.rb +0 -6
  90. data/lib/bioinform/support/collect_hash.rb +0 -7
  91. data/lib/bioinform/support/deep_dup.rb +0 -5
  92. data/lib/bioinform/support/delete_many.rb +0 -14
  93. data/lib/bioinform/support/inverf.rb +0 -13
  94. data/lib/bioinform/support/multiline_squish.rb +0 -6
  95. data/lib/bioinform/support/parameters.rb +0 -28
  96. data/lib/bioinform/support/partial_sums.rb +0 -16
  97. data/lib/bioinform/support/same_by.rb +0 -12
  98. data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
  99. data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
  100. data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
  101. data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
  102. data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
  103. data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
  104. data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
  105. data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
  106. data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
  107. data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
  108. data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
  109. data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
  110. data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
  111. data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
  112. data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
  113. data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
  114. data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
  115. data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
  116. data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
  117. data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
  118. data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
  119. data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
  120. data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
  121. data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
  122. data/spec/cli/data/split_motifs/collection.yaml +0 -188
  123. data/spec/cli/merge_into_collection_spec.rb +0 -100
  124. data/spec/data_models/collection_spec.rb +0 -98
  125. data/spec/data_models/motif_spec.rb +0 -224
  126. data/spec/fabricators/collection_fabricator.rb +0 -8
  127. data/spec/fabricators/motif_fabricator.rb +0 -33
  128. data/spec/fabricators/pcm_fabricator.rb +0 -25
  129. data/spec/fabricators/pm_fabricator.rb +0 -52
  130. data/spec/fabricators/ppm_fabricator.rb +0 -14
  131. data/spec/fabricators/pwm_fabricator.rb +0 -16
  132. data/spec/parsers/parser_spec.rb +0 -152
  133. data/spec/parsers/string_fantom_parser_spec.rb +0 -70
  134. data/spec/parsers/string_parser_spec.rb +0 -77
  135. data/spec/parsers/trivial_parser_spec.rb +0 -64
  136. data/spec/parsers/yaml_parser_spec.rb +0 -50
  137. data/spec/support/advanced_scan_spec.rb +0 -32
  138. data/spec/support/array_product_spec.rb +0 -15
  139. data/spec/support/array_zip_spec.rb +0 -15
  140. data/spec/support/collect_hash_spec.rb +0 -15
  141. data/spec/support/delete_many_spec.rb +0 -44
  142. data/spec/support/inverf_spec.rb +0 -19
  143. data/spec/support/multiline_squish_spec.rb +0 -25
  144. data/spec/support/partial_sums_spec.rb +0 -30
  145. data/spec/support/same_by_spec.rb +0 -36
@@ -1,367 +1,166 @@
1
- require_relative '../spec_helper'
2
- require_relative '../../lib/bioinform/data_models/pm'
1
+ require 'bioinform/data_models/pm'
3
2
 
4
- module Bioinform
5
- describe PM do
6
- {:as_pcm => [PCM, [[1,10,3,4],[5,6,7,0]]], :as_pwm => [PWM, [[1,2,3,4],[5,6,7,8]]], :as_ppm => [PPM, [[0.1,0.2,0.3,0.4],[0.5,0.1,0.3,0.1]]]}.each do |converter_method, (result_klass, matrix)|
7
- describe "##{converter_method}" do
8
- before :each do
9
- @collection = Collection.new(name: 'Collection 1')
10
- @matrix = matrix
11
- @name = 'PM_motif'
12
- @background = [0.2,0.3,0.3,0.2]
13
- @tags = [@collection, 'Collection 2']
14
- @pm = PM.new(matrix: matrix, name: @name, background: @background, tags: @tags)
15
- @conv_motif = @pm.send converter_method
16
- end
17
- it "should return an instance of #{result_klass}" do
18
- @conv_motif.should be_kind_of(result_klass)
19
- end
20
- it 'should return have the same matrix, name and background' do #, background and tags' do
21
- @conv_motif.matrix.should == @matrix
22
- @conv_motif.name.should == @name
23
- @conv_motif.background.should == @background
24
- # @conv_motif.tags.should == @tags
25
- end
26
- end
27
- end
28
-
29
- # describe '#tagged?' do
30
- # context 'when PM marked with Collection object' do
31
- # context 'without collection-name' do
32
- # before :each do
33
- # @marking_collection = Collection.new
34
- # @nonmarking_collection = Collection.new
35
- # @pm = PM.new(matrix:[[1,1,1,1]], name:'Motif name')
36
- # @pm.mark(@marking_collection)
37
- # end
38
- # it 'should be true for marking collection' do
39
- # @pm.should be_tagged(@marking_collection)
40
- # end
41
- # it 'should be false for nonmarking collection' do
42
- # @pm.should_not be_tagged(@nonmarking_collection)
43
- # end
44
- # it 'should be false for nil-name' do
45
- # @pm.should_not be_tagged(nil)
46
- # end
47
- # it 'should be false for any string' do
48
- # @pm.should_not be_tagged('Another name')
49
- # end
50
- # end
51
- # context 'with collection-name' do
52
- # before :each do
53
- # @marking_collection = Collection.new(name: 'Collection name')
54
- # @nonmarking_collection = Collection.new(name: 'Another name')
55
- # @pm = PM.new(matrix:[[1,1,1,1]], name:'Motif name')
56
- # @pm.mark(@marking_collection)
57
- # end
58
- # it 'should be true for marking collection' do
59
- # @pm.should be_tagged(@marking_collection)
60
- # end
61
- # it 'should be false for nonmarking collection' do
62
- # @pm.should_not be_tagged(@nonmarking_collection)
63
- # end
64
- # it 'should be true for name of marking collection' do
65
- # @pm.should be_tagged('Collection name')
66
- # end
67
- # it 'should be false for string that is not name of marking collection' do
68
- # @pm.should_not be_tagged('Another name')
69
- # end
70
- # end
71
- # end
72
-
73
- # context 'when PM marked with name' do
74
- # before :each do
75
- # @nonmarking_collection = Collection.new(name: 'Another name')
76
- # @pm = PM.new(matrix:[[1,1,1,1]], name:'Motif name')
77
- # @pm.mark('Mark name')
78
- # end
79
- # it 'should be true for marking name' do
80
- # @pm.should be_tagged('Mark name')
81
- # end
82
- # it 'should be false for string that is not marking name' do
83
- # @pm.should_not be_tagged('Another name')
84
- # end
85
- # it 'should be false for nonmarking collection' do
86
- # @pm.should_not be_tagged(@nonmarking_collection)
87
- # end
88
- # end
3
+ describe Bioinform::MotifModel::PM do
89
4
 
90
- # context 'when PM marked with several marks' do
91
- # before :each do
92
- # @collection_1 = Collection.new(name: 'First name')
93
- # @collection_2 = Collection.new(name: 'Second name')
94
- # @collection_3 = Collection.new(name: 'Nonmarking collection')
95
- # @pm = PM.new(matrix:[[1,1,1,1]], name:'Motif name')
96
- # @pm.mark(@collection_1)
97
- # @pm.mark(@collection_2)
98
- # @pm.mark('Stringy-name')
99
- # end
100
- # it 'should be true for each mark' do
101
- # @pm.should be_tagged(@collection_1)
102
- # @pm.should be_tagged(@collection_2)
103
- # @pm.should be_tagged('Stringy-name')
104
- # end
105
- # it 'should be false for not presented marks' do
106
- # @pm.should_not be_tagged(@collection_3)
107
- # @pm.should_not be_tagged('Bad stringy-name')
108
- # end
109
- # end
110
- # end
111
-
112
- describe '#==' do
113
- it 'should be true iff motifs have the same matrix, background and name' do
114
- pm = PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'First motif')
115
- pm_eq = PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'First motif')
116
- pm_neq_matrix = PM.new(matrix: [[1,2,3,4],[15,16,17,18]], name: 'First motif')
117
- pm_neq_name = PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'Second motif')
118
- pm_neq_background = PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'First motif').set_parameters(background: [1,2,2,1])
5
+ describe '.new' do
6
+ specify 'with matrix having more than 4 elements in a position' do
7
+ expect { Bioinform::MotifModel::PM.new([[1,2,3,1.567],[10,11,12,15,10],[-1.1, 0.6, 0.4, 0.321]]) }.to raise_error (Bioinform::Error)
8
+ end
119
9
 
120
- pm.should_not == pm_neq_matrix
121
- pm.should_not == pm_neq_name
122
- pm.should_not == pm_neq_background
123
- pm.should == pm_eq
124
- end
10
+ specify 'with matrix having less than 4 elements in a position' do
11
+ expect { Bioinform::MotifModel::PM.new([[1,2,3,1.567],[10,11,12,15],[-1.1, 0.6]]) }.to raise_error (Bioinform::Error)
125
12
  end
126
- describe '::valid_matrix?' do
127
- it 'should be true iff an argument is an array of arrays of 4 numerics in a column' do
128
- PM.valid_matrix?( [[1,2,3,4],[1,4,5,6.5]] ).should be_true
129
- PM.valid_matrix?( {A: [1,1], C: [2,4], G: [3,5], T: [4, 6.5]} ).should be_false
130
- PM.valid_matrix?( [{A:1,C:2,G:3,T:4},{A:1,C:4,G:5,T: 6.5}] ).should be_false
131
- PM.valid_matrix?( [[1,2,3,4],[1,4,6.5]] ).should be_false
132
- PM.valid_matrix?( [[1,2,3],[1,4,6.5]] ).should be_false
133
- PM.valid_matrix?( [[1,2,'3','4'],[1,'4','5',6.5]] ).should be_false
134
- end
13
+
14
+ specify 'with matrix having positions in rows, nucleotides in columns' do
15
+ expect { Bioinform::MotifModel::PM.new([[1,2,3],[10,-11,12],[-1.1, 0.6, 0.4],[5,6,7]]) }.to raise_error (Bioinform::Error)
135
16
  end
136
17
 
137
- describe '#to_s' do
138
- before :each do
139
- @pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
140
- end
141
- it 'should return string with single-tabulated multiline matrix' do
142
- @pm.to_s.should == "1\t2\t3\t4\n1\t4\t5\t6.5"
143
- end
144
- it 'should return positions in rows, letters in cols' do
145
- @pm.to_s.split("\n").size.should == 2
146
- @pm.to_s.split("\n").map{|pos| pos.split.size}.all?{|sz| sz==4}.should be_true
147
- end
148
- context 'with name specified' do
149
- before :each do
150
- @pm.name = 'Stub name'
151
- end
152
- it 'should return a string with a name and a matrix from the next line' do
153
- @pm.to_s.should == "Stub name\n1\t2\t3\t4\n1\t4\t5\t6.5"
154
- end
155
- it 'should not return a name if argument is set to false' do
156
- @pm.to_s(with_name: false).should == "1\t2\t3\t4\n1\t4\t5\t6.5"
157
- end
158
- end
159
- context 'in letters_as_rows mode' do
160
- it 'should print matrix with row-markers' do
161
- @pm.to_s(letters_as_rows: true).should == "A|1\t1\nC|2\t4\nG|3\t5\nT|4\t6.5"
162
- end
163
- end
18
+ specify 'with empty matrix' do
19
+ expect { Bioinform::MotifModel::PM.new([]) }.to raise_error (Bioinform::Error)
164
20
  end
165
21
 
166
- describe '#pretty_string' do
167
- it 'should format string with 7-chars fields' do
168
- PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.should == " A C G T \n 1.0 2.0 3.0 4.0\n 5.0 6.0 7.0 8.0"
169
- end
170
- it 'should return a string of floats formatted with spaces' do
171
- PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.should match(/1.0 +2.0 +3.0 +4.0 *\n *5.0 +6.0 +7.0 +8.0/)
172
- end
173
- it 'should contain first string of ACGT letters' do
174
- PM.new( [[1,2,3,4],[5,6,7,8]] ).pretty_string.lines.first.should match(/A +C +G +T/)
175
- end
176
- it 'should round floats upto 3 digits' do
177
- PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] ).pretty_string.should match(/1.1 +2.22 +3.333 +4.444 *\n *5.5 +6.66 +7.777 +8.889/)
178
- end
22
+ context 'with valid matrix' do
23
+ let(:matrix) { [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]] }
179
24
 
180
- context 'with name specified' do
181
- before :each do
182
- @pm = PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] )
183
- @pm.name = 'MyName'
184
- end
185
- it 'should contain name if parameter `with_name` isn\'t false' do
186
- @pm.pretty_string.should match(/MyName\n/)
187
- end
188
- it 'should not contain name if parameter `with_name` is false' do
189
- @pm.pretty_string(with_name: false).should_not match(/MyName\n/)
190
- end
25
+ specify do
26
+ expect{ Bioinform::MotifModel::PM.new(matrix) }.not_to raise_error
191
27
  end
192
- context 'without name specified' do
193
- before :each do
194
- @pm = PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] )
195
- end
196
- it 'should not contain name whether parameter `with_name` is or isn\'t false' do
197
- @pm.pretty_string.should_not match(/MyName\n/)
198
- @pm.pretty_string(with_name: false).should_not match(/MyName\n/)
199
- end
28
+ specify do
29
+ expect( Bioinform::MotifModel::PM.new(matrix).matrix ).to eq matrix
200
30
  end
201
- context 'in letters_as_rows mode' do
202
- it 'should print matrix with row-markers as to_s do' do
203
- @pm = PM.new( [[1.1,2.22,3.333,4.4444],[5.5,6.66,7.777,8.8888]] )
204
- @pm.pretty_string(letters_as_rows: true).should == @pm.to_s(letters_as_rows: true)
205
- end
31
+ specify do
32
+ expect( Bioinform::MotifModel::PM.new(matrix).alphabet ).to eq Bioinform::NucleotideAlphabet
206
33
  end
207
34
  end
35
+ end
208
36
 
209
- describe '#size' do
210
- it 'should return number of positions' do
211
- @pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
212
- @pm.size.should == 2
213
- end
37
+ describe '.from_string' do
38
+ specify {
39
+ expect( Bioinform::MotifModel::PM.from_string("1 2 3 4\n5 6 7 8").model.class ).to eq Bioinform::MotifModel::PM
40
+ }
41
+ specify {
42
+ expect( Bioinform::MotifModel::PM.from_string("1 2 3 4\n5 6 7 8").name ).to be_nil
43
+ }
44
+ specify {
45
+ expect( Bioinform::MotifModel::PM.from_string("1 2 3 4\n5 6 7 8").matrix ).to eq [[1,2,3,4],[5,6,7,8]]
46
+ }
47
+ specify {
48
+ expect( Bioinform::MotifModel::PM.from_string("1 2 3 4\n5 6 7 8") ).to be_kind_of Bioinform::MotifModel::NamedModel
49
+ }
50
+
51
+ specify {
52
+ expect( Bioinform::MotifModel::PM.from_string(">Motif name\n1 2 3 4\n5 6 7 8").name ).to eq 'Motif name'
53
+ }
54
+ specify {
55
+ expect( Bioinform::MotifModel::PM.from_string(">Motif name\n1 2 3 4\n5 6 7 8").matrix ).to eq [[1,2,3,4],[5,6,7,8]]
56
+ }
57
+ specify {
58
+ expect( Bioinform::MotifModel::PM.from_string(">Motif name\n1 2 3 4\n5 6 7 8") ).to be_kind_of Bioinform::MotifModel::NamedModel
59
+ }
60
+
61
+ context 'with custom parser' do
62
+ let(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :rows) }
63
+ specify{
64
+ expect( Bioinform::MotifModel::PM.from_string("1 5\n2 6\n3 7\n4 8", parser: parser).matrix ).to eq [[1,2,3,4],[5,6,7,8]]
65
+ }
66
+ end
67
+ context 'with custom alphabet' do
68
+ let(:alphabet) { Bioinform::NucleotideAlphabetWithN }
69
+ let(:parser) { Bioinform::MatrixParser.new(fix_nucleotides_number: alphabet.size) }
70
+ specify {
71
+ expect( Bioinform::MotifModel::PM.from_string("1 2 3 4 10\n5 6 7 8 100", alphabet: alphabet, parser: parser).matrix ).to eq [[1,2,3,4,10],[5,6,7,8,100]]
72
+ }
73
+ specify {
74
+ expect( Bioinform::MotifModel::PM.from_string("1 2 3 4 10\n5 6 7 8 100", alphabet: alphabet, parser: parser).alphabet ).to eq alphabet
75
+ }
214
76
  end
77
+ end
215
78
 
216
- describe '#to_hash' do
79
+ describe '.from_file' do
80
+ include FakeFS::SpecHelpers
81
+ context 'with default configuration' do
217
82
  before :each do
218
- @pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
219
- @hsh = @pm.to_hash
220
- end
221
- it 'should return a hash with keys A, C, G, T' do
222
- @hsh.should be_kind_of Hash
223
- @hsh.keys.sort.should == %w{A C G T}
224
- end
225
- it 'should contain matrix elements of corresponding letter' do
226
- @hsh['A'].should == [1, 1]
227
- @hsh['C'].should == [2, 4]
228
- @hsh['G'].should == [3, 5]
229
- @hsh['T'].should == [4, 6.5]
83
+ File.write 'motif.pwm', ">motif name inside\n1 2 3 4\n5 6 7 8"
84
+ File.write 'motifNameOutside.pwm', "1 2 3 4\n5 6 7 8"
230
85
  end
231
- it 'should be accessible both by name and symbol (e.g. pm.to_hash[:A] or pm.to_hash[\'A\'] is the same)' do
232
- @hsh['A'].should == @hsh[:A]
233
- @hsh['C'].should == @hsh[:C]
234
- @hsh['G'].should == @hsh[:G]
235
- @hsh['T'].should == @hsh[:T]
236
- end
237
- end
238
86
 
239
- describe '#background' do
240
- before :each do
241
- @pm = PM.new( [[1,2,3,4],[1,4,5,6.5]] )
242
- end
243
- context 'when pm just created' do
244
- it 'should be [1,1,1,1]' do
245
- @pm.background.should == [1,1,1,1]
246
- end
87
+ specify 'obtains motif name from file content when available' do
88
+ expect(Bioinform::MotifModel::PM.from_file('motif.pwm').name).to eq 'motif name inside'
247
89
  end
248
- end
249
90
 
250
- describe '#reverse_complement!' do
251
- before :each do
252
- @pm = PM.new( [[1, 2, 3, 4], [1, 4, 5, 6.5]] )
253
- end
254
- it 'should return pm object itself' do
255
- @pm.reverse_complement!.should be_equal(@pm)
91
+ specify 'obtains motif name from filename when it is not available in file content' do
92
+ expect(Bioinform::MotifModel::PM.from_file('motifNameOutside.pwm').name).to eq 'motifNameOutside'
256
93
  end
257
- it 'should reverse matrix rows and columns' do
258
- @pm.reverse_complement!
259
- @pm.matrix.should == [[6.5, 5, 4, 1], [4, 3, 2, 1]]
260
- end
261
- end
262
94
 
263
- describe '#left_augment!' do
264
- before :each do
265
- @pm = PM.new( [[1, 2, 3, 4], [1, 4, 5, 6.5]] )
266
- end
267
- it 'should return pm object itself' do
268
- @pm.left_augment!(2).should be_equal(@pm)
269
- end
270
- it 'should add number of zero columns from the left' do
271
- @pm.left_augment!(2)
272
- @pm.matrix.should == [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [1, 2, 3, 4], [1, 4, 5, 6.5]]
95
+ specify 'obtains motif matrix correct' do
96
+ expect(Bioinform::MotifModel::PM.from_file('motif.pwm').matrix).to eq [[1,2,3,4],[5,6,7,8]]
97
+ expect(Bioinform::MotifModel::PM.from_file('motifNameOutside.pwm').matrix).to eq [[1,2,3,4],[5,6,7,8]]
273
98
  end
274
99
  end
275
100
 
276
- describe '#right_augment!' do
277
- before :each do
278
- @pm = PM.new( [[1, 2, 3, 4], [1, 4, 5, 6.5]] )
279
- end
280
- it 'should return pm object itself' do
281
- @pm.right_augment!(2).should be_equal(@pm)
282
- end
283
- it 'should add number of zero columns from the right' do
284
- @pm.right_augment!(2)
285
- @pm.matrix.should == [[1, 2, 3, 4], [1, 4, 5, 6.5], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]
286
- end
287
- end
101
+ end
288
102
 
289
- describe '#discrete!' do
290
- before :each do
291
- @pm = PM.new( [[1.3, 2.0, 3.2, 4.9], [6.51, 6.5, 3.25, 4.633]] )
292
- end
293
- it 'should return pm object itself' do
294
- @pm.discrete!(10).should be_equal(@pm)
295
- end
296
- context 'rate is 1' do
297
- it 'should discrete each element of matrix' do
298
- @pm.discrete!(1)
299
- @pm.matrix.should == [[2, 2, 4, 5], [7, 7, 4, 5]]
300
- end
301
- end
302
- it 'should discrete each element of matrix multiplied by rate' do
303
- @pm.discrete!(10)
304
- @pm.matrix.should == [[13, 20, 32, 49], [66, 65, 33, 47]]
305
- end
103
+ context 'with different alphabet' do
104
+ let(:matrix_4) { [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]] }
105
+ let(:matrix_15) { [[1,2,3,1.567, 12,-11,12,0,-1.1,0.6, 0.4,0.321,0.11,-1.23, 2.0],
106
+ [0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0]] }
107
+ specify do
108
+ expect{ Bioinform::MotifModel::PM.new(matrix_4, alphabet: Bioinform::IUPACAlphabet) }.to raise_error Bioinform::Error
109
+ end
110
+ specify do
111
+ expect{ Bioinform::MotifModel::PM.new(matrix_15, alphabet: Bioinform::IUPACAlphabet) }.not_to raise_error
306
112
  end
307
113
 
308
- describe '#vocabulary_volume' do
309
- before :each do
310
- @pm_2_positions = PM.new( [[1.3, 2.0, 3.2, 4.9], [5.0, 6.5, 3.2, 4.6]] )
311
- @pm_3_positions = PM.new( [[1.3, 2.0, 3.2, 4.9], [5.0, 6.5, 3.2, 4.6], [1, 2, 3, 4]] )
312
- end
313
- context 'when background is [1,1,1,1]' do
314
- it 'should be equal to number of words' do
315
- @pm_2_positions.vocabulary_volume.should == 4**2
316
- @pm_3_positions.vocabulary_volume.should == 4**3
317
- end
318
- end
319
- context 'when background is normalized probabilities' do
320
- it 'should be 1.0' do
321
- @pm_2_positions.background = [0.2, 0.3, 0.3, 0.2]
322
- @pm_2_positions.vocabulary_volume.should == 1.0
114
+ let(:iupac_pm) { Bioinform::MotifModel::PM.new(matrix_15, alphabet: Bioinform::IUPACAlphabet) }
115
+ specify { expect(iupac_pm.matrix).to eq matrix_15 }
116
+ specify { expect(iupac_pm.alphabet).to eq Bioinform::IUPACAlphabet }
323
117
 
324
- @pm_3_positions.background = [0.2, 0.3, 0.3, 0.2]
325
- @pm_3_positions.vocabulary_volume.should == 1.0
326
- end
327
- end
328
- end
118
+ # A C G T AC AG AT CG CT GT ACG ACT AGT CGT ACGT
119
+ # 1,2,3,1.567, 12, -11, 12, 0, -1.1, 0.6, 0.4, 0.321,0.11,-1.23, 2.0
120
+ specify { expect(iupac_pm.complemented.matrix).to eq [[1.567,3,2,1, 0.6, -1.1, 12, 0, -11, 12, -1.23, 0.11,0.321,0.4, 2.0],
121
+ [0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0]] }
122
+ specify { expect(iupac_pm.complemented.alphabet).to eq Bioinform::IUPACAlphabet }
329
123
 
330
- [:reverse_complement].each do |meth|
331
- describe "nonbang method #{meth}" do
332
- before :each do
333
- @pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
334
- @pm_2 = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
335
- end
336
- it 'should return copy of object not object itself' do
337
- @pm.send(meth).should_not be_equal @pm
338
- end
339
- it 'should == to bang-method' do
340
- @pm.send(meth).to_s.should == @pm_2.send("#{meth}!").to_s
341
- end
342
- end
343
- end
124
+ specify { expect(iupac_pm.reversed.matrix).to eq [[0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0],
125
+ [1,2,3,1.567, 12,-11,12,0,-1.1,0.6, 0.4,0.321,0.11,-1.23, 2.0]] }
126
+ specify { expect(iupac_pm.reversed.alphabet).to eq Bioinform::IUPACAlphabet }
344
127
 
345
- [:discrete , :left_augment, :right_augment].each do |meth|
346
- describe "nonbang method #{meth}" do
347
- before :each do
348
- @pm = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
349
- @pm_2 = PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -1.0, -1.5, -1.0]] )
350
- end
351
- it 'should return copy of object not object itself' do
352
- @pm.send(meth, 2).should_not be_equal @pm
353
- end
354
- it 'should == to bang-method' do
355
- @pm.send(meth, 2).to_s.should == @pm_2.send("#{meth}!", 2).to_s
356
- end
357
- end
358
- end
128
+ specify { expect(iupac_pm.reverse_complemented.alphabet).to eq Bioinform::IUPACAlphabet }
129
+ specify { expect(iupac_pm.reverse_complemented.matrix).to eq [[0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0],
130
+ [1.567,3,2,1, 0.6, -1.1, 12, 0, -11, 12, -1.23, 0.11,0.321,0.4, 2.0]] }
359
131
 
360
- describe '#consensus' do
361
- let(:pm) { PM.new( [[1.3, 2.0, 4.9, 3.2], [7.13, 6.5, 3.25, 4.633], [-1.0, -0.5, -1.5, -1.0]] ) }
362
- it 'is a string of nucleotides of maximal weights' do
363
- pm.consensus.should == 'GAC'
364
- end
365
- end
366
132
  end
367
- end
133
+
134
+ context 'valid PM' do
135
+ let(:matrix) { [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]] }
136
+ let(:pm) { Bioinform::MotifModel::PM.new(matrix) }
137
+ specify { expect( pm.length ).to eq 3 }
138
+
139
+ specify { expect(pm.to_s).to eq("1\t2\t3\t1.567\n"+"12\t-11\t12\t0\n"+"-1.1\t0.6\t0.4\t0.321") }
140
+
141
+ specify { expect(pm).to eq Bioinform::MotifModel::PM.new( [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]]) }
142
+ specify { expect(pm).not_to eq Bioinform::MotifModel::PM.new( [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]], alphabet: Bioinform::ComplementableAlphabet.new([:A,:B,:C,:D],[:D,:C,:B,:A])) }
143
+ specify { expect(pm).not_to eq Bioinform::MotifModel::PM.new( [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321],[1, 2, 3, 4]]) }
144
+ specify { expect(pm).not_to eq Bioinform::MotifModel::PM.new( [[1,2,3,1.567],[12,-11,12,0],[1, 2, 3, 4]]) }
145
+ specify { expect(pm).not_to eq [[1,2,3,1.567],[12,-11,12,0],[1, 2, 3, 4]] }
146
+
147
+ specify { expect(pm.named('motif name')).to be_kind_of Bioinform::MotifModel::NamedModel }
148
+ specify { expect(pm.named('motif name').model).to eq pm }
149
+ specify { expect(pm.named('motif name').name).to eq 'motif name' }
150
+
151
+ describe '#reversed, #complemented, #reverse_complemented' do
152
+ specify { expect(pm.reversed.matrix).to eq [[-1.1, 0.6, 0.4, 0.321],[12,-11,12,0],[1,2,3,1.567]] }
153
+ specify { expect(pm.complemented.matrix).to eq [[1.567,3,2,1],[0,12,-11,12],[0.321,0.4,0.6,-1.1]] }
154
+ specify { expect(pm.reverse_complemented.matrix).to eq [[0.321,0.4,0.6,-1.1],[0,12,-11,12],[1.567,3,2,1]] }
155
+ specify { expect(pm.revcomp.matrix).to eq [[0.321,0.4,0.6,-1.1],[0,12,-11,12],[1.567,3,2,1]] }
156
+ specify { expect(pm.reversed).to be_kind_of Bioinform::MotifModel::PM }
157
+ specify { expect(pm.complemented).to be_kind_of Bioinform::MotifModel::PM }
158
+ specify { expect(pm.reverse_complemented).to be_kind_of Bioinform::MotifModel::PM }
159
+ specify { expect(pm.revcomp).to be_kind_of Bioinform::MotifModel::PM }
160
+ end
161
+
162
+ specify { expect{|b| pm.each_position(&b) }.to yield_successive_args([1,2,3,1.567], [12,-11,12,0], [-1.1, 0.6, 0.4, 0.321]) }
163
+ specify { expect(pm.each_position).to be_kind_of Enumerator }
164
+ specify { expect{|b| pm.each_position.each(&b) }.to yield_successive_args([1,2,3,1.567], [12,-11,12,0], [-1.1, 0.6, 0.4, 0.321]) }
165
+ end
166
+ end